{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 82053, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.0936224147806904e-05, "grad_norm": 3.397636890411377, "learning_rate": 4.874482086278334e-09, "loss": 1.348, "step": 5 }, { "epoch": 0.00012187244829561381, "grad_norm": 5.308322906494141, "learning_rate": 1.096758469412625e-08, "loss": 1.3206, "step": 10 }, { "epoch": 0.0001828086724434207, "grad_norm": 3.494178295135498, "learning_rate": 1.7060687301974167e-08, "loss": 1.205, "step": 15 }, { "epoch": 0.00024374489659122762, "grad_norm": 5.071898460388184, "learning_rate": 2.3153789909822082e-08, "loss": 1.3487, "step": 20 }, { "epoch": 0.00030468112073903455, "grad_norm": 2.7193572521209717, "learning_rate": 2.924689251767e-08, "loss": 1.2251, "step": 25 }, { "epoch": 0.0003656173448868414, "grad_norm": 3.1772279739379883, "learning_rate": 3.5339995125517914e-08, "loss": 1.2024, "step": 30 }, { "epoch": 0.0004265535690346483, "grad_norm": 4.439204692840576, "learning_rate": 4.143309773336583e-08, "loss": 1.2422, "step": 35 }, { "epoch": 0.00048748979318245523, "grad_norm": 4.175978183746338, "learning_rate": 4.752620034121375e-08, "loss": 1.1726, "step": 40 }, { "epoch": 0.0005484260173302622, "grad_norm": 4.0924153327941895, "learning_rate": 5.361930294906167e-08, "loss": 1.3354, "step": 45 }, { "epoch": 0.0006093622414780691, "grad_norm": 3.6985867023468018, "learning_rate": 5.971240555690958e-08, "loss": 1.3003, "step": 50 }, { "epoch": 0.0006702984656258759, "grad_norm": 5.070741176605225, "learning_rate": 6.580550816475749e-08, "loss": 1.3633, "step": 55 }, { "epoch": 0.0007312346897736828, "grad_norm": 3.5589599609375, "learning_rate": 7.189861077260542e-08, "loss": 1.3165, "step": 60 }, { "epoch": 0.0007921709139214897, "grad_norm": 4.040579319000244, "learning_rate": 7.799171338045334e-08, "loss": 1.2182, "step": 65 }, { "epoch": 0.0008531071380692966, "grad_norm": 4.17079496383667, "learning_rate": 8.408481598830125e-08, "loss": 1.2701, "step": 70 }, { "epoch": 0.0009140433622171036, "grad_norm": 3.9834723472595215, "learning_rate": 9.017791859614917e-08, "loss": 1.2565, "step": 75 }, { "epoch": 0.0009749795863649105, "grad_norm": 3.6043105125427246, "learning_rate": 9.627102120399708e-08, "loss": 1.2076, "step": 80 }, { "epoch": 0.0010359158105127174, "grad_norm": 3.0077874660491943, "learning_rate": 1.02364123811845e-07, "loss": 1.2331, "step": 85 }, { "epoch": 0.0010968520346605244, "grad_norm": 5.804072856903076, "learning_rate": 1.0845722641969292e-07, "loss": 1.3083, "step": 90 }, { "epoch": 0.0011577882588083312, "grad_norm": 3.8890929222106934, "learning_rate": 1.1455032902754083e-07, "loss": 1.2261, "step": 95 }, { "epoch": 0.0012187244829561382, "grad_norm": 4.1053080558776855, "learning_rate": 1.2064343163538875e-07, "loss": 1.3419, "step": 100 }, { "epoch": 0.001279660707103945, "grad_norm": 3.604611396789551, "learning_rate": 1.2673653424323666e-07, "loss": 1.2876, "step": 105 }, { "epoch": 0.0013405969312517518, "grad_norm": 4.3041791915893555, "learning_rate": 1.3282963685108458e-07, "loss": 1.3407, "step": 110 }, { "epoch": 0.0014015331553995588, "grad_norm": 2.822507858276367, "learning_rate": 1.389227394589325e-07, "loss": 1.1588, "step": 115 }, { "epoch": 0.0014624693795473656, "grad_norm": 3.5707716941833496, "learning_rate": 1.450158420667804e-07, "loss": 1.2773, "step": 120 }, { "epoch": 0.0015234056036951727, "grad_norm": 3.7942135334014893, "learning_rate": 1.5110894467462833e-07, "loss": 1.2385, "step": 125 }, { "epoch": 0.0015843418278429795, "grad_norm": 3.859905958175659, "learning_rate": 1.5720204728247624e-07, "loss": 1.1943, "step": 130 }, { "epoch": 0.0016452780519907865, "grad_norm": 3.8184738159179688, "learning_rate": 1.6329514989032416e-07, "loss": 1.2575, "step": 135 }, { "epoch": 0.0017062142761385933, "grad_norm": 3.3514156341552734, "learning_rate": 1.6938825249817207e-07, "loss": 1.3266, "step": 140 }, { "epoch": 0.0017671505002864003, "grad_norm": 3.8051507472991943, "learning_rate": 1.7548135510602e-07, "loss": 1.2087, "step": 145 }, { "epoch": 0.001828086724434207, "grad_norm": 3.6675021648406982, "learning_rate": 1.815744577138679e-07, "loss": 1.2124, "step": 150 }, { "epoch": 0.0018890229485820141, "grad_norm": 3.0821852684020996, "learning_rate": 1.8766756032171582e-07, "loss": 1.2367, "step": 155 }, { "epoch": 0.001949959172729821, "grad_norm": 3.348248243331909, "learning_rate": 1.9376066292956374e-07, "loss": 1.1598, "step": 160 }, { "epoch": 0.002010895396877628, "grad_norm": 3.2708187103271484, "learning_rate": 1.9985376553741168e-07, "loss": 1.3578, "step": 165 }, { "epoch": 0.0020718316210254347, "grad_norm": 3.0486459732055664, "learning_rate": 2.059468681452596e-07, "loss": 1.2185, "step": 170 }, { "epoch": 0.0021327678451732415, "grad_norm": 3.054227352142334, "learning_rate": 2.120399707531075e-07, "loss": 1.1864, "step": 175 }, { "epoch": 0.0021937040693210488, "grad_norm": 3.3520071506500244, "learning_rate": 2.1813307336095543e-07, "loss": 1.2777, "step": 180 }, { "epoch": 0.0022546402934688556, "grad_norm": 3.426292896270752, "learning_rate": 2.2422617596880334e-07, "loss": 1.263, "step": 185 }, { "epoch": 0.0023155765176166624, "grad_norm": 2.9325296878814697, "learning_rate": 2.3031927857665126e-07, "loss": 1.1693, "step": 190 }, { "epoch": 0.002376512741764469, "grad_norm": 2.551910638809204, "learning_rate": 2.3641238118449917e-07, "loss": 1.2057, "step": 195 }, { "epoch": 0.0024374489659122764, "grad_norm": 2.454063892364502, "learning_rate": 2.4250548379234706e-07, "loss": 1.2037, "step": 200 }, { "epoch": 0.0024983851900600832, "grad_norm": 2.7006869316101074, "learning_rate": 2.48598586400195e-07, "loss": 1.1728, "step": 205 }, { "epoch": 0.00255932141420789, "grad_norm": 2.6302649974823, "learning_rate": 2.546916890080429e-07, "loss": 1.25, "step": 210 }, { "epoch": 0.002620257638355697, "grad_norm": 2.847248077392578, "learning_rate": 2.6078479161589084e-07, "loss": 1.2733, "step": 215 }, { "epoch": 0.0026811938625035036, "grad_norm": 3.723315715789795, "learning_rate": 2.668778942237387e-07, "loss": 1.1952, "step": 220 }, { "epoch": 0.002742130086651311, "grad_norm": 2.9534964561462402, "learning_rate": 2.7297099683158667e-07, "loss": 1.2313, "step": 225 }, { "epoch": 0.0028030663107991177, "grad_norm": 2.4173145294189453, "learning_rate": 2.7906409943943456e-07, "loss": 1.251, "step": 230 }, { "epoch": 0.0028640025349469245, "grad_norm": 2.6452882289886475, "learning_rate": 2.851572020472825e-07, "loss": 1.1559, "step": 235 }, { "epoch": 0.0029249387590947313, "grad_norm": 2.4741554260253906, "learning_rate": 2.912503046551304e-07, "loss": 1.2399, "step": 240 }, { "epoch": 0.0029858749832425385, "grad_norm": 2.4682464599609375, "learning_rate": 2.9734340726297833e-07, "loss": 1.1917, "step": 245 }, { "epoch": 0.0030468112073903453, "grad_norm": 3.0936830043792725, "learning_rate": 3.0343650987082627e-07, "loss": 1.1153, "step": 250 }, { "epoch": 0.003107747431538152, "grad_norm": 2.6012089252471924, "learning_rate": 3.0952961247867416e-07, "loss": 1.1467, "step": 255 }, { "epoch": 0.003168683655685959, "grad_norm": 2.431595802307129, "learning_rate": 3.156227150865221e-07, "loss": 1.2778, "step": 260 }, { "epoch": 0.003229619879833766, "grad_norm": 2.5592269897460938, "learning_rate": 3.2171581769437e-07, "loss": 1.1722, "step": 265 }, { "epoch": 0.003290556103981573, "grad_norm": 2.3710758686065674, "learning_rate": 3.2780892030221794e-07, "loss": 1.1875, "step": 270 }, { "epoch": 0.0033514923281293798, "grad_norm": 2.709648609161377, "learning_rate": 3.339020229100658e-07, "loss": 1.1772, "step": 275 }, { "epoch": 0.0034124285522771866, "grad_norm": 2.449497938156128, "learning_rate": 3.3999512551791377e-07, "loss": 1.1626, "step": 280 }, { "epoch": 0.003473364776424994, "grad_norm": 1.9582606554031372, "learning_rate": 3.4608822812576166e-07, "loss": 1.1035, "step": 285 }, { "epoch": 0.0035343010005728006, "grad_norm": 2.7465357780456543, "learning_rate": 3.521813307336096e-07, "loss": 1.3081, "step": 290 }, { "epoch": 0.0035952372247206074, "grad_norm": 3.0046682357788086, "learning_rate": 3.582744333414575e-07, "loss": 1.2439, "step": 295 }, { "epoch": 0.003656173448868414, "grad_norm": 2.371018409729004, "learning_rate": 3.6436753594930543e-07, "loss": 1.0864, "step": 300 }, { "epoch": 0.0037171096730162214, "grad_norm": 2.4127509593963623, "learning_rate": 3.704606385571533e-07, "loss": 1.168, "step": 305 }, { "epoch": 0.0037780458971640282, "grad_norm": 2.336841583251953, "learning_rate": 3.765537411650012e-07, "loss": 1.2752, "step": 310 }, { "epoch": 0.003838982121311835, "grad_norm": 2.5299365520477295, "learning_rate": 3.8264684377284915e-07, "loss": 1.124, "step": 315 }, { "epoch": 0.003899918345459642, "grad_norm": 2.5832178592681885, "learning_rate": 3.8873994638069704e-07, "loss": 1.1048, "step": 320 }, { "epoch": 0.003960854569607449, "grad_norm": 2.484370470046997, "learning_rate": 3.94833048988545e-07, "loss": 1.0959, "step": 325 }, { "epoch": 0.004021790793755256, "grad_norm": 2.838447332382202, "learning_rate": 4.0092615159639287e-07, "loss": 1.2041, "step": 330 }, { "epoch": 0.004082727017903062, "grad_norm": 2.348329782485962, "learning_rate": 4.070192542042408e-07, "loss": 1.0244, "step": 335 }, { "epoch": 0.0041436632420508695, "grad_norm": 2.3661816120147705, "learning_rate": 4.131123568120887e-07, "loss": 1.1226, "step": 340 }, { "epoch": 0.004204599466198677, "grad_norm": 2.6628828048706055, "learning_rate": 4.1920545941993665e-07, "loss": 1.1091, "step": 345 }, { "epoch": 0.004265535690346483, "grad_norm": 2.4273757934570312, "learning_rate": 4.2529856202778454e-07, "loss": 1.1405, "step": 350 }, { "epoch": 0.00432647191449429, "grad_norm": 2.0622668266296387, "learning_rate": 4.313916646356325e-07, "loss": 1.1129, "step": 355 }, { "epoch": 0.0043874081386420976, "grad_norm": 2.9355361461639404, "learning_rate": 4.3748476724348047e-07, "loss": 1.152, "step": 360 }, { "epoch": 0.004448344362789904, "grad_norm": 2.0299434661865234, "learning_rate": 4.4357786985132836e-07, "loss": 1.1198, "step": 365 }, { "epoch": 0.004509280586937711, "grad_norm": 2.1304826736450195, "learning_rate": 4.496709724591763e-07, "loss": 1.176, "step": 370 }, { "epoch": 0.0045702168110855175, "grad_norm": 2.35275936126709, "learning_rate": 4.557640750670242e-07, "loss": 1.1861, "step": 375 }, { "epoch": 0.004631153035233325, "grad_norm": 2.3338961601257324, "learning_rate": 4.618571776748721e-07, "loss": 1.0755, "step": 380 }, { "epoch": 0.004692089259381132, "grad_norm": 3.106661319732666, "learning_rate": 4.6795028028272e-07, "loss": 1.2469, "step": 385 }, { "epoch": 0.004753025483528938, "grad_norm": 2.3885233402252197, "learning_rate": 4.740433828905679e-07, "loss": 1.1755, "step": 390 }, { "epoch": 0.004813961707676746, "grad_norm": 2.6341075897216797, "learning_rate": 4.801364854984159e-07, "loss": 1.214, "step": 395 }, { "epoch": 0.004874897931824553, "grad_norm": 2.308364152908325, "learning_rate": 4.862295881062637e-07, "loss": 1.2103, "step": 400 }, { "epoch": 0.004935834155972359, "grad_norm": 2.212538003921509, "learning_rate": 4.923226907141116e-07, "loss": 1.1707, "step": 405 }, { "epoch": 0.0049967703801201665, "grad_norm": 2.3623104095458984, "learning_rate": 4.984157933219596e-07, "loss": 1.0957, "step": 410 }, { "epoch": 0.005057706604267973, "grad_norm": 2.4375321865081787, "learning_rate": 5.045088959298075e-07, "loss": 1.1348, "step": 415 }, { "epoch": 0.00511864282841578, "grad_norm": 2.7337746620178223, "learning_rate": 5.106019985376554e-07, "loss": 1.0744, "step": 420 }, { "epoch": 0.005179579052563587, "grad_norm": 2.7103452682495117, "learning_rate": 5.166951011455033e-07, "loss": 1.1869, "step": 425 }, { "epoch": 0.005240515276711394, "grad_norm": 2.3158881664276123, "learning_rate": 5.227882037533513e-07, "loss": 1.1435, "step": 430 }, { "epoch": 0.005301451500859201, "grad_norm": 2.5724236965179443, "learning_rate": 5.288813063611992e-07, "loss": 1.1334, "step": 435 }, { "epoch": 0.005362387725007007, "grad_norm": 2.4515068531036377, "learning_rate": 5.349744089690471e-07, "loss": 1.097, "step": 440 }, { "epoch": 0.0054233239491548145, "grad_norm": 2.268280267715454, "learning_rate": 5.41067511576895e-07, "loss": 1.148, "step": 445 }, { "epoch": 0.005484260173302622, "grad_norm": 2.86027193069458, "learning_rate": 5.47160614184743e-07, "loss": 1.0669, "step": 450 }, { "epoch": 0.005545196397450428, "grad_norm": 2.0975375175476074, "learning_rate": 5.532537167925908e-07, "loss": 1.2266, "step": 455 }, { "epoch": 0.005606132621598235, "grad_norm": 1.985101342201233, "learning_rate": 5.593468194004387e-07, "loss": 1.1095, "step": 460 }, { "epoch": 0.005667068845746043, "grad_norm": 4.2085490226745605, "learning_rate": 5.654399220082866e-07, "loss": 1.075, "step": 465 }, { "epoch": 0.005728005069893849, "grad_norm": 2.451211452484131, "learning_rate": 5.715330246161345e-07, "loss": 1.073, "step": 470 }, { "epoch": 0.005788941294041656, "grad_norm": 2.0737650394439697, "learning_rate": 5.776261272239825e-07, "loss": 1.1397, "step": 475 }, { "epoch": 0.0058498775181894625, "grad_norm": 2.472423553466797, "learning_rate": 5.837192298318304e-07, "loss": 1.1159, "step": 480 }, { "epoch": 0.00591081374233727, "grad_norm": 2.1806111335754395, "learning_rate": 5.898123324396783e-07, "loss": 1.0846, "step": 485 }, { "epoch": 0.005971749966485077, "grad_norm": 2.10571551322937, "learning_rate": 5.959054350475262e-07, "loss": 1.1425, "step": 490 }, { "epoch": 0.006032686190632883, "grad_norm": 2.2325470447540283, "learning_rate": 6.019985376553742e-07, "loss": 1.1698, "step": 495 }, { "epoch": 0.006093622414780691, "grad_norm": 2.313863515853882, "learning_rate": 6.080916402632221e-07, "loss": 1.1103, "step": 500 }, { "epoch": 0.006154558638928498, "grad_norm": 2.073378801345825, "learning_rate": 6.1418474287107e-07, "loss": 1.0734, "step": 505 }, { "epoch": 0.006215494863076304, "grad_norm": 1.964837670326233, "learning_rate": 6.202778454789178e-07, "loss": 1.1318, "step": 510 }, { "epoch": 0.0062764310872241115, "grad_norm": 2.097323179244995, "learning_rate": 6.263709480867658e-07, "loss": 1.13, "step": 515 }, { "epoch": 0.006337367311371918, "grad_norm": 2.0217792987823486, "learning_rate": 6.324640506946138e-07, "loss": 1.0973, "step": 520 }, { "epoch": 0.006398303535519725, "grad_norm": 2.207719564437866, "learning_rate": 6.385571533024616e-07, "loss": 1.055, "step": 525 }, { "epoch": 0.006459239759667532, "grad_norm": 1.9994405508041382, "learning_rate": 6.446502559103096e-07, "loss": 1.0382, "step": 530 }, { "epoch": 0.006520175983815339, "grad_norm": 2.6116485595703125, "learning_rate": 6.507433585181575e-07, "loss": 1.1632, "step": 535 }, { "epoch": 0.006581112207963146, "grad_norm": 2.4910247325897217, "learning_rate": 6.568364611260054e-07, "loss": 1.1056, "step": 540 }, { "epoch": 0.006642048432110952, "grad_norm": 2.2304534912109375, "learning_rate": 6.629295637338533e-07, "loss": 1.1189, "step": 545 }, { "epoch": 0.0067029846562587595, "grad_norm": 1.9707574844360352, "learning_rate": 6.690226663417013e-07, "loss": 1.0855, "step": 550 }, { "epoch": 0.006763920880406567, "grad_norm": 2.197544574737549, "learning_rate": 6.751157689495492e-07, "loss": 1.0983, "step": 555 }, { "epoch": 0.006824857104554373, "grad_norm": 2.547457456588745, "learning_rate": 6.81208871557397e-07, "loss": 1.0734, "step": 560 }, { "epoch": 0.00688579332870218, "grad_norm": 2.9198031425476074, "learning_rate": 6.873019741652449e-07, "loss": 1.1555, "step": 565 }, { "epoch": 0.006946729552849988, "grad_norm": 2.16929292678833, "learning_rate": 6.933950767730929e-07, "loss": 1.0022, "step": 570 }, { "epoch": 0.007007665776997794, "grad_norm": 2.08019757270813, "learning_rate": 6.994881793809407e-07, "loss": 1.0528, "step": 575 }, { "epoch": 0.007068602001145601, "grad_norm": 2.1355931758880615, "learning_rate": 7.055812819887887e-07, "loss": 1.0888, "step": 580 }, { "epoch": 0.0071295382252934076, "grad_norm": 2.2392725944519043, "learning_rate": 7.116743845966366e-07, "loss": 1.0652, "step": 585 }, { "epoch": 0.007190474449441215, "grad_norm": 2.0600264072418213, "learning_rate": 7.177674872044846e-07, "loss": 1.1377, "step": 590 }, { "epoch": 0.007251410673589022, "grad_norm": 2.5330560207366943, "learning_rate": 7.238605898123326e-07, "loss": 1.1574, "step": 595 }, { "epoch": 0.007312346897736828, "grad_norm": 2.269057512283325, "learning_rate": 7.299536924201804e-07, "loss": 1.0495, "step": 600 }, { "epoch": 0.007373283121884636, "grad_norm": 2.493035316467285, "learning_rate": 7.360467950280284e-07, "loss": 1.1166, "step": 605 }, { "epoch": 0.007434219346032443, "grad_norm": 2.6063241958618164, "learning_rate": 7.421398976358763e-07, "loss": 1.1399, "step": 610 }, { "epoch": 0.007495155570180249, "grad_norm": 2.265033721923828, "learning_rate": 7.482330002437243e-07, "loss": 1.0633, "step": 615 }, { "epoch": 0.0075560917943280565, "grad_norm": 2.346189260482788, "learning_rate": 7.54326102851572e-07, "loss": 1.1373, "step": 620 }, { "epoch": 0.007617028018475863, "grad_norm": 2.457827091217041, "learning_rate": 7.6041920545942e-07, "loss": 1.0937, "step": 625 }, { "epoch": 0.00767796424262367, "grad_norm": 2.373110771179199, "learning_rate": 7.665123080672679e-07, "loss": 1.1228, "step": 630 }, { "epoch": 0.007738900466771477, "grad_norm": 2.416576862335205, "learning_rate": 7.726054106751159e-07, "loss": 1.0968, "step": 635 }, { "epoch": 0.007799836690919284, "grad_norm": 2.05502986907959, "learning_rate": 7.786985132829637e-07, "loss": 1.0862, "step": 640 }, { "epoch": 0.00786077291506709, "grad_norm": 2.077695608139038, "learning_rate": 7.847916158908117e-07, "loss": 1.1173, "step": 645 }, { "epoch": 0.007921709139214897, "grad_norm": 2.4680354595184326, "learning_rate": 7.908847184986596e-07, "loss": 1.0934, "step": 650 }, { "epoch": 0.007982645363362705, "grad_norm": 2.0386500358581543, "learning_rate": 7.969778211065076e-07, "loss": 1.0538, "step": 655 }, { "epoch": 0.008043581587510512, "grad_norm": 2.4717788696289062, "learning_rate": 8.030709237143554e-07, "loss": 1.012, "step": 660 }, { "epoch": 0.008104517811658319, "grad_norm": 2.188884973526001, "learning_rate": 8.091640263222034e-07, "loss": 1.0901, "step": 665 }, { "epoch": 0.008165454035806124, "grad_norm": 2.4737682342529297, "learning_rate": 8.152571289300512e-07, "loss": 1.1625, "step": 670 }, { "epoch": 0.008226390259953932, "grad_norm": 2.3151888847351074, "learning_rate": 8.213502315378992e-07, "loss": 0.9823, "step": 675 }, { "epoch": 0.008287326484101739, "grad_norm": 2.115748405456543, "learning_rate": 8.27443334145747e-07, "loss": 1.1128, "step": 680 }, { "epoch": 0.008348262708249546, "grad_norm": 1.8420383930206299, "learning_rate": 8.33536436753595e-07, "loss": 1.1126, "step": 685 }, { "epoch": 0.008409198932397353, "grad_norm": 2.392395257949829, "learning_rate": 8.396295393614429e-07, "loss": 1.137, "step": 690 }, { "epoch": 0.00847013515654516, "grad_norm": 2.540564775466919, "learning_rate": 8.457226419692908e-07, "loss": 1.1227, "step": 695 }, { "epoch": 0.008531071380692966, "grad_norm": 2.4746809005737305, "learning_rate": 8.518157445771387e-07, "loss": 1.0519, "step": 700 }, { "epoch": 0.008592007604840773, "grad_norm": 2.1100239753723145, "learning_rate": 8.579088471849867e-07, "loss": 1.0739, "step": 705 }, { "epoch": 0.00865294382898858, "grad_norm": 2.39827823638916, "learning_rate": 8.640019497928346e-07, "loss": 1.0988, "step": 710 }, { "epoch": 0.008713880053136388, "grad_norm": 2.234402656555176, "learning_rate": 8.700950524006825e-07, "loss": 1.1106, "step": 715 }, { "epoch": 0.008774816277284195, "grad_norm": 2.4189510345458984, "learning_rate": 8.761881550085304e-07, "loss": 1.0599, "step": 720 }, { "epoch": 0.008835752501432, "grad_norm": 2.417086124420166, "learning_rate": 8.822812576163783e-07, "loss": 1.0036, "step": 725 }, { "epoch": 0.008896688725579808, "grad_norm": 2.6726367473602295, "learning_rate": 8.883743602242261e-07, "loss": 1.1528, "step": 730 }, { "epoch": 0.008957624949727615, "grad_norm": 2.2996606826782227, "learning_rate": 8.944674628320741e-07, "loss": 1.0499, "step": 735 }, { "epoch": 0.009018561173875422, "grad_norm": 2.283782958984375, "learning_rate": 9.00560565439922e-07, "loss": 1.0879, "step": 740 }, { "epoch": 0.00907949739802323, "grad_norm": 2.2364046573638916, "learning_rate": 9.0665366804777e-07, "loss": 1.1024, "step": 745 }, { "epoch": 0.009140433622171035, "grad_norm": 2.2551376819610596, "learning_rate": 9.12746770655618e-07, "loss": 1.039, "step": 750 }, { "epoch": 0.009201369846318842, "grad_norm": 2.0844268798828125, "learning_rate": 9.188398732634658e-07, "loss": 1.0315, "step": 755 }, { "epoch": 0.00926230607046665, "grad_norm": 2.2769033908843994, "learning_rate": 9.249329758713138e-07, "loss": 0.9875, "step": 760 }, { "epoch": 0.009323242294614457, "grad_norm": 2.0663022994995117, "learning_rate": 9.310260784791617e-07, "loss": 1.1251, "step": 765 }, { "epoch": 0.009384178518762264, "grad_norm": 2.2938621044158936, "learning_rate": 9.371191810870097e-07, "loss": 1.1423, "step": 770 }, { "epoch": 0.00944511474291007, "grad_norm": 2.0215604305267334, "learning_rate": 9.432122836948575e-07, "loss": 1.1129, "step": 775 }, { "epoch": 0.009506050967057877, "grad_norm": 2.505554437637329, "learning_rate": 9.493053863027054e-07, "loss": 1.0686, "step": 780 }, { "epoch": 0.009566987191205684, "grad_norm": 2.2688472270965576, "learning_rate": 9.553984889105532e-07, "loss": 1.0644, "step": 785 }, { "epoch": 0.009627923415353491, "grad_norm": 2.2120096683502197, "learning_rate": 9.614915915184012e-07, "loss": 1.0635, "step": 790 }, { "epoch": 0.009688859639501298, "grad_norm": 2.3053433895111084, "learning_rate": 9.675846941262492e-07, "loss": 1.1202, "step": 795 }, { "epoch": 0.009749795863649106, "grad_norm": 2.1399035453796387, "learning_rate": 9.736777967340972e-07, "loss": 1.0899, "step": 800 }, { "epoch": 0.009810732087796911, "grad_norm": 2.3801066875457764, "learning_rate": 9.79770899341945e-07, "loss": 1.0092, "step": 805 }, { "epoch": 0.009871668311944718, "grad_norm": 1.9231010675430298, "learning_rate": 9.85864001949793e-07, "loss": 1.0134, "step": 810 }, { "epoch": 0.009932604536092526, "grad_norm": 2.3659682273864746, "learning_rate": 9.919571045576408e-07, "loss": 1.0834, "step": 815 }, { "epoch": 0.009993540760240333, "grad_norm": 2.4435925483703613, "learning_rate": 9.980502071654888e-07, "loss": 0.9934, "step": 820 }, { "epoch": 0.01005447698438814, "grad_norm": 2.6979920864105225, "learning_rate": 1.0041433097733366e-06, "loss": 1.0604, "step": 825 }, { "epoch": 0.010115413208535946, "grad_norm": 2.1988017559051514, "learning_rate": 1.0102364123811846e-06, "loss": 1.06, "step": 830 }, { "epoch": 0.010176349432683753, "grad_norm": 1.9197158813476562, "learning_rate": 1.0163295149890325e-06, "loss": 1.0307, "step": 835 }, { "epoch": 0.01023728565683156, "grad_norm": 2.070194959640503, "learning_rate": 1.0224226175968805e-06, "loss": 1.0241, "step": 840 }, { "epoch": 0.010298221880979367, "grad_norm": 2.0446906089782715, "learning_rate": 1.0285157202047283e-06, "loss": 1.0968, "step": 845 }, { "epoch": 0.010359158105127175, "grad_norm": 2.0749454498291016, "learning_rate": 1.0346088228125763e-06, "loss": 1.031, "step": 850 }, { "epoch": 0.01042009432927498, "grad_norm": 3.0218429565429688, "learning_rate": 1.040701925420424e-06, "loss": 1.0862, "step": 855 }, { "epoch": 0.010481030553422787, "grad_norm": 2.198345184326172, "learning_rate": 1.046795028028272e-06, "loss": 1.0651, "step": 860 }, { "epoch": 0.010541966777570595, "grad_norm": 2.1365861892700195, "learning_rate": 1.0528881306361199e-06, "loss": 1.0555, "step": 865 }, { "epoch": 0.010602903001718402, "grad_norm": 1.9961298704147339, "learning_rate": 1.0589812332439679e-06, "loss": 1.0413, "step": 870 }, { "epoch": 0.010663839225866209, "grad_norm": 2.9104113578796387, "learning_rate": 1.0650743358518159e-06, "loss": 1.0608, "step": 875 }, { "epoch": 0.010724775450014015, "grad_norm": 2.095500946044922, "learning_rate": 1.0711674384596637e-06, "loss": 1.0682, "step": 880 }, { "epoch": 0.010785711674161822, "grad_norm": 2.0748469829559326, "learning_rate": 1.0772605410675117e-06, "loss": 1.103, "step": 885 }, { "epoch": 0.010846647898309629, "grad_norm": 2.123347282409668, "learning_rate": 1.0833536436753596e-06, "loss": 1.09, "step": 890 }, { "epoch": 0.010907584122457436, "grad_norm": 1.8817765712738037, "learning_rate": 1.0894467462832074e-06, "loss": 1.0566, "step": 895 }, { "epoch": 0.010968520346605243, "grad_norm": 2.487422227859497, "learning_rate": 1.0955398488910554e-06, "loss": 1.0887, "step": 900 }, { "epoch": 0.01102945657075305, "grad_norm": 2.305600166320801, "learning_rate": 1.1016329514989034e-06, "loss": 0.9835, "step": 905 }, { "epoch": 0.011090392794900856, "grad_norm": 2.7391111850738525, "learning_rate": 1.1077260541067512e-06, "loss": 0.9991, "step": 910 }, { "epoch": 0.011151329019048663, "grad_norm": 2.373936176300049, "learning_rate": 1.1138191567145992e-06, "loss": 1.0028, "step": 915 }, { "epoch": 0.01121226524319647, "grad_norm": 2.536684274673462, "learning_rate": 1.119912259322447e-06, "loss": 1.0621, "step": 920 }, { "epoch": 0.011273201467344278, "grad_norm": 2.0354743003845215, "learning_rate": 1.126005361930295e-06, "loss": 0.9487, "step": 925 }, { "epoch": 0.011334137691492085, "grad_norm": 2.067113161087036, "learning_rate": 1.132098464538143e-06, "loss": 1.1112, "step": 930 }, { "epoch": 0.01139507391563989, "grad_norm": 2.1465024948120117, "learning_rate": 1.138191567145991e-06, "loss": 1.0205, "step": 935 }, { "epoch": 0.011456010139787698, "grad_norm": 2.353869915008545, "learning_rate": 1.1442846697538388e-06, "loss": 1.0707, "step": 940 }, { "epoch": 0.011516946363935505, "grad_norm": 1.998075246810913, "learning_rate": 1.1503777723616867e-06, "loss": 1.0733, "step": 945 }, { "epoch": 0.011577882588083312, "grad_norm": 2.6344358921051025, "learning_rate": 1.1564708749695345e-06, "loss": 1.0754, "step": 950 }, { "epoch": 0.01163881881223112, "grad_norm": 2.3558027744293213, "learning_rate": 1.1625639775773825e-06, "loss": 1.0817, "step": 955 }, { "epoch": 0.011699755036378925, "grad_norm": 2.1814565658569336, "learning_rate": 1.1686570801852303e-06, "loss": 1.0905, "step": 960 }, { "epoch": 0.011760691260526732, "grad_norm": 2.437260866165161, "learning_rate": 1.1747501827930783e-06, "loss": 1.0457, "step": 965 }, { "epoch": 0.01182162748467454, "grad_norm": 2.0644326210021973, "learning_rate": 1.1808432854009263e-06, "loss": 1.0923, "step": 970 }, { "epoch": 0.011882563708822347, "grad_norm": 2.1754822731018066, "learning_rate": 1.1869363880087743e-06, "loss": 1.0848, "step": 975 }, { "epoch": 0.011943499932970154, "grad_norm": 2.4039628505706787, "learning_rate": 1.193029490616622e-06, "loss": 1.0881, "step": 980 }, { "epoch": 0.01200443615711796, "grad_norm": 1.9558101892471313, "learning_rate": 1.19912259322447e-06, "loss": 1.0809, "step": 985 }, { "epoch": 0.012065372381265767, "grad_norm": 2.0821354389190674, "learning_rate": 1.2052156958323179e-06, "loss": 1.0479, "step": 990 }, { "epoch": 0.012126308605413574, "grad_norm": 2.012993097305298, "learning_rate": 1.2113087984401659e-06, "loss": 1.0457, "step": 995 }, { "epoch": 0.012187244829561381, "grad_norm": 1.9540451765060425, "learning_rate": 1.2174019010480136e-06, "loss": 1.0455, "step": 1000 }, { "epoch": 0.012248181053709188, "grad_norm": 1.9814950227737427, "learning_rate": 1.2234950036558616e-06, "loss": 1.0776, "step": 1005 }, { "epoch": 0.012309117277856996, "grad_norm": 2.3414955139160156, "learning_rate": 1.2295881062637096e-06, "loss": 0.996, "step": 1010 }, { "epoch": 0.012370053502004801, "grad_norm": 2.0849392414093018, "learning_rate": 1.2356812088715574e-06, "loss": 1.0354, "step": 1015 }, { "epoch": 0.012430989726152608, "grad_norm": 2.4285330772399902, "learning_rate": 1.2417743114794054e-06, "loss": 1.0305, "step": 1020 }, { "epoch": 0.012491925950300416, "grad_norm": 2.197754383087158, "learning_rate": 1.2478674140872534e-06, "loss": 1.0227, "step": 1025 }, { "epoch": 0.012552862174448223, "grad_norm": 2.0282461643218994, "learning_rate": 1.2539605166951014e-06, "loss": 1.0368, "step": 1030 }, { "epoch": 0.01261379839859603, "grad_norm": 2.4706077575683594, "learning_rate": 1.2600536193029492e-06, "loss": 1.0702, "step": 1035 }, { "epoch": 0.012674734622743836, "grad_norm": 2.0628502368927, "learning_rate": 1.266146721910797e-06, "loss": 1.0625, "step": 1040 }, { "epoch": 0.012735670846891643, "grad_norm": 2.232121467590332, "learning_rate": 1.272239824518645e-06, "loss": 1.0183, "step": 1045 }, { "epoch": 0.01279660707103945, "grad_norm": 2.0203804969787598, "learning_rate": 1.278332927126493e-06, "loss": 1.0592, "step": 1050 }, { "epoch": 0.012857543295187257, "grad_norm": 2.1802425384521484, "learning_rate": 1.2844260297343407e-06, "loss": 1.0573, "step": 1055 }, { "epoch": 0.012918479519335065, "grad_norm": 2.0241167545318604, "learning_rate": 1.2905191323421887e-06, "loss": 1.0199, "step": 1060 }, { "epoch": 0.01297941574348287, "grad_norm": 2.6857919692993164, "learning_rate": 1.2966122349500367e-06, "loss": 1.041, "step": 1065 }, { "epoch": 0.013040351967630677, "grad_norm": 2.253425359725952, "learning_rate": 1.3027053375578847e-06, "loss": 1.1759, "step": 1070 }, { "epoch": 0.013101288191778485, "grad_norm": 2.3805322647094727, "learning_rate": 1.3087984401657325e-06, "loss": 1.0041, "step": 1075 }, { "epoch": 0.013162224415926292, "grad_norm": 2.1944684982299805, "learning_rate": 1.3148915427735803e-06, "loss": 1.0601, "step": 1080 }, { "epoch": 0.013223160640074099, "grad_norm": 2.3855745792388916, "learning_rate": 1.3209846453814285e-06, "loss": 1.0765, "step": 1085 }, { "epoch": 0.013284096864221905, "grad_norm": 2.369410276412964, "learning_rate": 1.3270777479892763e-06, "loss": 1.0381, "step": 1090 }, { "epoch": 0.013345033088369712, "grad_norm": 1.9468525648117065, "learning_rate": 1.333170850597124e-06, "loss": 1.0688, "step": 1095 }, { "epoch": 0.013405969312517519, "grad_norm": 2.6217124462127686, "learning_rate": 1.339263953204972e-06, "loss": 1.0967, "step": 1100 }, { "epoch": 0.013466905536665326, "grad_norm": 2.206559658050537, "learning_rate": 1.34535705581282e-06, "loss": 1.0731, "step": 1105 }, { "epoch": 0.013527841760813133, "grad_norm": 1.933610200881958, "learning_rate": 1.351450158420668e-06, "loss": 1.0375, "step": 1110 }, { "epoch": 0.01358877798496094, "grad_norm": 2.7868897914886475, "learning_rate": 1.3575432610285158e-06, "loss": 1.0451, "step": 1115 }, { "epoch": 0.013649714209108746, "grad_norm": 2.0804660320281982, "learning_rate": 1.3636363636363636e-06, "loss": 1.0039, "step": 1120 }, { "epoch": 0.013710650433256553, "grad_norm": 2.22611141204834, "learning_rate": 1.3697294662442118e-06, "loss": 0.9726, "step": 1125 }, { "epoch": 0.01377158665740436, "grad_norm": 2.7292442321777344, "learning_rate": 1.3758225688520596e-06, "loss": 1.0596, "step": 1130 }, { "epoch": 0.013832522881552168, "grad_norm": 2.37463116645813, "learning_rate": 1.3819156714599074e-06, "loss": 1.0738, "step": 1135 }, { "epoch": 0.013893459105699975, "grad_norm": 2.453467845916748, "learning_rate": 1.3880087740677554e-06, "loss": 1.0042, "step": 1140 }, { "epoch": 0.01395439532984778, "grad_norm": 2.1062605381011963, "learning_rate": 1.3941018766756034e-06, "loss": 1.0831, "step": 1145 }, { "epoch": 0.014015331553995588, "grad_norm": 2.0498249530792236, "learning_rate": 1.4001949792834514e-06, "loss": 0.9997, "step": 1150 }, { "epoch": 0.014076267778143395, "grad_norm": 1.890413522720337, "learning_rate": 1.4062880818912992e-06, "loss": 0.9957, "step": 1155 }, { "epoch": 0.014137204002291202, "grad_norm": 2.442983388900757, "learning_rate": 1.412381184499147e-06, "loss": 1.078, "step": 1160 }, { "epoch": 0.01419814022643901, "grad_norm": 2.355250597000122, "learning_rate": 1.4184742871069951e-06, "loss": 0.997, "step": 1165 }, { "epoch": 0.014259076450586815, "grad_norm": 2.086005210876465, "learning_rate": 1.424567389714843e-06, "loss": 1.111, "step": 1170 }, { "epoch": 0.014320012674734622, "grad_norm": 2.423517942428589, "learning_rate": 1.4306604923226907e-06, "loss": 0.9599, "step": 1175 }, { "epoch": 0.01438094889888243, "grad_norm": 1.9388822317123413, "learning_rate": 1.4367535949305387e-06, "loss": 1.0518, "step": 1180 }, { "epoch": 0.014441885123030237, "grad_norm": 2.9475760459899902, "learning_rate": 1.4428466975383867e-06, "loss": 1.0141, "step": 1185 }, { "epoch": 0.014502821347178044, "grad_norm": 2.734813690185547, "learning_rate": 1.4489398001462345e-06, "loss": 0.9383, "step": 1190 }, { "epoch": 0.01456375757132585, "grad_norm": 1.9670337438583374, "learning_rate": 1.4550329027540825e-06, "loss": 1.0577, "step": 1195 }, { "epoch": 0.014624693795473657, "grad_norm": 2.3233354091644287, "learning_rate": 1.4611260053619303e-06, "loss": 1.1428, "step": 1200 }, { "epoch": 0.014685630019621464, "grad_norm": 2.8374080657958984, "learning_rate": 1.4672191079697785e-06, "loss": 1.0722, "step": 1205 }, { "epoch": 0.014746566243769271, "grad_norm": 2.4692771434783936, "learning_rate": 1.4733122105776263e-06, "loss": 0.9414, "step": 1210 }, { "epoch": 0.014807502467917079, "grad_norm": 2.160914421081543, "learning_rate": 1.479405313185474e-06, "loss": 1.0365, "step": 1215 }, { "epoch": 0.014868438692064886, "grad_norm": 2.1367366313934326, "learning_rate": 1.4854984157933222e-06, "loss": 1.0425, "step": 1220 }, { "epoch": 0.014929374916212691, "grad_norm": 2.5190253257751465, "learning_rate": 1.49159151840117e-06, "loss": 1.0766, "step": 1225 }, { "epoch": 0.014990311140360498, "grad_norm": 2.4365317821502686, "learning_rate": 1.4976846210090178e-06, "loss": 1.0139, "step": 1230 }, { "epoch": 0.015051247364508306, "grad_norm": 2.2071123123168945, "learning_rate": 1.5037777236168658e-06, "loss": 1.1224, "step": 1235 }, { "epoch": 0.015112183588656113, "grad_norm": 2.3415637016296387, "learning_rate": 1.5098708262247138e-06, "loss": 1.125, "step": 1240 }, { "epoch": 0.01517311981280392, "grad_norm": 2.5307445526123047, "learning_rate": 1.5159639288325618e-06, "loss": 1.0208, "step": 1245 }, { "epoch": 0.015234056036951726, "grad_norm": 2.1925909519195557, "learning_rate": 1.5220570314404096e-06, "loss": 1.0396, "step": 1250 }, { "epoch": 0.015294992261099533, "grad_norm": 1.8569350242614746, "learning_rate": 1.5281501340482574e-06, "loss": 1.0104, "step": 1255 }, { "epoch": 0.01535592848524734, "grad_norm": 2.294498920440674, "learning_rate": 1.5342432366561056e-06, "loss": 1.0417, "step": 1260 }, { "epoch": 0.015416864709395147, "grad_norm": 2.21519136428833, "learning_rate": 1.5403363392639534e-06, "loss": 1.0419, "step": 1265 }, { "epoch": 0.015477800933542955, "grad_norm": 2.0913004875183105, "learning_rate": 1.5464294418718011e-06, "loss": 0.9834, "step": 1270 }, { "epoch": 0.01553873715769076, "grad_norm": 2.2223479747772217, "learning_rate": 1.5525225444796491e-06, "loss": 0.9651, "step": 1275 }, { "epoch": 0.015599673381838567, "grad_norm": 2.3510637283325195, "learning_rate": 1.5586156470874971e-06, "loss": 1.0585, "step": 1280 }, { "epoch": 0.015660609605986375, "grad_norm": 2.336404800415039, "learning_rate": 1.5647087496953451e-06, "loss": 1.0132, "step": 1285 }, { "epoch": 0.01572154583013418, "grad_norm": 2.0679409503936768, "learning_rate": 1.570801852303193e-06, "loss": 0.9647, "step": 1290 }, { "epoch": 0.01578248205428199, "grad_norm": 2.1926887035369873, "learning_rate": 1.5768949549110407e-06, "loss": 1.0011, "step": 1295 }, { "epoch": 0.015843418278429795, "grad_norm": 2.028348445892334, "learning_rate": 1.582988057518889e-06, "loss": 1.007, "step": 1300 }, { "epoch": 0.015904354502577604, "grad_norm": 2.127666711807251, "learning_rate": 1.5890811601267367e-06, "loss": 1.023, "step": 1305 }, { "epoch": 0.01596529072672541, "grad_norm": 2.3541622161865234, "learning_rate": 1.5951742627345845e-06, "loss": 0.9153, "step": 1310 }, { "epoch": 0.016026226950873215, "grad_norm": 2.0365207195281982, "learning_rate": 1.6012673653424325e-06, "loss": 0.9974, "step": 1315 }, { "epoch": 0.016087163175021024, "grad_norm": 2.2546441555023193, "learning_rate": 1.6073604679502805e-06, "loss": 1.0798, "step": 1320 }, { "epoch": 0.01614809939916883, "grad_norm": 2.3772332668304443, "learning_rate": 1.6134535705581284e-06, "loss": 0.9463, "step": 1325 }, { "epoch": 0.016209035623316638, "grad_norm": 2.185126304626465, "learning_rate": 1.6195466731659762e-06, "loss": 0.9534, "step": 1330 }, { "epoch": 0.016269971847464443, "grad_norm": 2.2006030082702637, "learning_rate": 1.625639775773824e-06, "loss": 1.0765, "step": 1335 }, { "epoch": 0.01633090807161225, "grad_norm": 2.042996644973755, "learning_rate": 1.6317328783816722e-06, "loss": 1.0643, "step": 1340 }, { "epoch": 0.016391844295760058, "grad_norm": 2.706965208053589, "learning_rate": 1.63782598098952e-06, "loss": 0.9631, "step": 1345 }, { "epoch": 0.016452780519907863, "grad_norm": 1.895202398300171, "learning_rate": 1.6439190835973678e-06, "loss": 1.0174, "step": 1350 }, { "epoch": 0.016513716744055672, "grad_norm": 2.625420570373535, "learning_rate": 1.6500121862052158e-06, "loss": 1.0553, "step": 1355 }, { "epoch": 0.016574652968203478, "grad_norm": 2.1365315914154053, "learning_rate": 1.6561052888130638e-06, "loss": 1.094, "step": 1360 }, { "epoch": 0.016635589192351283, "grad_norm": 2.299610137939453, "learning_rate": 1.6621983914209116e-06, "loss": 1.0061, "step": 1365 }, { "epoch": 0.016696525416499092, "grad_norm": 3.1685051918029785, "learning_rate": 1.6682914940287596e-06, "loss": 0.9963, "step": 1370 }, { "epoch": 0.016757461640646898, "grad_norm": 2.153327703475952, "learning_rate": 1.6743845966366076e-06, "loss": 1.0508, "step": 1375 }, { "epoch": 0.016818397864794707, "grad_norm": 2.4042651653289795, "learning_rate": 1.6804776992444555e-06, "loss": 1.0264, "step": 1380 }, { "epoch": 0.016879334088942512, "grad_norm": 2.34021258354187, "learning_rate": 1.6865708018523033e-06, "loss": 0.9579, "step": 1385 }, { "epoch": 0.01694027031309032, "grad_norm": 2.1644420623779297, "learning_rate": 1.6926639044601511e-06, "loss": 1.0721, "step": 1390 }, { "epoch": 0.017001206537238127, "grad_norm": 1.8522045612335205, "learning_rate": 1.6987570070679993e-06, "loss": 1.003, "step": 1395 }, { "epoch": 0.017062142761385932, "grad_norm": 2.466245412826538, "learning_rate": 1.7048501096758471e-06, "loss": 1.0208, "step": 1400 }, { "epoch": 0.01712307898553374, "grad_norm": 2.2634997367858887, "learning_rate": 1.7109432122836949e-06, "loss": 1.0245, "step": 1405 }, { "epoch": 0.017184015209681547, "grad_norm": 2.4229812622070312, "learning_rate": 1.7170363148915429e-06, "loss": 1.0282, "step": 1410 }, { "epoch": 0.017244951433829356, "grad_norm": 1.8645529747009277, "learning_rate": 1.7231294174993909e-06, "loss": 0.9696, "step": 1415 }, { "epoch": 0.01730588765797716, "grad_norm": 2.0952823162078857, "learning_rate": 1.7292225201072389e-06, "loss": 1.0276, "step": 1420 }, { "epoch": 0.017366823882124967, "grad_norm": 2.1084020137786865, "learning_rate": 1.7353156227150867e-06, "loss": 1.0069, "step": 1425 }, { "epoch": 0.017427760106272776, "grad_norm": 2.0639543533325195, "learning_rate": 1.7414087253229344e-06, "loss": 1.0805, "step": 1430 }, { "epoch": 0.01748869633042058, "grad_norm": 2.9938549995422363, "learning_rate": 1.7475018279307826e-06, "loss": 1.0185, "step": 1435 }, { "epoch": 0.01754963255456839, "grad_norm": 2.2650868892669678, "learning_rate": 1.7535949305386304e-06, "loss": 1.0436, "step": 1440 }, { "epoch": 0.017610568778716196, "grad_norm": 2.2309298515319824, "learning_rate": 1.7596880331464782e-06, "loss": 1.002, "step": 1445 }, { "epoch": 0.017671505002864, "grad_norm": 2.628127098083496, "learning_rate": 1.7657811357543262e-06, "loss": 1.0258, "step": 1450 }, { "epoch": 0.01773244122701181, "grad_norm": 2.5207173824310303, "learning_rate": 1.7718742383621742e-06, "loss": 0.9832, "step": 1455 }, { "epoch": 0.017793377451159616, "grad_norm": 2.3942947387695312, "learning_rate": 1.7779673409700222e-06, "loss": 0.9883, "step": 1460 }, { "epoch": 0.017854313675307425, "grad_norm": 2.216517448425293, "learning_rate": 1.78406044357787e-06, "loss": 0.9761, "step": 1465 }, { "epoch": 0.01791524989945523, "grad_norm": 2.093020439147949, "learning_rate": 1.7901535461857178e-06, "loss": 1.0694, "step": 1470 }, { "epoch": 0.017976186123603036, "grad_norm": 2.2985405921936035, "learning_rate": 1.796246648793566e-06, "loss": 1.0262, "step": 1475 }, { "epoch": 0.018037122347750845, "grad_norm": 2.9598400592803955, "learning_rate": 1.8023397514014138e-06, "loss": 1.0636, "step": 1480 }, { "epoch": 0.01809805857189865, "grad_norm": 2.2091970443725586, "learning_rate": 1.8084328540092615e-06, "loss": 1.0333, "step": 1485 }, { "epoch": 0.01815899479604646, "grad_norm": 1.9710242748260498, "learning_rate": 1.8145259566171095e-06, "loss": 1.0323, "step": 1490 }, { "epoch": 0.018219931020194265, "grad_norm": 1.913962960243225, "learning_rate": 1.8206190592249575e-06, "loss": 1.0283, "step": 1495 }, { "epoch": 0.01828086724434207, "grad_norm": 2.458481788635254, "learning_rate": 1.8267121618328053e-06, "loss": 1.0416, "step": 1500 }, { "epoch": 0.01834180346848988, "grad_norm": 2.5715088844299316, "learning_rate": 1.8328052644406533e-06, "loss": 1.0262, "step": 1505 }, { "epoch": 0.018402739692637685, "grad_norm": 2.11381196975708, "learning_rate": 1.838898367048501e-06, "loss": 1.001, "step": 1510 }, { "epoch": 0.018463675916785494, "grad_norm": 2.693702220916748, "learning_rate": 1.8449914696563493e-06, "loss": 1.0422, "step": 1515 }, { "epoch": 0.0185246121409333, "grad_norm": 2.185966730117798, "learning_rate": 1.851084572264197e-06, "loss": 1.0243, "step": 1520 }, { "epoch": 0.018585548365081105, "grad_norm": 2.2688236236572266, "learning_rate": 1.8571776748720449e-06, "loss": 0.8776, "step": 1525 }, { "epoch": 0.018646484589228914, "grad_norm": 2.0082101821899414, "learning_rate": 1.863270777479893e-06, "loss": 1.1056, "step": 1530 }, { "epoch": 0.01870742081337672, "grad_norm": 2.3301820755004883, "learning_rate": 1.8693638800877409e-06, "loss": 1.0249, "step": 1535 }, { "epoch": 0.018768357037524528, "grad_norm": 2.063070297241211, "learning_rate": 1.8754569826955886e-06, "loss": 1.0072, "step": 1540 }, { "epoch": 0.018829293261672334, "grad_norm": 2.067974805831909, "learning_rate": 1.8815500853034366e-06, "loss": 0.9603, "step": 1545 }, { "epoch": 0.01889022948582014, "grad_norm": 1.9766230583190918, "learning_rate": 1.8876431879112846e-06, "loss": 0.9737, "step": 1550 }, { "epoch": 0.018951165709967948, "grad_norm": 1.974348783493042, "learning_rate": 1.8937362905191326e-06, "loss": 1.0565, "step": 1555 }, { "epoch": 0.019012101934115753, "grad_norm": 2.2146053314208984, "learning_rate": 1.8998293931269804e-06, "loss": 1.0338, "step": 1560 }, { "epoch": 0.019073038158263562, "grad_norm": 2.068389415740967, "learning_rate": 1.9059224957348282e-06, "loss": 0.9828, "step": 1565 }, { "epoch": 0.019133974382411368, "grad_norm": 2.1320810317993164, "learning_rate": 1.912015598342676e-06, "loss": 1.0178, "step": 1570 }, { "epoch": 0.019194910606559173, "grad_norm": 2.251316785812378, "learning_rate": 1.9181087009505244e-06, "loss": 1.0087, "step": 1575 }, { "epoch": 0.019255846830706982, "grad_norm": 2.7186830043792725, "learning_rate": 1.924201803558372e-06, "loss": 0.9967, "step": 1580 }, { "epoch": 0.019316783054854788, "grad_norm": 1.9373568296432495, "learning_rate": 1.93029490616622e-06, "loss": 1.0071, "step": 1585 }, { "epoch": 0.019377719279002597, "grad_norm": 2.091482400894165, "learning_rate": 1.936388008774068e-06, "loss": 1.0074, "step": 1590 }, { "epoch": 0.019438655503150402, "grad_norm": 2.603739023208618, "learning_rate": 1.942481111381916e-06, "loss": 0.9909, "step": 1595 }, { "epoch": 0.01949959172729821, "grad_norm": 2.0106043815612793, "learning_rate": 1.9485742139897637e-06, "loss": 1.1053, "step": 1600 }, { "epoch": 0.019560527951446017, "grad_norm": 1.9582633972167969, "learning_rate": 1.9546673165976115e-06, "loss": 1.0601, "step": 1605 }, { "epoch": 0.019621464175593822, "grad_norm": 2.062143564224243, "learning_rate": 1.9607604192054597e-06, "loss": 1.0146, "step": 1610 }, { "epoch": 0.01968240039974163, "grad_norm": 2.6619043350219727, "learning_rate": 1.9668535218133075e-06, "loss": 0.978, "step": 1615 }, { "epoch": 0.019743336623889437, "grad_norm": 2.7971179485321045, "learning_rate": 1.9729466244211553e-06, "loss": 1.0115, "step": 1620 }, { "epoch": 0.019804272848037246, "grad_norm": 2.0667920112609863, "learning_rate": 1.979039727029003e-06, "loss": 1.0694, "step": 1625 }, { "epoch": 0.01986520907218505, "grad_norm": 2.109739303588867, "learning_rate": 1.9851328296368513e-06, "loss": 0.9458, "step": 1630 }, { "epoch": 0.019926145296332857, "grad_norm": 1.8992940187454224, "learning_rate": 1.991225932244699e-06, "loss": 0.9726, "step": 1635 }, { "epoch": 0.019987081520480666, "grad_norm": 2.0969045162200928, "learning_rate": 1.997319034852547e-06, "loss": 0.9818, "step": 1640 }, { "epoch": 0.02004801774462847, "grad_norm": 2.665583848953247, "learning_rate": 2.003412137460395e-06, "loss": 1.067, "step": 1645 }, { "epoch": 0.02010895396877628, "grad_norm": 1.9580292701721191, "learning_rate": 2.009505240068243e-06, "loss": 0.9453, "step": 1650 }, { "epoch": 0.020169890192924086, "grad_norm": 2.523287057876587, "learning_rate": 2.0155983426760906e-06, "loss": 1.0232, "step": 1655 }, { "epoch": 0.02023082641707189, "grad_norm": 2.129793405532837, "learning_rate": 2.021691445283939e-06, "loss": 0.9301, "step": 1660 }, { "epoch": 0.0202917626412197, "grad_norm": 1.6810649633407593, "learning_rate": 2.0277845478917866e-06, "loss": 0.9356, "step": 1665 }, { "epoch": 0.020352698865367506, "grad_norm": 2.3393821716308594, "learning_rate": 2.033877650499635e-06, "loss": 0.963, "step": 1670 }, { "epoch": 0.020413635089515315, "grad_norm": 2.338376045227051, "learning_rate": 2.0399707531074826e-06, "loss": 0.9773, "step": 1675 }, { "epoch": 0.02047457131366312, "grad_norm": 1.7810523509979248, "learning_rate": 2.0460638557153304e-06, "loss": 0.9304, "step": 1680 }, { "epoch": 0.020535507537810926, "grad_norm": 2.126671552658081, "learning_rate": 2.0521569583231786e-06, "loss": 1.0284, "step": 1685 }, { "epoch": 0.020596443761958735, "grad_norm": 1.9177496433258057, "learning_rate": 2.0582500609310264e-06, "loss": 0.9453, "step": 1690 }, { "epoch": 0.02065737998610654, "grad_norm": 2.030352830886841, "learning_rate": 2.064343163538874e-06, "loss": 1.0241, "step": 1695 }, { "epoch": 0.02071831621025435, "grad_norm": 2.938796281814575, "learning_rate": 2.070436266146722e-06, "loss": 1.051, "step": 1700 }, { "epoch": 0.020779252434402155, "grad_norm": 2.3332250118255615, "learning_rate": 2.07652936875457e-06, "loss": 1.1166, "step": 1705 }, { "epoch": 0.02084018865854996, "grad_norm": 2.79314923286438, "learning_rate": 2.082622471362418e-06, "loss": 0.919, "step": 1710 }, { "epoch": 0.02090112488269777, "grad_norm": 2.0991342067718506, "learning_rate": 2.0887155739702657e-06, "loss": 1.0394, "step": 1715 }, { "epoch": 0.020962061106845575, "grad_norm": 2.195678472518921, "learning_rate": 2.0948086765781135e-06, "loss": 1.0342, "step": 1720 }, { "epoch": 0.021022997330993384, "grad_norm": 2.4033002853393555, "learning_rate": 2.1009017791859617e-06, "loss": 1.0488, "step": 1725 }, { "epoch": 0.02108393355514119, "grad_norm": 2.109794855117798, "learning_rate": 2.1069948817938095e-06, "loss": 1.034, "step": 1730 }, { "epoch": 0.021144869779288995, "grad_norm": 2.0978713035583496, "learning_rate": 2.1130879844016573e-06, "loss": 1.0333, "step": 1735 }, { "epoch": 0.021205806003436804, "grad_norm": 2.110180616378784, "learning_rate": 2.1191810870095055e-06, "loss": 0.9941, "step": 1740 }, { "epoch": 0.02126674222758461, "grad_norm": 2.585651159286499, "learning_rate": 2.1252741896173533e-06, "loss": 0.9359, "step": 1745 }, { "epoch": 0.021327678451732418, "grad_norm": 2.218932867050171, "learning_rate": 2.1313672922252015e-06, "loss": 1.0427, "step": 1750 }, { "epoch": 0.021388614675880224, "grad_norm": 2.0754952430725098, "learning_rate": 2.1374603948330493e-06, "loss": 1.0203, "step": 1755 }, { "epoch": 0.02144955090002803, "grad_norm": 2.2424516677856445, "learning_rate": 2.143553497440897e-06, "loss": 1.0305, "step": 1760 }, { "epoch": 0.021510487124175838, "grad_norm": 2.2893102169036865, "learning_rate": 2.1496466000487452e-06, "loss": 1.067, "step": 1765 }, { "epoch": 0.021571423348323644, "grad_norm": 2.544826030731201, "learning_rate": 2.155739702656593e-06, "loss": 1.0257, "step": 1770 }, { "epoch": 0.021632359572471452, "grad_norm": 2.2350611686706543, "learning_rate": 2.161832805264441e-06, "loss": 1.0152, "step": 1775 }, { "epoch": 0.021693295796619258, "grad_norm": 2.3671154975891113, "learning_rate": 2.1679259078722886e-06, "loss": 0.9197, "step": 1780 }, { "epoch": 0.021754232020767063, "grad_norm": 2.517324209213257, "learning_rate": 2.174019010480137e-06, "loss": 1.0105, "step": 1785 }, { "epoch": 0.021815168244914872, "grad_norm": 2.3797314167022705, "learning_rate": 2.1801121130879846e-06, "loss": 1.0474, "step": 1790 }, { "epoch": 0.021876104469062678, "grad_norm": 1.9309452772140503, "learning_rate": 2.1862052156958324e-06, "loss": 0.9832, "step": 1795 }, { "epoch": 0.021937040693210487, "grad_norm": 2.355318307876587, "learning_rate": 2.19229831830368e-06, "loss": 1.0377, "step": 1800 }, { "epoch": 0.021997976917358292, "grad_norm": 2.632237434387207, "learning_rate": 2.1983914209115284e-06, "loss": 0.997, "step": 1805 }, { "epoch": 0.0220589131415061, "grad_norm": 2.0991742610931396, "learning_rate": 2.204484523519376e-06, "loss": 1.0495, "step": 1810 }, { "epoch": 0.022119849365653907, "grad_norm": 2.293262004852295, "learning_rate": 2.210577626127224e-06, "loss": 0.9655, "step": 1815 }, { "epoch": 0.022180785589801712, "grad_norm": 1.8352338075637817, "learning_rate": 2.216670728735072e-06, "loss": 0.9825, "step": 1820 }, { "epoch": 0.02224172181394952, "grad_norm": 2.3749592304229736, "learning_rate": 2.22276383134292e-06, "loss": 1.017, "step": 1825 }, { "epoch": 0.022302658038097327, "grad_norm": 2.1249430179595947, "learning_rate": 2.2288569339507677e-06, "loss": 1.0139, "step": 1830 }, { "epoch": 0.022363594262245136, "grad_norm": 1.9212143421173096, "learning_rate": 2.234950036558616e-06, "loss": 1.0048, "step": 1835 }, { "epoch": 0.02242453048639294, "grad_norm": 2.4446098804473877, "learning_rate": 2.2410431391664637e-06, "loss": 1.0233, "step": 1840 }, { "epoch": 0.022485466710540747, "grad_norm": 2.2526278495788574, "learning_rate": 2.247136241774312e-06, "loss": 0.9722, "step": 1845 }, { "epoch": 0.022546402934688556, "grad_norm": 2.4470598697662354, "learning_rate": 2.2532293443821597e-06, "loss": 1.0517, "step": 1850 }, { "epoch": 0.02260733915883636, "grad_norm": 2.3264427185058594, "learning_rate": 2.2593224469900075e-06, "loss": 1.0014, "step": 1855 }, { "epoch": 0.02266827538298417, "grad_norm": 2.2612459659576416, "learning_rate": 2.2654155495978557e-06, "loss": 1.0097, "step": 1860 }, { "epoch": 0.022729211607131976, "grad_norm": 2.4081575870513916, "learning_rate": 2.2715086522057035e-06, "loss": 0.9756, "step": 1865 }, { "epoch": 0.02279014783127978, "grad_norm": 2.112276554107666, "learning_rate": 2.2776017548135512e-06, "loss": 0.9098, "step": 1870 }, { "epoch": 0.02285108405542759, "grad_norm": 2.1384670734405518, "learning_rate": 2.283694857421399e-06, "loss": 0.9074, "step": 1875 }, { "epoch": 0.022912020279575396, "grad_norm": 2.3514392375946045, "learning_rate": 2.2897879600292472e-06, "loss": 1.0623, "step": 1880 }, { "epoch": 0.022972956503723205, "grad_norm": 1.9742062091827393, "learning_rate": 2.295881062637095e-06, "loss": 0.9491, "step": 1885 }, { "epoch": 0.02303389272787101, "grad_norm": 2.2464003562927246, "learning_rate": 2.301974165244943e-06, "loss": 1.0591, "step": 1890 }, { "epoch": 0.023094828952018816, "grad_norm": 2.0313403606414795, "learning_rate": 2.3080672678527906e-06, "loss": 1.0393, "step": 1895 }, { "epoch": 0.023155765176166625, "grad_norm": 2.165633201599121, "learning_rate": 2.3141603704606388e-06, "loss": 1.0491, "step": 1900 }, { "epoch": 0.02321670140031443, "grad_norm": 2.4004745483398438, "learning_rate": 2.3202534730684866e-06, "loss": 1.0293, "step": 1905 }, { "epoch": 0.02327763762446224, "grad_norm": 2.2057719230651855, "learning_rate": 2.3263465756763344e-06, "loss": 0.9931, "step": 1910 }, { "epoch": 0.023338573848610045, "grad_norm": 1.8951270580291748, "learning_rate": 2.3324396782841826e-06, "loss": 0.9654, "step": 1915 }, { "epoch": 0.02339951007275785, "grad_norm": 1.8615295886993408, "learning_rate": 2.3385327808920303e-06, "loss": 0.8745, "step": 1920 }, { "epoch": 0.02346044629690566, "grad_norm": 1.950276255607605, "learning_rate": 2.3446258834998786e-06, "loss": 0.9991, "step": 1925 }, { "epoch": 0.023521382521053465, "grad_norm": 1.9288371801376343, "learning_rate": 2.3507189861077263e-06, "loss": 1.0456, "step": 1930 }, { "epoch": 0.023582318745201274, "grad_norm": 2.0942399501800537, "learning_rate": 2.356812088715574e-06, "loss": 0.9984, "step": 1935 }, { "epoch": 0.02364325496934908, "grad_norm": 1.9294602870941162, "learning_rate": 2.3629051913234223e-06, "loss": 0.9869, "step": 1940 }, { "epoch": 0.023704191193496885, "grad_norm": 2.265928030014038, "learning_rate": 2.36899829393127e-06, "loss": 1.0114, "step": 1945 }, { "epoch": 0.023765127417644694, "grad_norm": 2.385467529296875, "learning_rate": 2.375091396539118e-06, "loss": 1.0161, "step": 1950 }, { "epoch": 0.0238260636417925, "grad_norm": 2.6430206298828125, "learning_rate": 2.3811844991469657e-06, "loss": 0.9921, "step": 1955 }, { "epoch": 0.023886999865940308, "grad_norm": 2.470702886581421, "learning_rate": 2.387277601754814e-06, "loss": 1.0999, "step": 1960 }, { "epoch": 0.023947936090088114, "grad_norm": 2.665325164794922, "learning_rate": 2.3933707043626617e-06, "loss": 1.0468, "step": 1965 }, { "epoch": 0.02400887231423592, "grad_norm": 2.8288443088531494, "learning_rate": 2.3994638069705094e-06, "loss": 1.0545, "step": 1970 }, { "epoch": 0.024069808538383728, "grad_norm": 2.9506235122680664, "learning_rate": 2.4055569095783572e-06, "loss": 0.9598, "step": 1975 }, { "epoch": 0.024130744762531534, "grad_norm": 2.6960349082946777, "learning_rate": 2.4116500121862054e-06, "loss": 0.9539, "step": 1980 }, { "epoch": 0.024191680986679343, "grad_norm": 2.174103021621704, "learning_rate": 2.4177431147940532e-06, "loss": 0.9602, "step": 1985 }, { "epoch": 0.024252617210827148, "grad_norm": 2.36989688873291, "learning_rate": 2.423836217401901e-06, "loss": 1.011, "step": 1990 }, { "epoch": 0.024313553434974954, "grad_norm": 2.268336772918701, "learning_rate": 2.4299293200097492e-06, "loss": 1.0574, "step": 1995 }, { "epoch": 0.024374489659122762, "grad_norm": 2.387397050857544, "learning_rate": 2.436022422617597e-06, "loss": 0.9052, "step": 2000 }, { "epoch": 0.024435425883270568, "grad_norm": 1.9429172277450562, "learning_rate": 2.4421155252254448e-06, "loss": 1.0602, "step": 2005 }, { "epoch": 0.024496362107418377, "grad_norm": 1.9491621255874634, "learning_rate": 2.448208627833293e-06, "loss": 1.0563, "step": 2010 }, { "epoch": 0.024557298331566182, "grad_norm": 2.7708098888397217, "learning_rate": 2.4543017304411408e-06, "loss": 1.036, "step": 2015 }, { "epoch": 0.02461823455571399, "grad_norm": 2.3317649364471436, "learning_rate": 2.460394833048989e-06, "loss": 0.9507, "step": 2020 }, { "epoch": 0.024679170779861797, "grad_norm": 2.088059902191162, "learning_rate": 2.4664879356568368e-06, "loss": 1.0419, "step": 2025 }, { "epoch": 0.024740107004009602, "grad_norm": 2.065762996673584, "learning_rate": 2.4725810382646845e-06, "loss": 0.9725, "step": 2030 }, { "epoch": 0.02480104322815741, "grad_norm": 2.358750581741333, "learning_rate": 2.4786741408725328e-06, "loss": 0.9798, "step": 2035 }, { "epoch": 0.024861979452305217, "grad_norm": 2.5265023708343506, "learning_rate": 2.4847672434803805e-06, "loss": 1.0688, "step": 2040 }, { "epoch": 0.024922915676453026, "grad_norm": 2.7928481101989746, "learning_rate": 2.4908603460882283e-06, "loss": 1.0743, "step": 2045 }, { "epoch": 0.02498385190060083, "grad_norm": 2.2291150093078613, "learning_rate": 2.496953448696076e-06, "loss": 1.0241, "step": 2050 }, { "epoch": 0.025044788124748637, "grad_norm": 1.9996187686920166, "learning_rate": 2.5030465513039243e-06, "loss": 0.9252, "step": 2055 }, { "epoch": 0.025105724348896446, "grad_norm": 1.9106409549713135, "learning_rate": 2.5091396539117717e-06, "loss": 1.0342, "step": 2060 }, { "epoch": 0.02516666057304425, "grad_norm": 2.1948163509368896, "learning_rate": 2.51523275651962e-06, "loss": 1.0662, "step": 2065 }, { "epoch": 0.02522759679719206, "grad_norm": 2.435556650161743, "learning_rate": 2.521325859127468e-06, "loss": 1.0198, "step": 2070 }, { "epoch": 0.025288533021339866, "grad_norm": 2.1847736835479736, "learning_rate": 2.5274189617353154e-06, "loss": 1.0501, "step": 2075 }, { "epoch": 0.02534946924548767, "grad_norm": 2.176088333129883, "learning_rate": 2.5335120643431636e-06, "loss": 0.93, "step": 2080 }, { "epoch": 0.02541040546963548, "grad_norm": 2.130182981491089, "learning_rate": 2.539605166951012e-06, "loss": 1.0308, "step": 2085 }, { "epoch": 0.025471341693783286, "grad_norm": 1.8386199474334717, "learning_rate": 2.5456982695588596e-06, "loss": 0.9776, "step": 2090 }, { "epoch": 0.025532277917931095, "grad_norm": 1.769562840461731, "learning_rate": 2.5517913721667074e-06, "loss": 1.0471, "step": 2095 }, { "epoch": 0.0255932141420789, "grad_norm": 2.182925224304199, "learning_rate": 2.557884474774555e-06, "loss": 1.0642, "step": 2100 }, { "epoch": 0.025654150366226706, "grad_norm": 2.1866960525512695, "learning_rate": 2.5639775773824034e-06, "loss": 1.03, "step": 2105 }, { "epoch": 0.025715086590374515, "grad_norm": 2.3998007774353027, "learning_rate": 2.570070679990251e-06, "loss": 0.9563, "step": 2110 }, { "epoch": 0.02577602281452232, "grad_norm": 1.9971240758895874, "learning_rate": 2.576163782598099e-06, "loss": 0.9561, "step": 2115 }, { "epoch": 0.02583695903867013, "grad_norm": 2.197679042816162, "learning_rate": 2.582256885205947e-06, "loss": 1.0194, "step": 2120 }, { "epoch": 0.025897895262817935, "grad_norm": 2.5285727977752686, "learning_rate": 2.5883499878137954e-06, "loss": 0.9894, "step": 2125 }, { "epoch": 0.02595883148696574, "grad_norm": 2.3421196937561035, "learning_rate": 2.5944430904216428e-06, "loss": 1.0184, "step": 2130 }, { "epoch": 0.02601976771111355, "grad_norm": 2.240675687789917, "learning_rate": 2.600536193029491e-06, "loss": 1.009, "step": 2135 }, { "epoch": 0.026080703935261355, "grad_norm": 2.9012393951416016, "learning_rate": 2.606629295637339e-06, "loss": 1.028, "step": 2140 }, { "epoch": 0.026141640159409164, "grad_norm": 2.1372313499450684, "learning_rate": 2.6127223982451865e-06, "loss": 0.992, "step": 2145 }, { "epoch": 0.02620257638355697, "grad_norm": 2.0786895751953125, "learning_rate": 2.6188155008530347e-06, "loss": 1.0385, "step": 2150 }, { "epoch": 0.026263512607704775, "grad_norm": 2.1354730129241943, "learning_rate": 2.624908603460882e-06, "loss": 0.957, "step": 2155 }, { "epoch": 0.026324448831852584, "grad_norm": 2.154320478439331, "learning_rate": 2.6310017060687303e-06, "loss": 0.981, "step": 2160 }, { "epoch": 0.02638538505600039, "grad_norm": 1.9987454414367676, "learning_rate": 2.6370948086765785e-06, "loss": 0.9224, "step": 2165 }, { "epoch": 0.026446321280148198, "grad_norm": 1.8304270505905151, "learning_rate": 2.6431879112844263e-06, "loss": 0.9761, "step": 2170 }, { "epoch": 0.026507257504296004, "grad_norm": 2.0966954231262207, "learning_rate": 2.649281013892274e-06, "loss": 0.9509, "step": 2175 }, { "epoch": 0.02656819372844381, "grad_norm": 2.2495169639587402, "learning_rate": 2.6553741165001223e-06, "loss": 0.951, "step": 2180 }, { "epoch": 0.026629129952591618, "grad_norm": 2.7553138732910156, "learning_rate": 2.66146721910797e-06, "loss": 1.0669, "step": 2185 }, { "epoch": 0.026690066176739424, "grad_norm": 2.1730611324310303, "learning_rate": 2.667560321715818e-06, "loss": 1.0559, "step": 2190 }, { "epoch": 0.026751002400887233, "grad_norm": 1.9332096576690674, "learning_rate": 2.6736534243236656e-06, "loss": 1.0291, "step": 2195 }, { "epoch": 0.026811938625035038, "grad_norm": 2.211378335952759, "learning_rate": 2.679746526931514e-06, "loss": 1.0355, "step": 2200 }, { "epoch": 0.026872874849182844, "grad_norm": 2.123102903366089, "learning_rate": 2.685839629539362e-06, "loss": 1.072, "step": 2205 }, { "epoch": 0.026933811073330653, "grad_norm": 3.0551986694335938, "learning_rate": 2.6919327321472094e-06, "loss": 0.9782, "step": 2210 }, { "epoch": 0.026994747297478458, "grad_norm": 1.8993085622787476, "learning_rate": 2.6980258347550576e-06, "loss": 0.9996, "step": 2215 }, { "epoch": 0.027055683521626267, "grad_norm": 2.5276944637298584, "learning_rate": 2.704118937362906e-06, "loss": 0.9988, "step": 2220 }, { "epoch": 0.027116619745774072, "grad_norm": 2.1442770957946777, "learning_rate": 2.710212039970753e-06, "loss": 0.9815, "step": 2225 }, { "epoch": 0.02717755596992188, "grad_norm": 2.8174052238464355, "learning_rate": 2.7163051425786014e-06, "loss": 1.021, "step": 2230 }, { "epoch": 0.027238492194069687, "grad_norm": 2.346937417984009, "learning_rate": 2.7223982451864487e-06, "loss": 0.9435, "step": 2235 }, { "epoch": 0.027299428418217492, "grad_norm": 2.795609951019287, "learning_rate": 2.728491347794297e-06, "loss": 1.0437, "step": 2240 }, { "epoch": 0.0273603646423653, "grad_norm": 2.0192434787750244, "learning_rate": 2.734584450402145e-06, "loss": 0.9564, "step": 2245 }, { "epoch": 0.027421300866513107, "grad_norm": 2.262528657913208, "learning_rate": 2.7406775530099925e-06, "loss": 1.0211, "step": 2250 }, { "epoch": 0.027482237090660916, "grad_norm": 2.2595510482788086, "learning_rate": 2.7467706556178407e-06, "loss": 1.053, "step": 2255 }, { "epoch": 0.02754317331480872, "grad_norm": 1.930419683456421, "learning_rate": 2.752863758225689e-06, "loss": 0.9289, "step": 2260 }, { "epoch": 0.027604109538956527, "grad_norm": 2.284479856491089, "learning_rate": 2.7589568608335367e-06, "loss": 1.0631, "step": 2265 }, { "epoch": 0.027665045763104336, "grad_norm": 2.2255213260650635, "learning_rate": 2.7650499634413845e-06, "loss": 1.0861, "step": 2270 }, { "epoch": 0.02772598198725214, "grad_norm": 2.037827968597412, "learning_rate": 2.7711430660492323e-06, "loss": 0.9556, "step": 2275 }, { "epoch": 0.02778691821139995, "grad_norm": 1.827919602394104, "learning_rate": 2.7772361686570805e-06, "loss": 0.9545, "step": 2280 }, { "epoch": 0.027847854435547756, "grad_norm": 2.166940689086914, "learning_rate": 2.7833292712649283e-06, "loss": 1.0313, "step": 2285 }, { "epoch": 0.02790879065969556, "grad_norm": 2.2028262615203857, "learning_rate": 2.789422373872776e-06, "loss": 1.045, "step": 2290 }, { "epoch": 0.02796972688384337, "grad_norm": 1.9552828073501587, "learning_rate": 2.7955154764806243e-06, "loss": 1.0386, "step": 2295 }, { "epoch": 0.028030663107991176, "grad_norm": 1.9248507022857666, "learning_rate": 2.8016085790884725e-06, "loss": 0.9657, "step": 2300 }, { "epoch": 0.028091599332138985, "grad_norm": 1.8788517713546753, "learning_rate": 2.80770168169632e-06, "loss": 0.9194, "step": 2305 }, { "epoch": 0.02815253555628679, "grad_norm": 2.286435127258301, "learning_rate": 2.813794784304168e-06, "loss": 0.996, "step": 2310 }, { "epoch": 0.028213471780434596, "grad_norm": 1.9270886182785034, "learning_rate": 2.8198878869120162e-06, "loss": 1.0927, "step": 2315 }, { "epoch": 0.028274408004582405, "grad_norm": 2.200627326965332, "learning_rate": 2.8259809895198636e-06, "loss": 0.9784, "step": 2320 }, { "epoch": 0.02833534422873021, "grad_norm": 2.188016653060913, "learning_rate": 2.832074092127712e-06, "loss": 1.0213, "step": 2325 }, { "epoch": 0.02839628045287802, "grad_norm": 1.8562135696411133, "learning_rate": 2.838167194735559e-06, "loss": 0.913, "step": 2330 }, { "epoch": 0.028457216677025825, "grad_norm": 2.2547810077667236, "learning_rate": 2.8442602973434074e-06, "loss": 0.9951, "step": 2335 }, { "epoch": 0.02851815290117363, "grad_norm": 2.277519464492798, "learning_rate": 2.8503533999512556e-06, "loss": 1.0036, "step": 2340 }, { "epoch": 0.02857908912532144, "grad_norm": 2.3322670459747314, "learning_rate": 2.8564465025591034e-06, "loss": 0.9322, "step": 2345 }, { "epoch": 0.028640025349469245, "grad_norm": 2.2743523120880127, "learning_rate": 2.862539605166951e-06, "loss": 1.0014, "step": 2350 }, { "epoch": 0.028700961573617054, "grad_norm": 2.2133679389953613, "learning_rate": 2.8686327077747994e-06, "loss": 0.9215, "step": 2355 }, { "epoch": 0.02876189779776486, "grad_norm": 2.206085205078125, "learning_rate": 2.874725810382647e-06, "loss": 0.9619, "step": 2360 }, { "epoch": 0.028822834021912665, "grad_norm": 2.1240456104278564, "learning_rate": 2.880818912990495e-06, "loss": 1.0751, "step": 2365 }, { "epoch": 0.028883770246060474, "grad_norm": 2.0248773097991943, "learning_rate": 2.8869120155983427e-06, "loss": 1.0167, "step": 2370 }, { "epoch": 0.02894470647020828, "grad_norm": 2.4684841632843018, "learning_rate": 2.893005118206191e-06, "loss": 1.0876, "step": 2375 }, { "epoch": 0.029005642694356088, "grad_norm": 1.9347139596939087, "learning_rate": 2.8990982208140387e-06, "loss": 0.9139, "step": 2380 }, { "epoch": 0.029066578918503894, "grad_norm": 2.1059746742248535, "learning_rate": 2.9051913234218865e-06, "loss": 1.0649, "step": 2385 }, { "epoch": 0.0291275151426517, "grad_norm": 2.7258999347686768, "learning_rate": 2.9112844260297347e-06, "loss": 1.0189, "step": 2390 }, { "epoch": 0.029188451366799508, "grad_norm": 2.376356840133667, "learning_rate": 2.917377528637583e-06, "loss": 0.99, "step": 2395 }, { "epoch": 0.029249387590947314, "grad_norm": 2.8676633834838867, "learning_rate": 2.9234706312454303e-06, "loss": 0.9412, "step": 2400 }, { "epoch": 0.029310323815095123, "grad_norm": 2.045501947402954, "learning_rate": 2.9295637338532785e-06, "loss": 0.9955, "step": 2405 }, { "epoch": 0.029371260039242928, "grad_norm": 2.0736703872680664, "learning_rate": 2.935656836461126e-06, "loss": 0.966, "step": 2410 }, { "epoch": 0.029432196263390734, "grad_norm": 2.287158966064453, "learning_rate": 2.941749939068974e-06, "loss": 1.0245, "step": 2415 }, { "epoch": 0.029493132487538543, "grad_norm": 2.667834520339966, "learning_rate": 2.9478430416768222e-06, "loss": 0.9921, "step": 2420 }, { "epoch": 0.029554068711686348, "grad_norm": 2.1947782039642334, "learning_rate": 2.9539361442846696e-06, "loss": 1.0355, "step": 2425 }, { "epoch": 0.029615004935834157, "grad_norm": 1.9532451629638672, "learning_rate": 2.960029246892518e-06, "loss": 0.9126, "step": 2430 }, { "epoch": 0.029675941159981963, "grad_norm": 2.0545241832733154, "learning_rate": 2.966122349500366e-06, "loss": 0.929, "step": 2435 }, { "epoch": 0.02973687738412977, "grad_norm": 1.8839963674545288, "learning_rate": 2.972215452108214e-06, "loss": 0.9181, "step": 2440 }, { "epoch": 0.029797813608277577, "grad_norm": 2.097207546234131, "learning_rate": 2.9783085547160616e-06, "loss": 1.0174, "step": 2445 }, { "epoch": 0.029858749832425382, "grad_norm": 2.303065299987793, "learning_rate": 2.9844016573239098e-06, "loss": 0.9257, "step": 2450 }, { "epoch": 0.02991968605657319, "grad_norm": 2.492689371109009, "learning_rate": 2.9904947599317576e-06, "loss": 0.9643, "step": 2455 }, { "epoch": 0.029980622280720997, "grad_norm": 2.281097650527954, "learning_rate": 2.9965878625396054e-06, "loss": 1.0033, "step": 2460 }, { "epoch": 0.030041558504868806, "grad_norm": 1.7570304870605469, "learning_rate": 3.002680965147453e-06, "loss": 0.973, "step": 2465 }, { "epoch": 0.03010249472901661, "grad_norm": 2.135772943496704, "learning_rate": 3.0087740677553013e-06, "loss": 0.9978, "step": 2470 }, { "epoch": 0.030163430953164417, "grad_norm": 2.3887343406677246, "learning_rate": 3.0148671703631495e-06, "loss": 1.0234, "step": 2475 }, { "epoch": 0.030224367177312226, "grad_norm": 2.1010143756866455, "learning_rate": 3.020960272970997e-06, "loss": 0.9779, "step": 2480 }, { "epoch": 0.03028530340146003, "grad_norm": 2.1577558517456055, "learning_rate": 3.027053375578845e-06, "loss": 1.0211, "step": 2485 }, { "epoch": 0.03034623962560784, "grad_norm": 2.1158089637756348, "learning_rate": 3.0331464781866933e-06, "loss": 0.9374, "step": 2490 }, { "epoch": 0.030407175849755646, "grad_norm": 2.3055999279022217, "learning_rate": 3.0392395807945407e-06, "loss": 1.0662, "step": 2495 }, { "epoch": 0.03046811207390345, "grad_norm": 3.1563639640808105, "learning_rate": 3.045332683402389e-06, "loss": 1.0652, "step": 2500 }, { "epoch": 0.03052904829805126, "grad_norm": 1.8089172840118408, "learning_rate": 3.0514257860102362e-06, "loss": 1.0521, "step": 2505 }, { "epoch": 0.030589984522199066, "grad_norm": 2.4611685276031494, "learning_rate": 3.0575188886180845e-06, "loss": 0.9869, "step": 2510 }, { "epoch": 0.030650920746346875, "grad_norm": 2.248232841491699, "learning_rate": 3.0636119912259327e-06, "loss": 1.0038, "step": 2515 }, { "epoch": 0.03071185697049468, "grad_norm": 2.3844971656799316, "learning_rate": 3.0697050938337804e-06, "loss": 1.0952, "step": 2520 }, { "epoch": 0.030772793194642486, "grad_norm": 2.2184131145477295, "learning_rate": 3.0757981964416282e-06, "loss": 0.949, "step": 2525 }, { "epoch": 0.030833729418790295, "grad_norm": 2.668379306793213, "learning_rate": 3.0818912990494764e-06, "loss": 1.0569, "step": 2530 }, { "epoch": 0.0308946656429381, "grad_norm": 2.4427247047424316, "learning_rate": 3.0879844016573242e-06, "loss": 0.9801, "step": 2535 }, { "epoch": 0.03095560186708591, "grad_norm": 2.3734755516052246, "learning_rate": 3.094077504265172e-06, "loss": 0.9449, "step": 2540 }, { "epoch": 0.031016538091233715, "grad_norm": 2.1593384742736816, "learning_rate": 3.1001706068730198e-06, "loss": 1.0264, "step": 2545 }, { "epoch": 0.03107747431538152, "grad_norm": 2.1619675159454346, "learning_rate": 3.106263709480868e-06, "loss": 0.9412, "step": 2550 }, { "epoch": 0.03113841053952933, "grad_norm": 1.8880919218063354, "learning_rate": 3.1123568120887158e-06, "loss": 0.9926, "step": 2555 }, { "epoch": 0.031199346763677135, "grad_norm": 2.076702356338501, "learning_rate": 3.1184499146965636e-06, "loss": 0.9591, "step": 2560 }, { "epoch": 0.03126028298782494, "grad_norm": 2.0924417972564697, "learning_rate": 3.1245430173044118e-06, "loss": 0.9285, "step": 2565 }, { "epoch": 0.03132121921197275, "grad_norm": 2.012033700942993, "learning_rate": 3.13063611991226e-06, "loss": 0.9373, "step": 2570 }, { "epoch": 0.03138215543612056, "grad_norm": 2.2608845233917236, "learning_rate": 3.1367292225201073e-06, "loss": 0.9826, "step": 2575 }, { "epoch": 0.03144309166026836, "grad_norm": 2.2821028232574463, "learning_rate": 3.1428223251279555e-06, "loss": 0.9538, "step": 2580 }, { "epoch": 0.03150402788441617, "grad_norm": 2.146832227706909, "learning_rate": 3.148915427735803e-06, "loss": 0.9872, "step": 2585 }, { "epoch": 0.03156496410856398, "grad_norm": 2.4763898849487305, "learning_rate": 3.155008530343651e-06, "loss": 0.935, "step": 2590 }, { "epoch": 0.03162590033271179, "grad_norm": 2.2204864025115967, "learning_rate": 3.1611016329514993e-06, "loss": 0.9579, "step": 2595 }, { "epoch": 0.03168683655685959, "grad_norm": 2.352337121963501, "learning_rate": 3.1671947355593467e-06, "loss": 0.9039, "step": 2600 }, { "epoch": 0.0317477727810074, "grad_norm": 2.260850667953491, "learning_rate": 3.173287838167195e-06, "loss": 0.9286, "step": 2605 }, { "epoch": 0.03180870900515521, "grad_norm": 2.0969839096069336, "learning_rate": 3.179380940775043e-06, "loss": 1.0614, "step": 2610 }, { "epoch": 0.03186964522930301, "grad_norm": 2.3220322132110596, "learning_rate": 3.185474043382891e-06, "loss": 0.9743, "step": 2615 }, { "epoch": 0.03193058145345082, "grad_norm": 2.086827278137207, "learning_rate": 3.1915671459907387e-06, "loss": 0.9831, "step": 2620 }, { "epoch": 0.03199151767759863, "grad_norm": 2.0936524868011475, "learning_rate": 3.197660248598587e-06, "loss": 0.9659, "step": 2625 }, { "epoch": 0.03205245390174643, "grad_norm": 2.120546340942383, "learning_rate": 3.2037533512064346e-06, "loss": 1.0107, "step": 2630 }, { "epoch": 0.03211339012589424, "grad_norm": 2.048081159591675, "learning_rate": 3.2098464538142824e-06, "loss": 0.9435, "step": 2635 }, { "epoch": 0.03217432635004205, "grad_norm": 2.075256586074829, "learning_rate": 3.2159395564221302e-06, "loss": 0.9888, "step": 2640 }, { "epoch": 0.032235262574189856, "grad_norm": 2.565420627593994, "learning_rate": 3.2220326590299784e-06, "loss": 0.9493, "step": 2645 }, { "epoch": 0.03229619879833766, "grad_norm": 2.0619983673095703, "learning_rate": 3.2281257616378266e-06, "loss": 0.9713, "step": 2650 }, { "epoch": 0.03235713502248547, "grad_norm": 2.8184762001037598, "learning_rate": 3.234218864245674e-06, "loss": 1.0631, "step": 2655 }, { "epoch": 0.032418071246633276, "grad_norm": 2.646563768386841, "learning_rate": 3.240311966853522e-06, "loss": 1.052, "step": 2660 }, { "epoch": 0.03247900747078108, "grad_norm": 2.1804420948028564, "learning_rate": 3.2464050694613704e-06, "loss": 1.0081, "step": 2665 }, { "epoch": 0.03253994369492889, "grad_norm": 1.986843228340149, "learning_rate": 3.2524981720692178e-06, "loss": 1.003, "step": 2670 }, { "epoch": 0.032600879919076696, "grad_norm": 2.127923011779785, "learning_rate": 3.258591274677066e-06, "loss": 0.8921, "step": 2675 }, { "epoch": 0.0326618161432245, "grad_norm": 2.0358736515045166, "learning_rate": 3.2646843772849133e-06, "loss": 0.982, "step": 2680 }, { "epoch": 0.03272275236737231, "grad_norm": 2.3029143810272217, "learning_rate": 3.2707774798927615e-06, "loss": 0.9776, "step": 2685 }, { "epoch": 0.032783688591520116, "grad_norm": 1.891542673110962, "learning_rate": 3.2768705825006097e-06, "loss": 0.9802, "step": 2690 }, { "epoch": 0.032844624815667925, "grad_norm": 2.1439285278320312, "learning_rate": 3.282963685108457e-06, "loss": 0.9578, "step": 2695 }, { "epoch": 0.03290556103981573, "grad_norm": 2.8776235580444336, "learning_rate": 3.2890567877163053e-06, "loss": 1.0314, "step": 2700 }, { "epoch": 0.032966497263963536, "grad_norm": 2.1541643142700195, "learning_rate": 3.2951498903241535e-06, "loss": 1.016, "step": 2705 }, { "epoch": 0.033027433488111345, "grad_norm": 2.295881748199463, "learning_rate": 3.3012429929320013e-06, "loss": 1.0129, "step": 2710 }, { "epoch": 0.03308836971225915, "grad_norm": 2.0148978233337402, "learning_rate": 3.307336095539849e-06, "loss": 1.0616, "step": 2715 }, { "epoch": 0.033149305936406956, "grad_norm": 2.4073879718780518, "learning_rate": 3.313429198147697e-06, "loss": 0.995, "step": 2720 }, { "epoch": 0.033210242160554765, "grad_norm": 2.1058967113494873, "learning_rate": 3.319522300755545e-06, "loss": 0.9545, "step": 2725 }, { "epoch": 0.03327117838470257, "grad_norm": 2.153179883956909, "learning_rate": 3.325615403363393e-06, "loss": 0.9565, "step": 2730 }, { "epoch": 0.033332114608850376, "grad_norm": 1.979596495628357, "learning_rate": 3.3317085059712406e-06, "loss": 0.9892, "step": 2735 }, { "epoch": 0.033393050832998185, "grad_norm": 2.141157865524292, "learning_rate": 3.337801608579089e-06, "loss": 0.993, "step": 2740 }, { "epoch": 0.033453987057145994, "grad_norm": 2.2896928787231445, "learning_rate": 3.343894711186937e-06, "loss": 1.0202, "step": 2745 }, { "epoch": 0.033514923281293796, "grad_norm": 2.1105523109436035, "learning_rate": 3.3499878137947844e-06, "loss": 1.0113, "step": 2750 }, { "epoch": 0.033575859505441605, "grad_norm": 2.4547581672668457, "learning_rate": 3.3560809164026326e-06, "loss": 0.9648, "step": 2755 }, { "epoch": 0.033636795729589414, "grad_norm": 2.3159687519073486, "learning_rate": 3.362174019010481e-06, "loss": 0.9765, "step": 2760 }, { "epoch": 0.033697731953737216, "grad_norm": 2.124898672103882, "learning_rate": 3.368267121618328e-06, "loss": 0.9142, "step": 2765 }, { "epoch": 0.033758668177885025, "grad_norm": 2.2495102882385254, "learning_rate": 3.3743602242261764e-06, "loss": 1.0176, "step": 2770 }, { "epoch": 0.033819604402032834, "grad_norm": 2.086731195449829, "learning_rate": 3.3804533268340238e-06, "loss": 0.9393, "step": 2775 }, { "epoch": 0.03388054062618064, "grad_norm": 2.208688735961914, "learning_rate": 3.386546429441872e-06, "loss": 0.9969, "step": 2780 }, { "epoch": 0.033941476850328445, "grad_norm": 2.003089189529419, "learning_rate": 3.39263953204972e-06, "loss": 0.9931, "step": 2785 }, { "epoch": 0.034002413074476254, "grad_norm": 2.3607048988342285, "learning_rate": 3.398732634657568e-06, "loss": 0.9739, "step": 2790 }, { "epoch": 0.03406334929862406, "grad_norm": 2.6090104579925537, "learning_rate": 3.4048257372654157e-06, "loss": 0.9538, "step": 2795 }, { "epoch": 0.034124285522771865, "grad_norm": 2.322404384613037, "learning_rate": 3.410918839873264e-06, "loss": 0.9932, "step": 2800 }, { "epoch": 0.034185221746919674, "grad_norm": 1.8748831748962402, "learning_rate": 3.4170119424811117e-06, "loss": 0.9287, "step": 2805 }, { "epoch": 0.03424615797106748, "grad_norm": 1.9689679145812988, "learning_rate": 3.4231050450889595e-06, "loss": 1.0296, "step": 2810 }, { "epoch": 0.034307094195215285, "grad_norm": 2.209704637527466, "learning_rate": 3.4291981476968073e-06, "loss": 0.947, "step": 2815 }, { "epoch": 0.034368030419363094, "grad_norm": 2.4867334365844727, "learning_rate": 3.4352912503046555e-06, "loss": 1.003, "step": 2820 }, { "epoch": 0.0344289666435109, "grad_norm": 2.2483296394348145, "learning_rate": 3.4413843529125037e-06, "loss": 0.9411, "step": 2825 }, { "epoch": 0.03448990286765871, "grad_norm": 2.6648635864257812, "learning_rate": 3.447477455520351e-06, "loss": 0.9359, "step": 2830 }, { "epoch": 0.034550839091806514, "grad_norm": 3.0203793048858643, "learning_rate": 3.4535705581281993e-06, "loss": 1.0274, "step": 2835 }, { "epoch": 0.03461177531595432, "grad_norm": 2.0615806579589844, "learning_rate": 3.4596636607360475e-06, "loss": 0.9854, "step": 2840 }, { "epoch": 0.03467271154010213, "grad_norm": 2.0243630409240723, "learning_rate": 3.465756763343895e-06, "loss": 0.9909, "step": 2845 }, { "epoch": 0.034733647764249934, "grad_norm": 2.040055990219116, "learning_rate": 3.471849865951743e-06, "loss": 1.0091, "step": 2850 }, { "epoch": 0.03479458398839774, "grad_norm": 2.1747169494628906, "learning_rate": 3.4779429685595904e-06, "loss": 0.9618, "step": 2855 }, { "epoch": 0.03485552021254555, "grad_norm": 2.4057774543762207, "learning_rate": 3.4840360711674386e-06, "loss": 0.9201, "step": 2860 }, { "epoch": 0.034916456436693354, "grad_norm": 2.160696268081665, "learning_rate": 3.490129173775287e-06, "loss": 1.0395, "step": 2865 }, { "epoch": 0.03497739266084116, "grad_norm": 2.3195247650146484, "learning_rate": 3.496222276383134e-06, "loss": 0.9941, "step": 2870 }, { "epoch": 0.03503832888498897, "grad_norm": 2.482804298400879, "learning_rate": 3.5023153789909824e-06, "loss": 1.0124, "step": 2875 }, { "epoch": 0.03509926510913678, "grad_norm": 2.1377921104431152, "learning_rate": 3.5084084815988306e-06, "loss": 1.0243, "step": 2880 }, { "epoch": 0.03516020133328458, "grad_norm": 1.9905128479003906, "learning_rate": 3.5145015842066784e-06, "loss": 0.9688, "step": 2885 }, { "epoch": 0.03522113755743239, "grad_norm": 2.2255070209503174, "learning_rate": 3.520594686814526e-06, "loss": 1.0362, "step": 2890 }, { "epoch": 0.0352820737815802, "grad_norm": 1.9598385095596313, "learning_rate": 3.526687789422374e-06, "loss": 0.9627, "step": 2895 }, { "epoch": 0.035343010005728, "grad_norm": 2.778237819671631, "learning_rate": 3.532780892030222e-06, "loss": 0.9537, "step": 2900 }, { "epoch": 0.03540394622987581, "grad_norm": 2.448702096939087, "learning_rate": 3.53887399463807e-06, "loss": 1.0089, "step": 2905 }, { "epoch": 0.03546488245402362, "grad_norm": 2.404555559158325, "learning_rate": 3.5449670972459177e-06, "loss": 0.8844, "step": 2910 }, { "epoch": 0.03552581867817142, "grad_norm": 1.9399135112762451, "learning_rate": 3.551060199853766e-06, "loss": 0.9645, "step": 2915 }, { "epoch": 0.03558675490231923, "grad_norm": 2.256913185119629, "learning_rate": 3.557153302461614e-06, "loss": 0.9824, "step": 2920 }, { "epoch": 0.03564769112646704, "grad_norm": 2.1390554904937744, "learning_rate": 3.5632464050694615e-06, "loss": 0.953, "step": 2925 }, { "epoch": 0.03570862735061485, "grad_norm": 2.4275834560394287, "learning_rate": 3.5693395076773097e-06, "loss": 0.9326, "step": 2930 }, { "epoch": 0.03576956357476265, "grad_norm": 2.0375444889068604, "learning_rate": 3.575432610285158e-06, "loss": 0.9692, "step": 2935 }, { "epoch": 0.03583049979891046, "grad_norm": 2.1449596881866455, "learning_rate": 3.5815257128930053e-06, "loss": 0.9713, "step": 2940 }, { "epoch": 0.03589143602305827, "grad_norm": 2.028691530227661, "learning_rate": 3.5876188155008535e-06, "loss": 0.9865, "step": 2945 }, { "epoch": 0.03595237224720607, "grad_norm": 2.4232821464538574, "learning_rate": 3.593711918108701e-06, "loss": 1.003, "step": 2950 }, { "epoch": 0.03601330847135388, "grad_norm": 2.5633482933044434, "learning_rate": 3.599805020716549e-06, "loss": 1.0376, "step": 2955 }, { "epoch": 0.03607424469550169, "grad_norm": 2.181976795196533, "learning_rate": 3.6058981233243972e-06, "loss": 0.959, "step": 2960 }, { "epoch": 0.0361351809196495, "grad_norm": 1.9720872640609741, "learning_rate": 3.611991225932245e-06, "loss": 0.9347, "step": 2965 }, { "epoch": 0.0361961171437973, "grad_norm": 2.136627435684204, "learning_rate": 3.618084328540093e-06, "loss": 0.9941, "step": 2970 }, { "epoch": 0.03625705336794511, "grad_norm": 2.159546375274658, "learning_rate": 3.624177431147941e-06, "loss": 0.9958, "step": 2975 }, { "epoch": 0.03631798959209292, "grad_norm": 2.227966785430908, "learning_rate": 3.630270533755789e-06, "loss": 0.9091, "step": 2980 }, { "epoch": 0.03637892581624072, "grad_norm": 1.8551913499832153, "learning_rate": 3.6363636363636366e-06, "loss": 0.9772, "step": 2985 }, { "epoch": 0.03643986204038853, "grad_norm": 2.7564539909362793, "learning_rate": 3.6424567389714844e-06, "loss": 0.9693, "step": 2990 }, { "epoch": 0.03650079826453634, "grad_norm": 2.2944040298461914, "learning_rate": 3.6485498415793326e-06, "loss": 1.0218, "step": 2995 }, { "epoch": 0.03656173448868414, "grad_norm": 2.1678144931793213, "learning_rate": 3.6546429441871808e-06, "loss": 0.8956, "step": 3000 }, { "epoch": 0.03662267071283195, "grad_norm": 2.3455443382263184, "learning_rate": 3.660736046795028e-06, "loss": 0.9831, "step": 3005 }, { "epoch": 0.03668360693697976, "grad_norm": 2.401094675064087, "learning_rate": 3.6668291494028763e-06, "loss": 1.0464, "step": 3010 }, { "epoch": 0.03674454316112757, "grad_norm": 2.194732666015625, "learning_rate": 3.6729222520107246e-06, "loss": 1.0374, "step": 3015 }, { "epoch": 0.03680547938527537, "grad_norm": 1.8668485879898071, "learning_rate": 3.679015354618572e-06, "loss": 0.9479, "step": 3020 }, { "epoch": 0.03686641560942318, "grad_norm": 2.312548875808716, "learning_rate": 3.68510845722642e-06, "loss": 1.0261, "step": 3025 }, { "epoch": 0.03692735183357099, "grad_norm": 2.1654229164123535, "learning_rate": 3.6912015598342675e-06, "loss": 1.0264, "step": 3030 }, { "epoch": 0.03698828805771879, "grad_norm": 2.8935611248016357, "learning_rate": 3.6972946624421157e-06, "loss": 1.0208, "step": 3035 }, { "epoch": 0.0370492242818666, "grad_norm": 2.498549222946167, "learning_rate": 3.703387765049964e-06, "loss": 0.9293, "step": 3040 }, { "epoch": 0.03711016050601441, "grad_norm": 2.057310104370117, "learning_rate": 3.7094808676578113e-06, "loss": 1.0065, "step": 3045 }, { "epoch": 0.03717109673016221, "grad_norm": 1.9738004207611084, "learning_rate": 3.7155739702656595e-06, "loss": 0.9666, "step": 3050 }, { "epoch": 0.03723203295431002, "grad_norm": 2.4071245193481445, "learning_rate": 3.7216670728735077e-06, "loss": 0.949, "step": 3055 }, { "epoch": 0.03729296917845783, "grad_norm": 2.1883621215820312, "learning_rate": 3.7277601754813555e-06, "loss": 0.9002, "step": 3060 }, { "epoch": 0.037353905402605636, "grad_norm": 2.0784788131713867, "learning_rate": 3.7338532780892032e-06, "loss": 0.9437, "step": 3065 }, { "epoch": 0.03741484162675344, "grad_norm": 2.286177635192871, "learning_rate": 3.7399463806970514e-06, "loss": 0.9143, "step": 3070 }, { "epoch": 0.03747577785090125, "grad_norm": 2.2387280464172363, "learning_rate": 3.7460394833048992e-06, "loss": 0.8802, "step": 3075 }, { "epoch": 0.037536714075049056, "grad_norm": 2.0129218101501465, "learning_rate": 3.752132585912747e-06, "loss": 1.0686, "step": 3080 }, { "epoch": 0.03759765029919686, "grad_norm": 1.9732598066329956, "learning_rate": 3.758225688520595e-06, "loss": 1.0329, "step": 3085 }, { "epoch": 0.03765858652334467, "grad_norm": 2.4990298748016357, "learning_rate": 3.764318791128443e-06, "loss": 0.9792, "step": 3090 }, { "epoch": 0.037719522747492476, "grad_norm": 2.3186347484588623, "learning_rate": 3.770411893736291e-06, "loss": 0.9612, "step": 3095 }, { "epoch": 0.03778045897164028, "grad_norm": 2.24633526802063, "learning_rate": 3.7765049963441386e-06, "loss": 0.9779, "step": 3100 }, { "epoch": 0.03784139519578809, "grad_norm": 2.543046236038208, "learning_rate": 3.7825980989519868e-06, "loss": 0.9013, "step": 3105 }, { "epoch": 0.037902331419935896, "grad_norm": 2.3290164470672607, "learning_rate": 3.788691201559835e-06, "loss": 0.9186, "step": 3110 }, { "epoch": 0.037963267644083705, "grad_norm": 1.947358250617981, "learning_rate": 3.7947843041676823e-06, "loss": 0.9923, "step": 3115 }, { "epoch": 0.03802420386823151, "grad_norm": 2.4613454341888428, "learning_rate": 3.8008774067755305e-06, "loss": 1.0181, "step": 3120 }, { "epoch": 0.038085140092379316, "grad_norm": 2.327993392944336, "learning_rate": 3.806970509383378e-06, "loss": 1.0045, "step": 3125 }, { "epoch": 0.038146076316527125, "grad_norm": 1.9370499849319458, "learning_rate": 3.813063611991226e-06, "loss": 0.999, "step": 3130 }, { "epoch": 0.03820701254067493, "grad_norm": 1.956012487411499, "learning_rate": 3.819156714599074e-06, "loss": 0.9771, "step": 3135 }, { "epoch": 0.038267948764822736, "grad_norm": 2.199153423309326, "learning_rate": 3.825249817206922e-06, "loss": 1.0894, "step": 3140 }, { "epoch": 0.038328884988970545, "grad_norm": 2.582002639770508, "learning_rate": 3.83134291981477e-06, "loss": 0.9174, "step": 3145 }, { "epoch": 0.03838982121311835, "grad_norm": 1.9262511730194092, "learning_rate": 3.837436022422618e-06, "loss": 0.9406, "step": 3150 }, { "epoch": 0.038450757437266156, "grad_norm": 2.1835310459136963, "learning_rate": 3.8435291250304655e-06, "loss": 0.9645, "step": 3155 }, { "epoch": 0.038511693661413965, "grad_norm": 2.321536064147949, "learning_rate": 3.849622227638314e-06, "loss": 0.9743, "step": 3160 }, { "epoch": 0.038572629885561774, "grad_norm": 2.474736452102661, "learning_rate": 3.855715330246161e-06, "loss": 0.9007, "step": 3165 }, { "epoch": 0.038633566109709576, "grad_norm": 2.053748369216919, "learning_rate": 3.861808432854009e-06, "loss": 0.9968, "step": 3170 }, { "epoch": 0.038694502333857385, "grad_norm": 2.338757038116455, "learning_rate": 3.8679015354618574e-06, "loss": 0.9297, "step": 3175 }, { "epoch": 0.038755438558005194, "grad_norm": 2.0456247329711914, "learning_rate": 3.873994638069705e-06, "loss": 0.9936, "step": 3180 }, { "epoch": 0.038816374782152996, "grad_norm": 2.0016770362854004, "learning_rate": 3.880087740677553e-06, "loss": 0.9804, "step": 3185 }, { "epoch": 0.038877311006300805, "grad_norm": 2.197876453399658, "learning_rate": 3.886180843285401e-06, "loss": 0.9676, "step": 3190 }, { "epoch": 0.038938247230448614, "grad_norm": 3.3914897441864014, "learning_rate": 3.892273945893249e-06, "loss": 1.0129, "step": 3195 }, { "epoch": 0.03899918345459642, "grad_norm": 1.9275579452514648, "learning_rate": 3.898367048501097e-06, "loss": 0.8939, "step": 3200 }, { "epoch": 0.039060119678744225, "grad_norm": 2.2062485218048096, "learning_rate": 3.904460151108945e-06, "loss": 0.9657, "step": 3205 }, { "epoch": 0.039121055902892034, "grad_norm": 1.8917139768600464, "learning_rate": 3.910553253716793e-06, "loss": 0.8903, "step": 3210 }, { "epoch": 0.03918199212703984, "grad_norm": 1.853278398513794, "learning_rate": 3.9166463563246405e-06, "loss": 0.9665, "step": 3215 }, { "epoch": 0.039242928351187645, "grad_norm": 2.230132818222046, "learning_rate": 3.922739458932489e-06, "loss": 0.9857, "step": 3220 }, { "epoch": 0.039303864575335454, "grad_norm": 2.098845958709717, "learning_rate": 3.928832561540337e-06, "loss": 1.0358, "step": 3225 }, { "epoch": 0.03936480079948326, "grad_norm": 1.9850544929504395, "learning_rate": 3.934925664148185e-06, "loss": 1.0164, "step": 3230 }, { "epoch": 0.039425737023631065, "grad_norm": 2.227130889892578, "learning_rate": 3.9410187667560325e-06, "loss": 0.9151, "step": 3235 }, { "epoch": 0.039486673247778874, "grad_norm": 2.115588665008545, "learning_rate": 3.947111869363881e-06, "loss": 0.9676, "step": 3240 }, { "epoch": 0.03954760947192668, "grad_norm": 2.2951948642730713, "learning_rate": 3.953204971971729e-06, "loss": 0.9895, "step": 3245 }, { "epoch": 0.03960854569607449, "grad_norm": 1.8308266401290894, "learning_rate": 3.959298074579576e-06, "loss": 0.9649, "step": 3250 }, { "epoch": 0.039669481920222294, "grad_norm": 2.0787739753723145, "learning_rate": 3.9653911771874245e-06, "loss": 0.9065, "step": 3255 }, { "epoch": 0.0397304181443701, "grad_norm": 2.0929501056671143, "learning_rate": 3.971484279795272e-06, "loss": 1.0112, "step": 3260 }, { "epoch": 0.03979135436851791, "grad_norm": 1.903670072555542, "learning_rate": 3.97757738240312e-06, "loss": 0.9531, "step": 3265 }, { "epoch": 0.039852290592665714, "grad_norm": 2.2818386554718018, "learning_rate": 3.983670485010968e-06, "loss": 0.9793, "step": 3270 }, { "epoch": 0.03991322681681352, "grad_norm": 2.062208890914917, "learning_rate": 3.989763587618816e-06, "loss": 1.0012, "step": 3275 }, { "epoch": 0.03997416304096133, "grad_norm": 2.117384672164917, "learning_rate": 3.995856690226664e-06, "loss": 0.9523, "step": 3280 }, { "epoch": 0.040035099265109134, "grad_norm": 1.9325432777404785, "learning_rate": 4.001949792834512e-06, "loss": 0.9046, "step": 3285 }, { "epoch": 0.04009603548925694, "grad_norm": 2.1204543113708496, "learning_rate": 4.008042895442359e-06, "loss": 0.9965, "step": 3290 }, { "epoch": 0.04015697171340475, "grad_norm": 2.6637449264526367, "learning_rate": 4.014135998050208e-06, "loss": 0.9629, "step": 3295 }, { "epoch": 0.04021790793755256, "grad_norm": 2.0179738998413086, "learning_rate": 4.020229100658055e-06, "loss": 0.9383, "step": 3300 }, { "epoch": 0.04027884416170036, "grad_norm": 1.9468209743499756, "learning_rate": 4.026322203265903e-06, "loss": 1.0108, "step": 3305 }, { "epoch": 0.04033978038584817, "grad_norm": 2.163207530975342, "learning_rate": 4.032415305873751e-06, "loss": 0.9705, "step": 3310 }, { "epoch": 0.04040071660999598, "grad_norm": 2.0922605991363525, "learning_rate": 4.038508408481599e-06, "loss": 0.9404, "step": 3315 }, { "epoch": 0.04046165283414378, "grad_norm": 1.8850396871566772, "learning_rate": 4.044601511089447e-06, "loss": 0.9281, "step": 3320 }, { "epoch": 0.04052258905829159, "grad_norm": 2.0032474994659424, "learning_rate": 4.050694613697295e-06, "loss": 1.0522, "step": 3325 }, { "epoch": 0.0405835252824394, "grad_norm": 2.505028247833252, "learning_rate": 4.0567877163051425e-06, "loss": 0.934, "step": 3330 }, { "epoch": 0.0406444615065872, "grad_norm": 2.3225080966949463, "learning_rate": 4.062880818912991e-06, "loss": 1.0031, "step": 3335 }, { "epoch": 0.04070539773073501, "grad_norm": 2.423830270767212, "learning_rate": 4.068973921520838e-06, "loss": 0.9495, "step": 3340 }, { "epoch": 0.04076633395488282, "grad_norm": 2.38288950920105, "learning_rate": 4.075067024128686e-06, "loss": 0.9724, "step": 3345 }, { "epoch": 0.04082727017903063, "grad_norm": 2.250657320022583, "learning_rate": 4.0811601267365345e-06, "loss": 0.9658, "step": 3350 }, { "epoch": 0.04088820640317843, "grad_norm": 1.846524953842163, "learning_rate": 4.087253229344382e-06, "loss": 1.0022, "step": 3355 }, { "epoch": 0.04094914262732624, "grad_norm": 2.083474636077881, "learning_rate": 4.09334633195223e-06, "loss": 0.9523, "step": 3360 }, { "epoch": 0.04101007885147405, "grad_norm": 1.8926117420196533, "learning_rate": 4.099439434560078e-06, "loss": 1.0175, "step": 3365 }, { "epoch": 0.04107101507562185, "grad_norm": 2.7333853244781494, "learning_rate": 4.1055325371679265e-06, "loss": 0.8831, "step": 3370 }, { "epoch": 0.04113195129976966, "grad_norm": 2.1762001514434814, "learning_rate": 4.111625639775774e-06, "loss": 1.009, "step": 3375 }, { "epoch": 0.04119288752391747, "grad_norm": 2.217297077178955, "learning_rate": 4.117718742383622e-06, "loss": 0.9852, "step": 3380 }, { "epoch": 0.04125382374806527, "grad_norm": 2.0643842220306396, "learning_rate": 4.12381184499147e-06, "loss": 0.9552, "step": 3385 }, { "epoch": 0.04131475997221308, "grad_norm": 2.2478179931640625, "learning_rate": 4.129904947599318e-06, "loss": 0.9568, "step": 3390 }, { "epoch": 0.04137569619636089, "grad_norm": 1.9871281385421753, "learning_rate": 4.135998050207166e-06, "loss": 0.9889, "step": 3395 }, { "epoch": 0.0414366324205087, "grad_norm": 1.9624282121658325, "learning_rate": 4.142091152815014e-06, "loss": 1.0173, "step": 3400 }, { "epoch": 0.0414975686446565, "grad_norm": 2.0433740615844727, "learning_rate": 4.148184255422862e-06, "loss": 0.9847, "step": 3405 }, { "epoch": 0.04155850486880431, "grad_norm": 2.38912296295166, "learning_rate": 4.15427735803071e-06, "loss": 0.9659, "step": 3410 }, { "epoch": 0.04161944109295212, "grad_norm": 2.124469518661499, "learning_rate": 4.160370460638558e-06, "loss": 0.9167, "step": 3415 }, { "epoch": 0.04168037731709992, "grad_norm": 1.910997748374939, "learning_rate": 4.166463563246406e-06, "loss": 0.9825, "step": 3420 }, { "epoch": 0.04174131354124773, "grad_norm": 2.1323726177215576, "learning_rate": 4.172556665854253e-06, "loss": 1.0176, "step": 3425 }, { "epoch": 0.04180224976539554, "grad_norm": 2.076981544494629, "learning_rate": 4.178649768462102e-06, "loss": 0.9144, "step": 3430 }, { "epoch": 0.04186318598954335, "grad_norm": 2.431020498275757, "learning_rate": 4.184742871069949e-06, "loss": 0.9761, "step": 3435 }, { "epoch": 0.04192412221369115, "grad_norm": 1.894600510597229, "learning_rate": 4.190835973677797e-06, "loss": 1.0336, "step": 3440 }, { "epoch": 0.04198505843783896, "grad_norm": 2.1999924182891846, "learning_rate": 4.196929076285645e-06, "loss": 1.0227, "step": 3445 }, { "epoch": 0.04204599466198677, "grad_norm": 2.0287058353424072, "learning_rate": 4.203022178893493e-06, "loss": 0.9726, "step": 3450 }, { "epoch": 0.04210693088613457, "grad_norm": 1.9648072719573975, "learning_rate": 4.209115281501341e-06, "loss": 1.0169, "step": 3455 }, { "epoch": 0.04216786711028238, "grad_norm": 1.8318368196487427, "learning_rate": 4.215208384109189e-06, "loss": 1.0408, "step": 3460 }, { "epoch": 0.04222880333443019, "grad_norm": 2.2250726222991943, "learning_rate": 4.2213014867170365e-06, "loss": 0.9719, "step": 3465 }, { "epoch": 0.04228973955857799, "grad_norm": 1.9464809894561768, "learning_rate": 4.227394589324885e-06, "loss": 1.0051, "step": 3470 }, { "epoch": 0.0423506757827258, "grad_norm": 2.2316622734069824, "learning_rate": 4.233487691932732e-06, "loss": 0.9957, "step": 3475 }, { "epoch": 0.04241161200687361, "grad_norm": 1.877394437789917, "learning_rate": 4.23958079454058e-06, "loss": 0.8847, "step": 3480 }, { "epoch": 0.042472548231021416, "grad_norm": 2.7246124744415283, "learning_rate": 4.2456738971484285e-06, "loss": 0.9448, "step": 3485 }, { "epoch": 0.04253348445516922, "grad_norm": 2.19623064994812, "learning_rate": 4.251766999756276e-06, "loss": 0.9346, "step": 3490 }, { "epoch": 0.04259442067931703, "grad_norm": 2.29207444190979, "learning_rate": 4.257860102364124e-06, "loss": 0.924, "step": 3495 }, { "epoch": 0.042655356903464836, "grad_norm": 1.9689520597457886, "learning_rate": 4.263953204971972e-06, "loss": 0.9061, "step": 3500 }, { "epoch": 0.04271629312761264, "grad_norm": 2.6058974266052246, "learning_rate": 4.27004630757982e-06, "loss": 1.0667, "step": 3505 }, { "epoch": 0.04277722935176045, "grad_norm": 2.1562438011169434, "learning_rate": 4.276139410187668e-06, "loss": 1.0472, "step": 3510 }, { "epoch": 0.042838165575908256, "grad_norm": 2.469334840774536, "learning_rate": 4.282232512795515e-06, "loss": 0.9644, "step": 3515 }, { "epoch": 0.04289910180005606, "grad_norm": 2.5326170921325684, "learning_rate": 4.288325615403363e-06, "loss": 0.9724, "step": 3520 }, { "epoch": 0.04296003802420387, "grad_norm": 2.5040512084960938, "learning_rate": 4.294418718011212e-06, "loss": 0.9798, "step": 3525 }, { "epoch": 0.043020974248351676, "grad_norm": 1.927352786064148, "learning_rate": 4.300511820619059e-06, "loss": 0.9838, "step": 3530 }, { "epoch": 0.043081910472499485, "grad_norm": 2.0768890380859375, "learning_rate": 4.306604923226907e-06, "loss": 0.9776, "step": 3535 }, { "epoch": 0.04314284669664729, "grad_norm": 1.966644048690796, "learning_rate": 4.312698025834755e-06, "loss": 0.9835, "step": 3540 }, { "epoch": 0.043203782920795096, "grad_norm": 1.912344217300415, "learning_rate": 4.3187911284426036e-06, "loss": 1.0465, "step": 3545 }, { "epoch": 0.043264719144942905, "grad_norm": 2.259838581085205, "learning_rate": 4.324884231050451e-06, "loss": 0.9087, "step": 3550 }, { "epoch": 0.04332565536909071, "grad_norm": 2.276555299758911, "learning_rate": 4.330977333658299e-06, "loss": 0.9103, "step": 3555 }, { "epoch": 0.043386591593238516, "grad_norm": 2.0731046199798584, "learning_rate": 4.337070436266147e-06, "loss": 0.9293, "step": 3560 }, { "epoch": 0.043447527817386325, "grad_norm": 2.106032371520996, "learning_rate": 4.343163538873995e-06, "loss": 1.0092, "step": 3565 }, { "epoch": 0.04350846404153413, "grad_norm": 3.2258543968200684, "learning_rate": 4.349256641481843e-06, "loss": 1.0152, "step": 3570 }, { "epoch": 0.043569400265681936, "grad_norm": 1.9558651447296143, "learning_rate": 4.355349744089691e-06, "loss": 0.9617, "step": 3575 }, { "epoch": 0.043630336489829745, "grad_norm": 1.96135413646698, "learning_rate": 4.361442846697539e-06, "loss": 0.912, "step": 3580 }, { "epoch": 0.043691272713977554, "grad_norm": 2.1873459815979004, "learning_rate": 4.367535949305387e-06, "loss": 1.0109, "step": 3585 }, { "epoch": 0.043752208938125356, "grad_norm": 2.223349094390869, "learning_rate": 4.373629051913235e-06, "loss": 0.9787, "step": 3590 }, { "epoch": 0.043813145162273165, "grad_norm": 2.3961894512176514, "learning_rate": 4.379722154521083e-06, "loss": 0.9644, "step": 3595 }, { "epoch": 0.043874081386420974, "grad_norm": 1.8966249227523804, "learning_rate": 4.3858152571289305e-06, "loss": 1.0109, "step": 3600 }, { "epoch": 0.043935017610568776, "grad_norm": 2.403662919998169, "learning_rate": 4.391908359736779e-06, "loss": 0.9835, "step": 3605 }, { "epoch": 0.043995953834716585, "grad_norm": 2.0127458572387695, "learning_rate": 4.398001462344626e-06, "loss": 0.979, "step": 3610 }, { "epoch": 0.044056890058864394, "grad_norm": 2.38726544380188, "learning_rate": 4.404094564952474e-06, "loss": 0.9233, "step": 3615 }, { "epoch": 0.0441178262830122, "grad_norm": 1.7328885793685913, "learning_rate": 4.4101876675603224e-06, "loss": 0.9488, "step": 3620 }, { "epoch": 0.044178762507160005, "grad_norm": 2.163496971130371, "learning_rate": 4.41628077016817e-06, "loss": 1.0006, "step": 3625 }, { "epoch": 0.044239698731307814, "grad_norm": 2.4504263401031494, "learning_rate": 4.422373872776018e-06, "loss": 0.9403, "step": 3630 }, { "epoch": 0.04430063495545562, "grad_norm": 2.044313669204712, "learning_rate": 4.428466975383866e-06, "loss": 1.0396, "step": 3635 }, { "epoch": 0.044361571179603425, "grad_norm": 2.8100461959838867, "learning_rate": 4.4345600779917136e-06, "loss": 1.0157, "step": 3640 }, { "epoch": 0.044422507403751234, "grad_norm": 2.241595506668091, "learning_rate": 4.440653180599562e-06, "loss": 0.9875, "step": 3645 }, { "epoch": 0.04448344362789904, "grad_norm": 1.9177073240280151, "learning_rate": 4.446746283207409e-06, "loss": 1.003, "step": 3650 }, { "epoch": 0.044544379852046845, "grad_norm": 2.2800934314727783, "learning_rate": 4.452839385815257e-06, "loss": 1.0169, "step": 3655 }, { "epoch": 0.044605316076194654, "grad_norm": 2.1621227264404297, "learning_rate": 4.4589324884231056e-06, "loss": 1.0043, "step": 3660 }, { "epoch": 0.04466625230034246, "grad_norm": 1.9367449283599854, "learning_rate": 4.465025591030953e-06, "loss": 0.8815, "step": 3665 }, { "epoch": 0.04472718852449027, "grad_norm": 2.401663064956665, "learning_rate": 4.471118693638801e-06, "loss": 1.0228, "step": 3670 }, { "epoch": 0.044788124748638074, "grad_norm": 2.771209239959717, "learning_rate": 4.477211796246649e-06, "loss": 0.9219, "step": 3675 }, { "epoch": 0.04484906097278588, "grad_norm": 2.5835812091827393, "learning_rate": 4.483304898854497e-06, "loss": 0.9297, "step": 3680 }, { "epoch": 0.04490999719693369, "grad_norm": 1.9277350902557373, "learning_rate": 4.489398001462345e-06, "loss": 0.9754, "step": 3685 }, { "epoch": 0.044970933421081494, "grad_norm": 2.1488263607025146, "learning_rate": 4.495491104070193e-06, "loss": 1.0429, "step": 3690 }, { "epoch": 0.0450318696452293, "grad_norm": 2.518409490585327, "learning_rate": 4.5015842066780405e-06, "loss": 0.9306, "step": 3695 }, { "epoch": 0.04509280586937711, "grad_norm": 2.8369314670562744, "learning_rate": 4.507677309285889e-06, "loss": 0.9142, "step": 3700 }, { "epoch": 0.045153742093524914, "grad_norm": 2.135213613510132, "learning_rate": 4.513770411893736e-06, "loss": 1.0042, "step": 3705 }, { "epoch": 0.04521467831767272, "grad_norm": 2.122784376144409, "learning_rate": 4.519863514501584e-06, "loss": 0.9844, "step": 3710 }, { "epoch": 0.04527561454182053, "grad_norm": 2.018807888031006, "learning_rate": 4.5259566171094324e-06, "loss": 1.0126, "step": 3715 }, { "epoch": 0.04533655076596834, "grad_norm": 1.7263844013214111, "learning_rate": 4.532049719717281e-06, "loss": 0.9245, "step": 3720 }, { "epoch": 0.04539748699011614, "grad_norm": 2.25146746635437, "learning_rate": 4.538142822325128e-06, "loss": 0.9767, "step": 3725 }, { "epoch": 0.04545842321426395, "grad_norm": 2.2625250816345215, "learning_rate": 4.544235924932976e-06, "loss": 1.0543, "step": 3730 }, { "epoch": 0.04551935943841176, "grad_norm": 2.1152076721191406, "learning_rate": 4.550329027540824e-06, "loss": 0.9159, "step": 3735 }, { "epoch": 0.04558029566255956, "grad_norm": 2.7101073265075684, "learning_rate": 4.556422130148672e-06, "loss": 0.9897, "step": 3740 }, { "epoch": 0.04564123188670737, "grad_norm": 2.2513201236724854, "learning_rate": 4.56251523275652e-06, "loss": 0.9249, "step": 3745 }, { "epoch": 0.04570216811085518, "grad_norm": 2.269531726837158, "learning_rate": 4.568608335364368e-06, "loss": 0.9671, "step": 3750 }, { "epoch": 0.04576310433500298, "grad_norm": 2.6381139755249023, "learning_rate": 4.574701437972216e-06, "loss": 1.0489, "step": 3755 }, { "epoch": 0.04582404055915079, "grad_norm": 1.9028347730636597, "learning_rate": 4.580794540580064e-06, "loss": 1.0122, "step": 3760 }, { "epoch": 0.0458849767832986, "grad_norm": 2.0857086181640625, "learning_rate": 4.586887643187912e-06, "loss": 0.9497, "step": 3765 }, { "epoch": 0.04594591300744641, "grad_norm": 1.900308609008789, "learning_rate": 4.59298074579576e-06, "loss": 0.9765, "step": 3770 }, { "epoch": 0.04600684923159421, "grad_norm": 1.9422956705093384, "learning_rate": 4.5990738484036075e-06, "loss": 0.9294, "step": 3775 }, { "epoch": 0.04606778545574202, "grad_norm": 1.7919955253601074, "learning_rate": 4.605166951011456e-06, "loss": 0.9716, "step": 3780 }, { "epoch": 0.04612872167988983, "grad_norm": 2.133025884628296, "learning_rate": 4.611260053619303e-06, "loss": 0.9955, "step": 3785 }, { "epoch": 0.04618965790403763, "grad_norm": 2.0849709510803223, "learning_rate": 4.617353156227151e-06, "loss": 0.9688, "step": 3790 }, { "epoch": 0.04625059412818544, "grad_norm": 2.2371034622192383, "learning_rate": 4.6234462588349995e-06, "loss": 1.0, "step": 3795 }, { "epoch": 0.04631153035233325, "grad_norm": 2.032249927520752, "learning_rate": 4.629539361442847e-06, "loss": 1.0276, "step": 3800 }, { "epoch": 0.04637246657648105, "grad_norm": 2.2195770740509033, "learning_rate": 4.635632464050695e-06, "loss": 1.0316, "step": 3805 }, { "epoch": 0.04643340280062886, "grad_norm": 2.212566375732422, "learning_rate": 4.641725566658543e-06, "loss": 0.9243, "step": 3810 }, { "epoch": 0.04649433902477667, "grad_norm": 2.024203062057495, "learning_rate": 4.647818669266391e-06, "loss": 1.0354, "step": 3815 }, { "epoch": 0.04655527524892448, "grad_norm": 2.261993646621704, "learning_rate": 4.653911771874239e-06, "loss": 0.9751, "step": 3820 }, { "epoch": 0.04661621147307228, "grad_norm": 2.015202283859253, "learning_rate": 4.660004874482086e-06, "loss": 0.9916, "step": 3825 }, { "epoch": 0.04667714769722009, "grad_norm": 2.9858975410461426, "learning_rate": 4.6660979770899344e-06, "loss": 0.9655, "step": 3830 }, { "epoch": 0.0467380839213679, "grad_norm": 2.3230295181274414, "learning_rate": 4.672191079697783e-06, "loss": 1.0273, "step": 3835 }, { "epoch": 0.0467990201455157, "grad_norm": 2.1919026374816895, "learning_rate": 4.67828418230563e-06, "loss": 0.9697, "step": 3840 }, { "epoch": 0.04685995636966351, "grad_norm": 1.9919596910476685, "learning_rate": 4.684377284913478e-06, "loss": 0.9727, "step": 3845 }, { "epoch": 0.04692089259381132, "grad_norm": 2.284485101699829, "learning_rate": 4.690470387521326e-06, "loss": 0.9407, "step": 3850 }, { "epoch": 0.04698182881795913, "grad_norm": 1.8306994438171387, "learning_rate": 4.696563490129174e-06, "loss": 0.91, "step": 3855 }, { "epoch": 0.04704276504210693, "grad_norm": 2.0494189262390137, "learning_rate": 4.702656592737022e-06, "loss": 1.0041, "step": 3860 }, { "epoch": 0.04710370126625474, "grad_norm": 2.29205584526062, "learning_rate": 4.70874969534487e-06, "loss": 0.9387, "step": 3865 }, { "epoch": 0.04716463749040255, "grad_norm": 2.3026747703552246, "learning_rate": 4.7148427979527175e-06, "loss": 0.8742, "step": 3870 }, { "epoch": 0.04722557371455035, "grad_norm": 2.151265859603882, "learning_rate": 4.720935900560566e-06, "loss": 0.9874, "step": 3875 }, { "epoch": 0.04728650993869816, "grad_norm": 2.223660707473755, "learning_rate": 4.727029003168413e-06, "loss": 1.0072, "step": 3880 }, { "epoch": 0.04734744616284597, "grad_norm": 2.352647066116333, "learning_rate": 4.733122105776261e-06, "loss": 0.959, "step": 3885 }, { "epoch": 0.04740838238699377, "grad_norm": 2.2187283039093018, "learning_rate": 4.7392152083841095e-06, "loss": 0.9666, "step": 3890 }, { "epoch": 0.04746931861114158, "grad_norm": 2.354466676712036, "learning_rate": 4.745308310991958e-06, "loss": 0.9968, "step": 3895 }, { "epoch": 0.04753025483528939, "grad_norm": 2.2447938919067383, "learning_rate": 4.751401413599805e-06, "loss": 0.9025, "step": 3900 }, { "epoch": 0.047591191059437196, "grad_norm": 2.1837375164031982, "learning_rate": 4.757494516207653e-06, "loss": 0.9726, "step": 3905 }, { "epoch": 0.047652127283585, "grad_norm": 2.254817485809326, "learning_rate": 4.7635876188155015e-06, "loss": 0.974, "step": 3910 }, { "epoch": 0.04771306350773281, "grad_norm": 1.9379876852035522, "learning_rate": 4.769680721423349e-06, "loss": 0.9247, "step": 3915 }, { "epoch": 0.047773999731880616, "grad_norm": 1.7604204416275024, "learning_rate": 4.775773824031197e-06, "loss": 1.0094, "step": 3920 }, { "epoch": 0.04783493595602842, "grad_norm": 2.019530773162842, "learning_rate": 4.781866926639045e-06, "loss": 0.9887, "step": 3925 }, { "epoch": 0.04789587218017623, "grad_norm": 2.061871290206909, "learning_rate": 4.7879600292468935e-06, "loss": 1.0037, "step": 3930 }, { "epoch": 0.047956808404324036, "grad_norm": 2.15039324760437, "learning_rate": 4.794053131854741e-06, "loss": 0.9212, "step": 3935 }, { "epoch": 0.04801774462847184, "grad_norm": 2.0310165882110596, "learning_rate": 4.800146234462589e-06, "loss": 0.9699, "step": 3940 }, { "epoch": 0.04807868085261965, "grad_norm": 2.0253446102142334, "learning_rate": 4.806239337070437e-06, "loss": 0.9863, "step": 3945 }, { "epoch": 0.048139617076767456, "grad_norm": 2.1510236263275146, "learning_rate": 4.812332439678285e-06, "loss": 0.9193, "step": 3950 }, { "epoch": 0.048200553300915265, "grad_norm": 2.5437049865722656, "learning_rate": 4.818425542286133e-06, "loss": 0.994, "step": 3955 }, { "epoch": 0.04826148952506307, "grad_norm": 2.4336752891540527, "learning_rate": 4.82451864489398e-06, "loss": 0.8854, "step": 3960 }, { "epoch": 0.048322425749210876, "grad_norm": 2.583707809448242, "learning_rate": 4.830611747501828e-06, "loss": 1.0104, "step": 3965 }, { "epoch": 0.048383361973358685, "grad_norm": 1.809698462486267, "learning_rate": 4.836704850109677e-06, "loss": 0.945, "step": 3970 }, { "epoch": 0.04844429819750649, "grad_norm": 2.1256301403045654, "learning_rate": 4.842797952717524e-06, "loss": 1.0326, "step": 3975 }, { "epoch": 0.048505234421654296, "grad_norm": 2.261321783065796, "learning_rate": 4.848891055325372e-06, "loss": 1.0022, "step": 3980 }, { "epoch": 0.048566170645802105, "grad_norm": 2.3609023094177246, "learning_rate": 4.85498415793322e-06, "loss": 1.0377, "step": 3985 }, { "epoch": 0.04862710686994991, "grad_norm": 2.1425156593322754, "learning_rate": 4.861077260541068e-06, "loss": 0.9755, "step": 3990 }, { "epoch": 0.048688043094097716, "grad_norm": 2.0754098892211914, "learning_rate": 4.867170363148916e-06, "loss": 1.0059, "step": 3995 }, { "epoch": 0.048748979318245525, "grad_norm": 2.200669765472412, "learning_rate": 4.873263465756764e-06, "loss": 0.9178, "step": 4000 }, { "epoch": 0.048809915542393334, "grad_norm": 1.9324270486831665, "learning_rate": 4.8793565683646115e-06, "loss": 0.9732, "step": 4005 }, { "epoch": 0.048870851766541136, "grad_norm": 1.705198049545288, "learning_rate": 4.88544967097246e-06, "loss": 0.9896, "step": 4010 }, { "epoch": 0.048931787990688945, "grad_norm": 2.018338203430176, "learning_rate": 4.891542773580307e-06, "loss": 0.9317, "step": 4015 }, { "epoch": 0.048992724214836754, "grad_norm": 2.3671696186065674, "learning_rate": 4.897635876188155e-06, "loss": 0.9941, "step": 4020 }, { "epoch": 0.049053660438984556, "grad_norm": 2.0482139587402344, "learning_rate": 4.9037289787960035e-06, "loss": 0.9635, "step": 4025 }, { "epoch": 0.049114596663132365, "grad_norm": 2.215291976928711, "learning_rate": 4.909822081403851e-06, "loss": 0.9407, "step": 4030 }, { "epoch": 0.049175532887280174, "grad_norm": 2.0441389083862305, "learning_rate": 4.915915184011699e-06, "loss": 1.0132, "step": 4035 }, { "epoch": 0.04923646911142798, "grad_norm": 2.37117075920105, "learning_rate": 4.922008286619547e-06, "loss": 0.9697, "step": 4040 }, { "epoch": 0.049297405335575785, "grad_norm": 2.0153496265411377, "learning_rate": 4.928101389227395e-06, "loss": 0.973, "step": 4045 }, { "epoch": 0.049358341559723594, "grad_norm": 2.095165252685547, "learning_rate": 4.934194491835243e-06, "loss": 0.9671, "step": 4050 }, { "epoch": 0.0494192777838714, "grad_norm": 2.5227651596069336, "learning_rate": 4.94028759444309e-06, "loss": 1.0154, "step": 4055 }, { "epoch": 0.049480214008019205, "grad_norm": 2.244781732559204, "learning_rate": 4.946380697050938e-06, "loss": 0.9692, "step": 4060 }, { "epoch": 0.049541150232167014, "grad_norm": 2.4527037143707275, "learning_rate": 4.952473799658787e-06, "loss": 1.0548, "step": 4065 }, { "epoch": 0.04960208645631482, "grad_norm": 1.9964945316314697, "learning_rate": 4.958566902266635e-06, "loss": 0.957, "step": 4070 }, { "epoch": 0.049663022680462625, "grad_norm": 2.497344493865967, "learning_rate": 4.964660004874482e-06, "loss": 0.9656, "step": 4075 }, { "epoch": 0.049723958904610434, "grad_norm": 2.3613436222076416, "learning_rate": 4.97075310748233e-06, "loss": 0.8905, "step": 4080 }, { "epoch": 0.04978489512875824, "grad_norm": 2.3999245166778564, "learning_rate": 4.976846210090179e-06, "loss": 0.9765, "step": 4085 }, { "epoch": 0.04984583135290605, "grad_norm": 2.2095046043395996, "learning_rate": 4.982939312698026e-06, "loss": 0.9355, "step": 4090 }, { "epoch": 0.049906767577053854, "grad_norm": 2.303611993789673, "learning_rate": 4.989032415305874e-06, "loss": 0.9993, "step": 4095 }, { "epoch": 0.04996770380120166, "grad_norm": 2.297487258911133, "learning_rate": 4.995125517913722e-06, "loss": 1.0078, "step": 4100 }, { "epoch": 0.05002864002534947, "grad_norm": 2.313302516937256, "learning_rate": 4.999935856318153e-06, "loss": 0.9463, "step": 4105 }, { "epoch": 0.050089576249497274, "grad_norm": 2.705780029296875, "learning_rate": 4.999615137908916e-06, "loss": 0.9439, "step": 4110 }, { "epoch": 0.05015051247364508, "grad_norm": 1.9366899728775024, "learning_rate": 4.99929441949968e-06, "loss": 0.8571, "step": 4115 }, { "epoch": 0.05021144869779289, "grad_norm": 2.446592092514038, "learning_rate": 4.998973701090443e-06, "loss": 0.9093, "step": 4120 }, { "epoch": 0.050272384921940694, "grad_norm": 1.979678988456726, "learning_rate": 4.998652982681207e-06, "loss": 0.9651, "step": 4125 }, { "epoch": 0.0503333211460885, "grad_norm": 2.1116175651550293, "learning_rate": 4.99833226427197e-06, "loss": 0.9209, "step": 4130 }, { "epoch": 0.05039425737023631, "grad_norm": 1.9690122604370117, "learning_rate": 4.998011545862733e-06, "loss": 0.9468, "step": 4135 }, { "epoch": 0.05045519359438412, "grad_norm": 3.420025110244751, "learning_rate": 4.997690827453497e-06, "loss": 1.0099, "step": 4140 }, { "epoch": 0.05051612981853192, "grad_norm": 1.9421954154968262, "learning_rate": 4.99737010904426e-06, "loss": 0.9176, "step": 4145 }, { "epoch": 0.05057706604267973, "grad_norm": 1.9013793468475342, "learning_rate": 4.997049390635023e-06, "loss": 0.9687, "step": 4150 }, { "epoch": 0.05063800226682754, "grad_norm": 1.9905979633331299, "learning_rate": 4.9967286722257866e-06, "loss": 0.9038, "step": 4155 }, { "epoch": 0.05069893849097534, "grad_norm": 2.1903820037841797, "learning_rate": 4.99640795381655e-06, "loss": 1.0087, "step": 4160 }, { "epoch": 0.05075987471512315, "grad_norm": 2.276634454727173, "learning_rate": 4.996087235407313e-06, "loss": 1.0403, "step": 4165 }, { "epoch": 0.05082081093927096, "grad_norm": 2.0827319622039795, "learning_rate": 4.9957665169980765e-06, "loss": 0.9199, "step": 4170 }, { "epoch": 0.05088174716341876, "grad_norm": 2.0429847240448, "learning_rate": 4.9954457985888395e-06, "loss": 0.9646, "step": 4175 }, { "epoch": 0.05094268338756657, "grad_norm": 2.111738443374634, "learning_rate": 4.9951250801796025e-06, "loss": 0.963, "step": 4180 }, { "epoch": 0.05100361961171438, "grad_norm": 1.8303146362304688, "learning_rate": 4.994804361770366e-06, "loss": 0.9416, "step": 4185 }, { "epoch": 0.05106455583586219, "grad_norm": 2.679424285888672, "learning_rate": 4.994483643361129e-06, "loss": 0.8802, "step": 4190 }, { "epoch": 0.05112549206000999, "grad_norm": 2.5872533321380615, "learning_rate": 4.994162924951892e-06, "loss": 1.0161, "step": 4195 }, { "epoch": 0.0511864282841578, "grad_norm": 2.1106269359588623, "learning_rate": 4.993842206542656e-06, "loss": 0.9265, "step": 4200 }, { "epoch": 0.05124736450830561, "grad_norm": 2.5548019409179688, "learning_rate": 4.993521488133419e-06, "loss": 1.015, "step": 4205 }, { "epoch": 0.05130830073245341, "grad_norm": 2.2539122104644775, "learning_rate": 4.993200769724182e-06, "loss": 0.9974, "step": 4210 }, { "epoch": 0.05136923695660122, "grad_norm": 2.2515270709991455, "learning_rate": 4.992880051314946e-06, "loss": 0.9696, "step": 4215 }, { "epoch": 0.05143017318074903, "grad_norm": 2.0148675441741943, "learning_rate": 4.992559332905709e-06, "loss": 0.988, "step": 4220 }, { "epoch": 0.05149110940489683, "grad_norm": 2.039917469024658, "learning_rate": 4.992238614496472e-06, "loss": 0.9527, "step": 4225 }, { "epoch": 0.05155204562904464, "grad_norm": 2.2999954223632812, "learning_rate": 4.991917896087236e-06, "loss": 1.003, "step": 4230 }, { "epoch": 0.05161298185319245, "grad_norm": 1.8977993726730347, "learning_rate": 4.991597177677999e-06, "loss": 0.9616, "step": 4235 }, { "epoch": 0.05167391807734026, "grad_norm": 1.7425649166107178, "learning_rate": 4.991276459268762e-06, "loss": 0.953, "step": 4240 }, { "epoch": 0.05173485430148806, "grad_norm": 1.960202693939209, "learning_rate": 4.990955740859525e-06, "loss": 0.9473, "step": 4245 }, { "epoch": 0.05179579052563587, "grad_norm": 2.1475565433502197, "learning_rate": 4.990635022450289e-06, "loss": 1.0006, "step": 4250 }, { "epoch": 0.05185672674978368, "grad_norm": 1.9616001844406128, "learning_rate": 4.990314304041052e-06, "loss": 0.9544, "step": 4255 }, { "epoch": 0.05191766297393148, "grad_norm": 2.1962080001831055, "learning_rate": 4.989993585631816e-06, "loss": 0.9982, "step": 4260 }, { "epoch": 0.05197859919807929, "grad_norm": 2.016749382019043, "learning_rate": 4.989672867222579e-06, "loss": 0.9389, "step": 4265 }, { "epoch": 0.0520395354222271, "grad_norm": 2.0137815475463867, "learning_rate": 4.989352148813342e-06, "loss": 0.9072, "step": 4270 }, { "epoch": 0.05210047164637491, "grad_norm": 1.5768598318099976, "learning_rate": 4.989031430404106e-06, "loss": 0.9018, "step": 4275 }, { "epoch": 0.05216140787052271, "grad_norm": 2.435187816619873, "learning_rate": 4.988710711994869e-06, "loss": 0.9488, "step": 4280 }, { "epoch": 0.05222234409467052, "grad_norm": 2.171186923980713, "learning_rate": 4.988389993585633e-06, "loss": 0.9546, "step": 4285 }, { "epoch": 0.05228328031881833, "grad_norm": 2.3618438243865967, "learning_rate": 4.988069275176396e-06, "loss": 0.8602, "step": 4290 }, { "epoch": 0.05234421654296613, "grad_norm": 2.5392568111419678, "learning_rate": 4.987748556767159e-06, "loss": 0.938, "step": 4295 }, { "epoch": 0.05240515276711394, "grad_norm": 2.2699191570281982, "learning_rate": 4.9874278383579225e-06, "loss": 0.9711, "step": 4300 }, { "epoch": 0.05246608899126175, "grad_norm": 1.960006833076477, "learning_rate": 4.9871071199486855e-06, "loss": 0.9505, "step": 4305 }, { "epoch": 0.05252702521540955, "grad_norm": 2.201524496078491, "learning_rate": 4.986786401539449e-06, "loss": 0.9754, "step": 4310 }, { "epoch": 0.05258796143955736, "grad_norm": 1.9891637563705444, "learning_rate": 4.986465683130212e-06, "loss": 0.9233, "step": 4315 }, { "epoch": 0.05264889766370517, "grad_norm": 2.286076068878174, "learning_rate": 4.9861449647209754e-06, "loss": 0.9597, "step": 4320 }, { "epoch": 0.052709833887852976, "grad_norm": 2.4167962074279785, "learning_rate": 4.9858242463117385e-06, "loss": 0.9503, "step": 4325 }, { "epoch": 0.05277077011200078, "grad_norm": 2.1881682872772217, "learning_rate": 4.985503527902502e-06, "loss": 0.9877, "step": 4330 }, { "epoch": 0.05283170633614859, "grad_norm": 2.182337522506714, "learning_rate": 4.985182809493265e-06, "loss": 0.9773, "step": 4335 }, { "epoch": 0.052892642560296396, "grad_norm": 2.031158447265625, "learning_rate": 4.984862091084028e-06, "loss": 0.9265, "step": 4340 }, { "epoch": 0.0529535787844442, "grad_norm": 2.1011457443237305, "learning_rate": 4.984541372674792e-06, "loss": 0.9729, "step": 4345 }, { "epoch": 0.05301451500859201, "grad_norm": 2.688577651977539, "learning_rate": 4.984220654265555e-06, "loss": 0.9825, "step": 4350 }, { "epoch": 0.053075451232739816, "grad_norm": 2.1533782482147217, "learning_rate": 4.983899935856318e-06, "loss": 0.9486, "step": 4355 }, { "epoch": 0.05313638745688762, "grad_norm": 2.378235340118408, "learning_rate": 4.983579217447082e-06, "loss": 0.9923, "step": 4360 }, { "epoch": 0.05319732368103543, "grad_norm": 2.0234909057617188, "learning_rate": 4.983258499037845e-06, "loss": 1.0352, "step": 4365 }, { "epoch": 0.053258259905183236, "grad_norm": 2.058544635772705, "learning_rate": 4.982937780628608e-06, "loss": 0.944, "step": 4370 }, { "epoch": 0.053319196129331045, "grad_norm": 1.9312150478363037, "learning_rate": 4.982617062219372e-06, "loss": 0.9533, "step": 4375 }, { "epoch": 0.05338013235347885, "grad_norm": 2.2067654132843018, "learning_rate": 4.982296343810135e-06, "loss": 0.942, "step": 4380 }, { "epoch": 0.053441068577626656, "grad_norm": 1.8108662366867065, "learning_rate": 4.981975625400898e-06, "loss": 0.9477, "step": 4385 }, { "epoch": 0.053502004801774465, "grad_norm": 2.2960264682769775, "learning_rate": 4.981654906991662e-06, "loss": 0.9276, "step": 4390 }, { "epoch": 0.05356294102592227, "grad_norm": 2.143806219100952, "learning_rate": 4.981334188582425e-06, "loss": 0.9757, "step": 4395 }, { "epoch": 0.053623877250070076, "grad_norm": 2.0813865661621094, "learning_rate": 4.981013470173188e-06, "loss": 0.9444, "step": 4400 }, { "epoch": 0.053684813474217885, "grad_norm": 2.782796621322632, "learning_rate": 4.980692751763952e-06, "loss": 0.9141, "step": 4405 }, { "epoch": 0.05374574969836569, "grad_norm": 2.232351779937744, "learning_rate": 4.980372033354715e-06, "loss": 0.9534, "step": 4410 }, { "epoch": 0.053806685922513496, "grad_norm": 2.2109196186065674, "learning_rate": 4.980051314945478e-06, "loss": 0.9381, "step": 4415 }, { "epoch": 0.053867622146661305, "grad_norm": 1.967576503753662, "learning_rate": 4.979730596536242e-06, "loss": 0.9561, "step": 4420 }, { "epoch": 0.053928558370809114, "grad_norm": 2.0647618770599365, "learning_rate": 4.979409878127005e-06, "loss": 0.9287, "step": 4425 }, { "epoch": 0.053989494594956916, "grad_norm": 2.1984169483184814, "learning_rate": 4.979089159717769e-06, "loss": 0.9707, "step": 4430 }, { "epoch": 0.054050430819104725, "grad_norm": 2.119166851043701, "learning_rate": 4.978768441308532e-06, "loss": 0.9169, "step": 4435 }, { "epoch": 0.054111367043252534, "grad_norm": 2.392920970916748, "learning_rate": 4.978447722899295e-06, "loss": 0.9392, "step": 4440 }, { "epoch": 0.054172303267400336, "grad_norm": 2.01771879196167, "learning_rate": 4.9781270044900585e-06, "loss": 0.9505, "step": 4445 }, { "epoch": 0.054233239491548145, "grad_norm": 2.199697732925415, "learning_rate": 4.9778062860808215e-06, "loss": 0.9137, "step": 4450 }, { "epoch": 0.054294175715695954, "grad_norm": 1.956658959388733, "learning_rate": 4.977485567671585e-06, "loss": 0.9345, "step": 4455 }, { "epoch": 0.05435511193984376, "grad_norm": 2.4196255207061768, "learning_rate": 4.977164849262348e-06, "loss": 0.9657, "step": 4460 }, { "epoch": 0.054416048163991565, "grad_norm": 2.2348363399505615, "learning_rate": 4.976844130853111e-06, "loss": 1.0461, "step": 4465 }, { "epoch": 0.054476984388139374, "grad_norm": 1.9359732866287231, "learning_rate": 4.976523412443875e-06, "loss": 0.9457, "step": 4470 }, { "epoch": 0.05453792061228718, "grad_norm": 1.844224214553833, "learning_rate": 4.976202694034638e-06, "loss": 0.9571, "step": 4475 }, { "epoch": 0.054598856836434985, "grad_norm": 2.1093294620513916, "learning_rate": 4.975881975625401e-06, "loss": 0.8942, "step": 4480 }, { "epoch": 0.054659793060582794, "grad_norm": 2.0967016220092773, "learning_rate": 4.975561257216164e-06, "loss": 0.9645, "step": 4485 }, { "epoch": 0.0547207292847306, "grad_norm": 2.0468149185180664, "learning_rate": 4.975240538806928e-06, "loss": 0.8848, "step": 4490 }, { "epoch": 0.054781665508878405, "grad_norm": 1.8847159147262573, "learning_rate": 4.974919820397691e-06, "loss": 0.9545, "step": 4495 }, { "epoch": 0.054842601733026214, "grad_norm": 2.488862991333008, "learning_rate": 4.974599101988454e-06, "loss": 1.0106, "step": 4500 }, { "epoch": 0.05490353795717402, "grad_norm": 2.3726818561553955, "learning_rate": 4.974278383579218e-06, "loss": 0.9546, "step": 4505 }, { "epoch": 0.05496447418132183, "grad_norm": 1.9580597877502441, "learning_rate": 4.973957665169981e-06, "loss": 1.0194, "step": 4510 }, { "epoch": 0.055025410405469634, "grad_norm": 2.0597589015960693, "learning_rate": 4.973636946760744e-06, "loss": 0.9879, "step": 4515 }, { "epoch": 0.05508634662961744, "grad_norm": 2.116549015045166, "learning_rate": 4.973316228351508e-06, "loss": 0.9127, "step": 4520 }, { "epoch": 0.05514728285376525, "grad_norm": 2.1077561378479004, "learning_rate": 4.972995509942271e-06, "loss": 0.9587, "step": 4525 }, { "epoch": 0.055208219077913054, "grad_norm": 2.350987434387207, "learning_rate": 4.972674791533034e-06, "loss": 0.9192, "step": 4530 }, { "epoch": 0.05526915530206086, "grad_norm": 2.3660855293273926, "learning_rate": 4.972354073123798e-06, "loss": 0.8545, "step": 4535 }, { "epoch": 0.05533009152620867, "grad_norm": 2.4970510005950928, "learning_rate": 4.972033354714561e-06, "loss": 0.9758, "step": 4540 }, { "epoch": 0.055391027750356474, "grad_norm": 2.3673601150512695, "learning_rate": 4.971712636305324e-06, "loss": 1.0075, "step": 4545 }, { "epoch": 0.05545196397450428, "grad_norm": 1.9660429954528809, "learning_rate": 4.971391917896088e-06, "loss": 0.8795, "step": 4550 }, { "epoch": 0.05551290019865209, "grad_norm": 1.9042744636535645, "learning_rate": 4.971071199486851e-06, "loss": 0.9984, "step": 4555 }, { "epoch": 0.0555738364227999, "grad_norm": 2.036038875579834, "learning_rate": 4.970750481077614e-06, "loss": 0.9306, "step": 4560 }, { "epoch": 0.0556347726469477, "grad_norm": 2.0468339920043945, "learning_rate": 4.970429762668378e-06, "loss": 0.9754, "step": 4565 }, { "epoch": 0.05569570887109551, "grad_norm": 2.687669038772583, "learning_rate": 4.970109044259141e-06, "loss": 0.9539, "step": 4570 }, { "epoch": 0.05575664509524332, "grad_norm": 1.9255914688110352, "learning_rate": 4.969788325849904e-06, "loss": 1.0391, "step": 4575 }, { "epoch": 0.05581758131939112, "grad_norm": 2.27289080619812, "learning_rate": 4.9694676074406676e-06, "loss": 0.8771, "step": 4580 }, { "epoch": 0.05587851754353893, "grad_norm": 1.8797038793563843, "learning_rate": 4.9691468890314306e-06, "loss": 0.9751, "step": 4585 }, { "epoch": 0.05593945376768674, "grad_norm": 1.8499515056610107, "learning_rate": 4.9688261706221944e-06, "loss": 0.9625, "step": 4590 }, { "epoch": 0.05600038999183454, "grad_norm": 2.2581887245178223, "learning_rate": 4.9685054522129575e-06, "loss": 0.9531, "step": 4595 }, { "epoch": 0.05606132621598235, "grad_norm": 1.9088228940963745, "learning_rate": 4.968184733803721e-06, "loss": 0.9726, "step": 4600 }, { "epoch": 0.05612226244013016, "grad_norm": 1.9472122192382812, "learning_rate": 4.967864015394484e-06, "loss": 0.9589, "step": 4605 }, { "epoch": 0.05618319866427797, "grad_norm": 2.235375165939331, "learning_rate": 4.967543296985247e-06, "loss": 0.9936, "step": 4610 }, { "epoch": 0.05624413488842577, "grad_norm": 2.094560384750366, "learning_rate": 4.967222578576011e-06, "loss": 0.9467, "step": 4615 }, { "epoch": 0.05630507111257358, "grad_norm": 2.050271511077881, "learning_rate": 4.966901860166774e-06, "loss": 1.0611, "step": 4620 }, { "epoch": 0.05636600733672139, "grad_norm": 1.763024926185608, "learning_rate": 4.966581141757537e-06, "loss": 0.9391, "step": 4625 }, { "epoch": 0.05642694356086919, "grad_norm": 1.8932347297668457, "learning_rate": 4.966260423348301e-06, "loss": 0.9969, "step": 4630 }, { "epoch": 0.056487879785017, "grad_norm": 2.0808145999908447, "learning_rate": 4.965939704939064e-06, "loss": 1.0051, "step": 4635 }, { "epoch": 0.05654881600916481, "grad_norm": 2.4325435161590576, "learning_rate": 4.965618986529827e-06, "loss": 0.9773, "step": 4640 }, { "epoch": 0.05660975223331261, "grad_norm": 2.050163745880127, "learning_rate": 4.965298268120591e-06, "loss": 1.0409, "step": 4645 }, { "epoch": 0.05667068845746042, "grad_norm": 2.2804789543151855, "learning_rate": 4.964977549711354e-06, "loss": 0.9534, "step": 4650 }, { "epoch": 0.05673162468160823, "grad_norm": 2.042606830596924, "learning_rate": 4.964656831302117e-06, "loss": 0.9445, "step": 4655 }, { "epoch": 0.05679256090575604, "grad_norm": 2.1408424377441406, "learning_rate": 4.96433611289288e-06, "loss": 0.922, "step": 4660 }, { "epoch": 0.05685349712990384, "grad_norm": 2.43261981010437, "learning_rate": 4.964015394483644e-06, "loss": 0.9609, "step": 4665 }, { "epoch": 0.05691443335405165, "grad_norm": 1.746370553970337, "learning_rate": 4.963694676074407e-06, "loss": 0.9925, "step": 4670 }, { "epoch": 0.05697536957819946, "grad_norm": 2.5440924167633057, "learning_rate": 4.96337395766517e-06, "loss": 0.9635, "step": 4675 }, { "epoch": 0.05703630580234726, "grad_norm": 2.0889968872070312, "learning_rate": 4.963053239255934e-06, "loss": 1.0092, "step": 4680 }, { "epoch": 0.05709724202649507, "grad_norm": 2.0752875804901123, "learning_rate": 4.962732520846697e-06, "loss": 0.9116, "step": 4685 }, { "epoch": 0.05715817825064288, "grad_norm": 2.0645389556884766, "learning_rate": 4.96241180243746e-06, "loss": 0.916, "step": 4690 }, { "epoch": 0.05721911447479069, "grad_norm": 2.6126043796539307, "learning_rate": 4.962091084028224e-06, "loss": 0.9013, "step": 4695 }, { "epoch": 0.05728005069893849, "grad_norm": 2.8825111389160156, "learning_rate": 4.961770365618987e-06, "loss": 0.9681, "step": 4700 }, { "epoch": 0.0573409869230863, "grad_norm": 2.0285089015960693, "learning_rate": 4.96144964720975e-06, "loss": 1.0308, "step": 4705 }, { "epoch": 0.05740192314723411, "grad_norm": 2.0823240280151367, "learning_rate": 4.961128928800514e-06, "loss": 1.03, "step": 4710 }, { "epoch": 0.05746285937138191, "grad_norm": 2.1190946102142334, "learning_rate": 4.960808210391277e-06, "loss": 0.9874, "step": 4715 }, { "epoch": 0.05752379559552972, "grad_norm": 2.221510171890259, "learning_rate": 4.96048749198204e-06, "loss": 1.0153, "step": 4720 }, { "epoch": 0.05758473181967753, "grad_norm": 1.93096125125885, "learning_rate": 4.9601667735728035e-06, "loss": 0.9854, "step": 4725 }, { "epoch": 0.05764566804382533, "grad_norm": 2.2436416149139404, "learning_rate": 4.9598460551635665e-06, "loss": 0.952, "step": 4730 }, { "epoch": 0.05770660426797314, "grad_norm": 2.133604049682617, "learning_rate": 4.95952533675433e-06, "loss": 0.936, "step": 4735 }, { "epoch": 0.05776754049212095, "grad_norm": 2.3710179328918457, "learning_rate": 4.959204618345093e-06, "loss": 1.0048, "step": 4740 }, { "epoch": 0.057828476716268756, "grad_norm": 2.96234393119812, "learning_rate": 4.9588838999358564e-06, "loss": 0.9674, "step": 4745 }, { "epoch": 0.05788941294041656, "grad_norm": 2.349517583847046, "learning_rate": 4.95856318152662e-06, "loss": 0.9354, "step": 4750 }, { "epoch": 0.05795034916456437, "grad_norm": 2.169318914413452, "learning_rate": 4.958242463117383e-06, "loss": 0.9692, "step": 4755 }, { "epoch": 0.058011285388712176, "grad_norm": 2.147169589996338, "learning_rate": 4.957921744708147e-06, "loss": 0.9483, "step": 4760 }, { "epoch": 0.05807222161285998, "grad_norm": 2.1766092777252197, "learning_rate": 4.95760102629891e-06, "loss": 0.8488, "step": 4765 }, { "epoch": 0.05813315783700779, "grad_norm": 2.072807550430298, "learning_rate": 4.957280307889673e-06, "loss": 0.8657, "step": 4770 }, { "epoch": 0.058194094061155596, "grad_norm": 2.3554985523223877, "learning_rate": 4.956959589480437e-06, "loss": 0.8717, "step": 4775 }, { "epoch": 0.0582550302853034, "grad_norm": 1.936653971672058, "learning_rate": 4.9566388710712e-06, "loss": 0.9614, "step": 4780 }, { "epoch": 0.05831596650945121, "grad_norm": 2.0056257247924805, "learning_rate": 4.956318152661963e-06, "loss": 0.9476, "step": 4785 }, { "epoch": 0.058376902733599016, "grad_norm": 2.04495906829834, "learning_rate": 4.955997434252727e-06, "loss": 1.0185, "step": 4790 }, { "epoch": 0.058437838957746825, "grad_norm": 1.99125075340271, "learning_rate": 4.95567671584349e-06, "loss": 0.9917, "step": 4795 }, { "epoch": 0.05849877518189463, "grad_norm": 2.226348638534546, "learning_rate": 4.955355997434253e-06, "loss": 0.9046, "step": 4800 }, { "epoch": 0.058559711406042436, "grad_norm": 2.2115886211395264, "learning_rate": 4.955035279025017e-06, "loss": 0.9877, "step": 4805 }, { "epoch": 0.058620647630190245, "grad_norm": 2.4100098609924316, "learning_rate": 4.95471456061578e-06, "loss": 0.9314, "step": 4810 }, { "epoch": 0.05868158385433805, "grad_norm": 2.2989494800567627, "learning_rate": 4.954393842206543e-06, "loss": 1.0109, "step": 4815 }, { "epoch": 0.058742520078485856, "grad_norm": 2.014691114425659, "learning_rate": 4.954073123797306e-06, "loss": 0.9704, "step": 4820 }, { "epoch": 0.058803456302633665, "grad_norm": 2.0212395191192627, "learning_rate": 4.95375240538807e-06, "loss": 0.8912, "step": 4825 }, { "epoch": 0.05886439252678147, "grad_norm": 1.7669754028320312, "learning_rate": 4.953431686978833e-06, "loss": 0.9631, "step": 4830 }, { "epoch": 0.058925328750929276, "grad_norm": 2.150233268737793, "learning_rate": 4.953110968569596e-06, "loss": 0.9262, "step": 4835 }, { "epoch": 0.058986264975077085, "grad_norm": 2.3365492820739746, "learning_rate": 4.95279025016036e-06, "loss": 0.9497, "step": 4840 }, { "epoch": 0.059047201199224894, "grad_norm": 1.9619253873825073, "learning_rate": 4.952469531751123e-06, "loss": 0.9316, "step": 4845 }, { "epoch": 0.059108137423372696, "grad_norm": 2.1293716430664062, "learning_rate": 4.952148813341886e-06, "loss": 0.9715, "step": 4850 }, { "epoch": 0.059169073647520505, "grad_norm": 2.0133609771728516, "learning_rate": 4.9518280949326496e-06, "loss": 0.8595, "step": 4855 }, { "epoch": 0.059230009871668314, "grad_norm": 2.3402154445648193, "learning_rate": 4.951507376523413e-06, "loss": 0.9771, "step": 4860 }, { "epoch": 0.059290946095816116, "grad_norm": 1.9398107528686523, "learning_rate": 4.951186658114176e-06, "loss": 0.9306, "step": 4865 }, { "epoch": 0.059351882319963925, "grad_norm": 2.0551598072052, "learning_rate": 4.9508659397049395e-06, "loss": 0.9358, "step": 4870 }, { "epoch": 0.059412818544111734, "grad_norm": 2.263112783432007, "learning_rate": 4.9505452212957025e-06, "loss": 0.9339, "step": 4875 }, { "epoch": 0.05947375476825954, "grad_norm": 1.8991109132766724, "learning_rate": 4.950224502886466e-06, "loss": 0.9687, "step": 4880 }, { "epoch": 0.059534690992407345, "grad_norm": 1.888001799583435, "learning_rate": 4.949903784477229e-06, "loss": 0.9218, "step": 4885 }, { "epoch": 0.059595627216555154, "grad_norm": 2.322986602783203, "learning_rate": 4.949583066067992e-06, "loss": 0.8632, "step": 4890 }, { "epoch": 0.05965656344070296, "grad_norm": 1.9560644626617432, "learning_rate": 4.949262347658756e-06, "loss": 0.9544, "step": 4895 }, { "epoch": 0.059717499664850765, "grad_norm": 3.141874074935913, "learning_rate": 4.948941629249519e-06, "loss": 1.0236, "step": 4900 }, { "epoch": 0.059778435888998574, "grad_norm": 2.134507894515991, "learning_rate": 4.948620910840283e-06, "loss": 0.9077, "step": 4905 }, { "epoch": 0.05983937211314638, "grad_norm": 2.163839101791382, "learning_rate": 4.948300192431046e-06, "loss": 0.939, "step": 4910 }, { "epoch": 0.059900308337294185, "grad_norm": 1.9688993692398071, "learning_rate": 4.947979474021809e-06, "loss": 0.9397, "step": 4915 }, { "epoch": 0.059961244561441994, "grad_norm": 2.1418616771698, "learning_rate": 4.947658755612573e-06, "loss": 0.9165, "step": 4920 }, { "epoch": 0.0600221807855898, "grad_norm": 2.1541857719421387, "learning_rate": 4.947338037203336e-06, "loss": 0.9699, "step": 4925 }, { "epoch": 0.06008311700973761, "grad_norm": 2.119509220123291, "learning_rate": 4.947017318794099e-06, "loss": 0.9656, "step": 4930 }, { "epoch": 0.060144053233885414, "grad_norm": 1.8726918697357178, "learning_rate": 4.946696600384863e-06, "loss": 0.9013, "step": 4935 }, { "epoch": 0.06020498945803322, "grad_norm": 2.397153377532959, "learning_rate": 4.946375881975626e-06, "loss": 0.9568, "step": 4940 }, { "epoch": 0.06026592568218103, "grad_norm": 1.911067008972168, "learning_rate": 4.946055163566389e-06, "loss": 0.9206, "step": 4945 }, { "epoch": 0.060326861906328834, "grad_norm": 2.636570930480957, "learning_rate": 4.945734445157153e-06, "loss": 0.9342, "step": 4950 }, { "epoch": 0.06038779813047664, "grad_norm": 2.252887725830078, "learning_rate": 4.945413726747916e-06, "loss": 0.9343, "step": 4955 }, { "epoch": 0.06044873435462445, "grad_norm": 2.360598087310791, "learning_rate": 4.945093008338679e-06, "loss": 0.8704, "step": 4960 }, { "epoch": 0.060509670578772254, "grad_norm": 1.9237689971923828, "learning_rate": 4.944772289929443e-06, "loss": 0.9614, "step": 4965 }, { "epoch": 0.06057060680292006, "grad_norm": 1.8567075729370117, "learning_rate": 4.944451571520206e-06, "loss": 0.8197, "step": 4970 }, { "epoch": 0.06063154302706787, "grad_norm": 1.9225867986679077, "learning_rate": 4.944130853110969e-06, "loss": 0.9385, "step": 4975 }, { "epoch": 0.06069247925121568, "grad_norm": 2.5478789806365967, "learning_rate": 4.943810134701733e-06, "loss": 0.9469, "step": 4980 }, { "epoch": 0.06075341547536348, "grad_norm": 2.2117717266082764, "learning_rate": 4.943489416292496e-06, "loss": 1.0034, "step": 4985 }, { "epoch": 0.06081435169951129, "grad_norm": 2.125997304916382, "learning_rate": 4.943168697883259e-06, "loss": 0.8498, "step": 4990 }, { "epoch": 0.0608752879236591, "grad_norm": 2.2234039306640625, "learning_rate": 4.942847979474022e-06, "loss": 0.944, "step": 4995 }, { "epoch": 0.0609362241478069, "grad_norm": 1.9848027229309082, "learning_rate": 4.9425272610647855e-06, "loss": 0.8733, "step": 5000 }, { "epoch": 0.06099716037195471, "grad_norm": 1.998953104019165, "learning_rate": 4.9422065426555485e-06, "loss": 0.9057, "step": 5005 }, { "epoch": 0.06105809659610252, "grad_norm": 2.1967885494232178, "learning_rate": 4.9418858242463116e-06, "loss": 0.9551, "step": 5010 }, { "epoch": 0.06111903282025032, "grad_norm": 2.123016357421875, "learning_rate": 4.9415651058370754e-06, "loss": 1.0116, "step": 5015 }, { "epoch": 0.06117996904439813, "grad_norm": 2.061354875564575, "learning_rate": 4.9412443874278384e-06, "loss": 0.9936, "step": 5020 }, { "epoch": 0.06124090526854594, "grad_norm": 2.494229316711426, "learning_rate": 4.9409236690186015e-06, "loss": 0.9261, "step": 5025 }, { "epoch": 0.06130184149269375, "grad_norm": 2.0895004272460938, "learning_rate": 4.940602950609365e-06, "loss": 0.9222, "step": 5030 }, { "epoch": 0.06136277771684155, "grad_norm": 1.9097357988357544, "learning_rate": 4.940282232200128e-06, "loss": 0.9027, "step": 5035 }, { "epoch": 0.06142371394098936, "grad_norm": 2.067857265472412, "learning_rate": 4.939961513790892e-06, "loss": 0.9087, "step": 5040 }, { "epoch": 0.06148465016513717, "grad_norm": 1.9832195043563843, "learning_rate": 4.939640795381655e-06, "loss": 0.9385, "step": 5045 }, { "epoch": 0.06154558638928497, "grad_norm": 1.933997392654419, "learning_rate": 4.939320076972419e-06, "loss": 0.983, "step": 5050 }, { "epoch": 0.06160652261343278, "grad_norm": 2.287663221359253, "learning_rate": 4.938999358563182e-06, "loss": 0.9244, "step": 5055 }, { "epoch": 0.06166745883758059, "grad_norm": 1.896950364112854, "learning_rate": 4.938678640153945e-06, "loss": 0.9532, "step": 5060 }, { "epoch": 0.06172839506172839, "grad_norm": 2.494731903076172, "learning_rate": 4.938357921744709e-06, "loss": 0.9194, "step": 5065 }, { "epoch": 0.0617893312858762, "grad_norm": 2.278404951095581, "learning_rate": 4.938037203335472e-06, "loss": 0.9397, "step": 5070 }, { "epoch": 0.06185026751002401, "grad_norm": 1.942125916481018, "learning_rate": 4.937716484926235e-06, "loss": 0.9131, "step": 5075 }, { "epoch": 0.06191120373417182, "grad_norm": 2.1517515182495117, "learning_rate": 4.937395766516999e-06, "loss": 0.9805, "step": 5080 }, { "epoch": 0.06197213995831962, "grad_norm": 2.3183600902557373, "learning_rate": 4.937075048107762e-06, "loss": 1.0266, "step": 5085 }, { "epoch": 0.06203307618246743, "grad_norm": 1.9815300703048706, "learning_rate": 4.936754329698525e-06, "loss": 0.971, "step": 5090 }, { "epoch": 0.06209401240661524, "grad_norm": 1.6847152709960938, "learning_rate": 4.936433611289289e-06, "loss": 1.0025, "step": 5095 }, { "epoch": 0.06215494863076304, "grad_norm": 2.6026501655578613, "learning_rate": 4.936112892880052e-06, "loss": 0.8576, "step": 5100 }, { "epoch": 0.06221588485491085, "grad_norm": 2.2468771934509277, "learning_rate": 4.935792174470815e-06, "loss": 0.8861, "step": 5105 }, { "epoch": 0.06227682107905866, "grad_norm": 2.325155019760132, "learning_rate": 4.935471456061579e-06, "loss": 0.99, "step": 5110 }, { "epoch": 0.06233775730320647, "grad_norm": 2.1791412830352783, "learning_rate": 4.935150737652342e-06, "loss": 0.9304, "step": 5115 }, { "epoch": 0.06239869352735427, "grad_norm": 2.1914873123168945, "learning_rate": 4.934830019243105e-06, "loss": 0.9573, "step": 5120 }, { "epoch": 0.06245962975150208, "grad_norm": 1.9534186124801636, "learning_rate": 4.9345093008338686e-06, "loss": 0.9974, "step": 5125 }, { "epoch": 0.06252056597564988, "grad_norm": 2.082526922225952, "learning_rate": 4.934188582424632e-06, "loss": 0.9241, "step": 5130 }, { "epoch": 0.06258150219979769, "grad_norm": 1.926159143447876, "learning_rate": 4.933867864015395e-06, "loss": 0.9355, "step": 5135 }, { "epoch": 0.0626424384239455, "grad_norm": 2.4783096313476562, "learning_rate": 4.9335471456061585e-06, "loss": 0.923, "step": 5140 }, { "epoch": 0.06270337464809331, "grad_norm": 2.1311943531036377, "learning_rate": 4.9332264271969215e-06, "loss": 0.9165, "step": 5145 }, { "epoch": 0.06276431087224112, "grad_norm": 2.1435773372650146, "learning_rate": 4.9329057087876845e-06, "loss": 0.9896, "step": 5150 }, { "epoch": 0.06282524709638893, "grad_norm": 1.7961151599884033, "learning_rate": 4.932584990378448e-06, "loss": 0.9471, "step": 5155 }, { "epoch": 0.06288618332053672, "grad_norm": 2.0640339851379395, "learning_rate": 4.932264271969211e-06, "loss": 0.8842, "step": 5160 }, { "epoch": 0.06294711954468453, "grad_norm": 2.123392105102539, "learning_rate": 4.931943553559974e-06, "loss": 0.9098, "step": 5165 }, { "epoch": 0.06300805576883234, "grad_norm": 2.114081382751465, "learning_rate": 4.931622835150737e-06, "loss": 0.8885, "step": 5170 }, { "epoch": 0.06306899199298015, "grad_norm": 2.009061336517334, "learning_rate": 4.931302116741501e-06, "loss": 0.8772, "step": 5175 }, { "epoch": 0.06312992821712796, "grad_norm": 2.142570734024048, "learning_rate": 4.930981398332264e-06, "loss": 0.9443, "step": 5180 }, { "epoch": 0.06319086444127577, "grad_norm": 2.5183029174804688, "learning_rate": 4.930660679923028e-06, "loss": 0.9546, "step": 5185 }, { "epoch": 0.06325180066542357, "grad_norm": 2.2181975841522217, "learning_rate": 4.930339961513791e-06, "loss": 0.9538, "step": 5190 }, { "epoch": 0.06331273688957137, "grad_norm": 2.035323143005371, "learning_rate": 4.930019243104554e-06, "loss": 0.9804, "step": 5195 }, { "epoch": 0.06337367311371918, "grad_norm": 3.440798044204712, "learning_rate": 4.929698524695318e-06, "loss": 0.8855, "step": 5200 }, { "epoch": 0.06343460933786699, "grad_norm": 1.869666576385498, "learning_rate": 4.929377806286081e-06, "loss": 0.9891, "step": 5205 }, { "epoch": 0.0634955455620148, "grad_norm": 1.7425141334533691, "learning_rate": 4.929057087876845e-06, "loss": 0.9696, "step": 5210 }, { "epoch": 0.0635564817861626, "grad_norm": 2.2117691040039062, "learning_rate": 4.928736369467608e-06, "loss": 0.8493, "step": 5215 }, { "epoch": 0.06361741801031041, "grad_norm": 1.8980265855789185, "learning_rate": 4.928415651058371e-06, "loss": 1.008, "step": 5220 }, { "epoch": 0.06367835423445821, "grad_norm": 1.9650155305862427, "learning_rate": 4.928094932649135e-06, "loss": 0.9472, "step": 5225 }, { "epoch": 0.06373929045860602, "grad_norm": 2.1409924030303955, "learning_rate": 4.927774214239898e-06, "loss": 1.0056, "step": 5230 }, { "epoch": 0.06380022668275383, "grad_norm": 2.0142595767974854, "learning_rate": 4.927453495830662e-06, "loss": 0.9044, "step": 5235 }, { "epoch": 0.06386116290690164, "grad_norm": 2.1738502979278564, "learning_rate": 4.927132777421425e-06, "loss": 0.9163, "step": 5240 }, { "epoch": 0.06392209913104945, "grad_norm": 2.199079751968384, "learning_rate": 4.926812059012188e-06, "loss": 0.965, "step": 5245 }, { "epoch": 0.06398303535519725, "grad_norm": 2.1126365661621094, "learning_rate": 4.926491340602951e-06, "loss": 1.008, "step": 5250 }, { "epoch": 0.06404397157934506, "grad_norm": 2.017765522003174, "learning_rate": 4.926170622193715e-06, "loss": 0.9437, "step": 5255 }, { "epoch": 0.06410490780349286, "grad_norm": 1.9682894945144653, "learning_rate": 4.925849903784478e-06, "loss": 0.9351, "step": 5260 }, { "epoch": 0.06416584402764067, "grad_norm": 1.9868667125701904, "learning_rate": 4.925529185375241e-06, "loss": 0.97, "step": 5265 }, { "epoch": 0.06422678025178848, "grad_norm": 1.7767274379730225, "learning_rate": 4.9252084669660045e-06, "loss": 0.9495, "step": 5270 }, { "epoch": 0.06428771647593629, "grad_norm": 1.8576022386550903, "learning_rate": 4.9248877485567675e-06, "loss": 0.9211, "step": 5275 }, { "epoch": 0.0643486527000841, "grad_norm": 1.897353172302246, "learning_rate": 4.9245670301475306e-06, "loss": 0.9728, "step": 5280 }, { "epoch": 0.0644095889242319, "grad_norm": 2.3882906436920166, "learning_rate": 4.9242463117382944e-06, "loss": 1.0213, "step": 5285 }, { "epoch": 0.06447052514837971, "grad_norm": 1.8121424913406372, "learning_rate": 4.9239255933290574e-06, "loss": 0.9085, "step": 5290 }, { "epoch": 0.06453146137252751, "grad_norm": 2.037684917449951, "learning_rate": 4.9236048749198205e-06, "loss": 0.8986, "step": 5295 }, { "epoch": 0.06459239759667532, "grad_norm": 2.306941270828247, "learning_rate": 4.923284156510584e-06, "loss": 0.9686, "step": 5300 }, { "epoch": 0.06465333382082313, "grad_norm": 1.9806796312332153, "learning_rate": 4.922963438101347e-06, "loss": 0.9889, "step": 5305 }, { "epoch": 0.06471427004497093, "grad_norm": 2.2044315338134766, "learning_rate": 4.92264271969211e-06, "loss": 0.9674, "step": 5310 }, { "epoch": 0.06477520626911874, "grad_norm": 2.02834415435791, "learning_rate": 4.922322001282874e-06, "loss": 0.9194, "step": 5315 }, { "epoch": 0.06483614249326655, "grad_norm": 2.35077166557312, "learning_rate": 4.922001282873637e-06, "loss": 0.8969, "step": 5320 }, { "epoch": 0.06489707871741436, "grad_norm": 1.8145252466201782, "learning_rate": 4.9216805644644e-06, "loss": 0.959, "step": 5325 }, { "epoch": 0.06495801494156216, "grad_norm": 1.9050465822219849, "learning_rate": 4.921359846055164e-06, "loss": 0.9536, "step": 5330 }, { "epoch": 0.06501895116570997, "grad_norm": 1.9799162149429321, "learning_rate": 4.921039127645927e-06, "loss": 0.9926, "step": 5335 }, { "epoch": 0.06507988738985777, "grad_norm": 1.9568346738815308, "learning_rate": 4.92071840923669e-06, "loss": 0.9383, "step": 5340 }, { "epoch": 0.06514082361400558, "grad_norm": 2.255178451538086, "learning_rate": 4.920397690827454e-06, "loss": 0.8488, "step": 5345 }, { "epoch": 0.06520175983815339, "grad_norm": 2.1354012489318848, "learning_rate": 4.920076972418217e-06, "loss": 0.9825, "step": 5350 }, { "epoch": 0.0652626960623012, "grad_norm": 2.062612295150757, "learning_rate": 4.919756254008981e-06, "loss": 0.9973, "step": 5355 }, { "epoch": 0.065323632286449, "grad_norm": 1.853784203529358, "learning_rate": 4.919435535599744e-06, "loss": 0.9027, "step": 5360 }, { "epoch": 0.0653845685105968, "grad_norm": 1.7247583866119385, "learning_rate": 4.919114817190507e-06, "loss": 0.869, "step": 5365 }, { "epoch": 0.06544550473474461, "grad_norm": 2.319300651550293, "learning_rate": 4.918794098781271e-06, "loss": 1.0535, "step": 5370 }, { "epoch": 0.06550644095889242, "grad_norm": 1.9412859678268433, "learning_rate": 4.918473380372034e-06, "loss": 0.9143, "step": 5375 }, { "epoch": 0.06556737718304023, "grad_norm": 2.063267469406128, "learning_rate": 4.918152661962798e-06, "loss": 0.928, "step": 5380 }, { "epoch": 0.06562831340718804, "grad_norm": 2.161742687225342, "learning_rate": 4.917831943553561e-06, "loss": 0.9544, "step": 5385 }, { "epoch": 0.06568924963133585, "grad_norm": 2.228863477706909, "learning_rate": 4.917511225144324e-06, "loss": 1.0183, "step": 5390 }, { "epoch": 0.06575018585548364, "grad_norm": 1.9259026050567627, "learning_rate": 4.9171905067350876e-06, "loss": 0.9698, "step": 5395 }, { "epoch": 0.06581112207963145, "grad_norm": 1.8600404262542725, "learning_rate": 4.916869788325851e-06, "loss": 0.9432, "step": 5400 }, { "epoch": 0.06587205830377926, "grad_norm": 2.4627840518951416, "learning_rate": 4.916549069916614e-06, "loss": 0.9089, "step": 5405 }, { "epoch": 0.06593299452792707, "grad_norm": 1.9961870908737183, "learning_rate": 4.916228351507377e-06, "loss": 0.994, "step": 5410 }, { "epoch": 0.06599393075207488, "grad_norm": 2.1012933254241943, "learning_rate": 4.9159076330981405e-06, "loss": 0.9521, "step": 5415 }, { "epoch": 0.06605486697622269, "grad_norm": 1.9649673700332642, "learning_rate": 4.9155869146889035e-06, "loss": 0.9363, "step": 5420 }, { "epoch": 0.0661158032003705, "grad_norm": 2.5086309909820557, "learning_rate": 4.9152661962796665e-06, "loss": 0.9063, "step": 5425 }, { "epoch": 0.0661767394245183, "grad_norm": 2.354182481765747, "learning_rate": 4.91494547787043e-06, "loss": 0.9524, "step": 5430 }, { "epoch": 0.0662376756486661, "grad_norm": 1.8379513025283813, "learning_rate": 4.914624759461193e-06, "loss": 0.951, "step": 5435 }, { "epoch": 0.06629861187281391, "grad_norm": 2.0088863372802734, "learning_rate": 4.914304041051956e-06, "loss": 0.926, "step": 5440 }, { "epoch": 0.06635954809696172, "grad_norm": 2.177493095397949, "learning_rate": 4.91398332264272e-06, "loss": 0.9268, "step": 5445 }, { "epoch": 0.06642048432110953, "grad_norm": 2.9914462566375732, "learning_rate": 4.913662604233483e-06, "loss": 0.9455, "step": 5450 }, { "epoch": 0.06648142054525734, "grad_norm": 2.0806522369384766, "learning_rate": 4.913341885824246e-06, "loss": 0.9658, "step": 5455 }, { "epoch": 0.06654235676940513, "grad_norm": 2.09787654876709, "learning_rate": 4.91302116741501e-06, "loss": 1.0195, "step": 5460 }, { "epoch": 0.06660329299355294, "grad_norm": 2.123009443283081, "learning_rate": 4.912700449005773e-06, "loss": 0.9368, "step": 5465 }, { "epoch": 0.06666422921770075, "grad_norm": 1.86814284324646, "learning_rate": 4.912379730596536e-06, "loss": 0.9637, "step": 5470 }, { "epoch": 0.06672516544184856, "grad_norm": 1.9526251554489136, "learning_rate": 4.9120590121873e-06, "loss": 0.892, "step": 5475 }, { "epoch": 0.06678610166599637, "grad_norm": 2.104792833328247, "learning_rate": 4.911738293778063e-06, "loss": 0.9331, "step": 5480 }, { "epoch": 0.06684703789014418, "grad_norm": 1.8674871921539307, "learning_rate": 4.911417575368826e-06, "loss": 0.9829, "step": 5485 }, { "epoch": 0.06690797411429199, "grad_norm": 2.7996275424957275, "learning_rate": 4.91109685695959e-06, "loss": 0.8925, "step": 5490 }, { "epoch": 0.06696891033843978, "grad_norm": 2.3862006664276123, "learning_rate": 4.910776138550353e-06, "loss": 0.9085, "step": 5495 }, { "epoch": 0.06702984656258759, "grad_norm": 2.2744815349578857, "learning_rate": 4.910455420141116e-06, "loss": 0.948, "step": 5500 }, { "epoch": 0.0670907827867354, "grad_norm": 1.952099084854126, "learning_rate": 4.91013470173188e-06, "loss": 0.963, "step": 5505 }, { "epoch": 0.06715171901088321, "grad_norm": 2.086008071899414, "learning_rate": 4.909813983322643e-06, "loss": 1.0034, "step": 5510 }, { "epoch": 0.06721265523503102, "grad_norm": 2.3674612045288086, "learning_rate": 4.909493264913407e-06, "loss": 0.985, "step": 5515 }, { "epoch": 0.06727359145917883, "grad_norm": 1.5989186763763428, "learning_rate": 4.90917254650417e-06, "loss": 0.9206, "step": 5520 }, { "epoch": 0.06733452768332664, "grad_norm": 2.0675480365753174, "learning_rate": 4.908851828094934e-06, "loss": 0.9688, "step": 5525 }, { "epoch": 0.06739546390747443, "grad_norm": 1.9837356805801392, "learning_rate": 4.908531109685697e-06, "loss": 1.0034, "step": 5530 }, { "epoch": 0.06745640013162224, "grad_norm": 2.1570448875427246, "learning_rate": 4.90821039127646e-06, "loss": 0.9238, "step": 5535 }, { "epoch": 0.06751733635577005, "grad_norm": 2.218886375427246, "learning_rate": 4.9078896728672235e-06, "loss": 1.0089, "step": 5540 }, { "epoch": 0.06757827257991786, "grad_norm": 1.920974612236023, "learning_rate": 4.9075689544579865e-06, "loss": 0.8875, "step": 5545 }, { "epoch": 0.06763920880406567, "grad_norm": 2.211859703063965, "learning_rate": 4.9072482360487496e-06, "loss": 1.0023, "step": 5550 }, { "epoch": 0.06770014502821348, "grad_norm": 2.031491756439209, "learning_rate": 4.9069275176395134e-06, "loss": 0.8555, "step": 5555 }, { "epoch": 0.06776108125236129, "grad_norm": 2.396542549133301, "learning_rate": 4.9066067992302764e-06, "loss": 0.9309, "step": 5560 }, { "epoch": 0.06782201747650908, "grad_norm": 2.161733865737915, "learning_rate": 4.9062860808210395e-06, "loss": 0.9562, "step": 5565 }, { "epoch": 0.06788295370065689, "grad_norm": 2.2116711139678955, "learning_rate": 4.905965362411803e-06, "loss": 0.9598, "step": 5570 }, { "epoch": 0.0679438899248047, "grad_norm": 2.0722544193267822, "learning_rate": 4.905644644002566e-06, "loss": 0.9273, "step": 5575 }, { "epoch": 0.06800482614895251, "grad_norm": 2.180281162261963, "learning_rate": 4.905323925593329e-06, "loss": 0.8717, "step": 5580 }, { "epoch": 0.06806576237310032, "grad_norm": 2.009038209915161, "learning_rate": 4.905003207184092e-06, "loss": 0.9183, "step": 5585 }, { "epoch": 0.06812669859724813, "grad_norm": 1.8359899520874023, "learning_rate": 4.904682488774856e-06, "loss": 0.9486, "step": 5590 }, { "epoch": 0.06818763482139592, "grad_norm": 1.973497986793518, "learning_rate": 4.904361770365619e-06, "loss": 0.9191, "step": 5595 }, { "epoch": 0.06824857104554373, "grad_norm": 2.219254970550537, "learning_rate": 4.904041051956382e-06, "loss": 0.934, "step": 5600 }, { "epoch": 0.06830950726969154, "grad_norm": 2.269242525100708, "learning_rate": 4.903720333547146e-06, "loss": 1.0078, "step": 5605 }, { "epoch": 0.06837044349383935, "grad_norm": 1.8953286409378052, "learning_rate": 4.903399615137909e-06, "loss": 0.9297, "step": 5610 }, { "epoch": 0.06843137971798716, "grad_norm": 1.7916902303695679, "learning_rate": 4.903078896728672e-06, "loss": 0.9411, "step": 5615 }, { "epoch": 0.06849231594213497, "grad_norm": 1.9572983980178833, "learning_rate": 4.902758178319436e-06, "loss": 0.9554, "step": 5620 }, { "epoch": 0.06855325216628277, "grad_norm": 1.9052634239196777, "learning_rate": 4.902437459910199e-06, "loss": 0.9343, "step": 5625 }, { "epoch": 0.06861418839043057, "grad_norm": 2.0895280838012695, "learning_rate": 4.902116741500962e-06, "loss": 0.9549, "step": 5630 }, { "epoch": 0.06867512461457838, "grad_norm": 2.0306808948516846, "learning_rate": 4.901796023091726e-06, "loss": 0.966, "step": 5635 }, { "epoch": 0.06873606083872619, "grad_norm": 2.190082550048828, "learning_rate": 4.901475304682489e-06, "loss": 0.8886, "step": 5640 }, { "epoch": 0.068796997062874, "grad_norm": 3.2253754138946533, "learning_rate": 4.901154586273252e-06, "loss": 0.9493, "step": 5645 }, { "epoch": 0.0688579332870218, "grad_norm": 1.9738436937332153, "learning_rate": 4.900833867864016e-06, "loss": 0.9018, "step": 5650 }, { "epoch": 0.06891886951116961, "grad_norm": 2.3908157348632812, "learning_rate": 4.900513149454779e-06, "loss": 1.0017, "step": 5655 }, { "epoch": 0.06897980573531742, "grad_norm": 2.0743753910064697, "learning_rate": 4.900192431045543e-06, "loss": 0.947, "step": 5660 }, { "epoch": 0.06904074195946522, "grad_norm": 1.7864611148834229, "learning_rate": 4.899871712636306e-06, "loss": 0.9717, "step": 5665 }, { "epoch": 0.06910167818361303, "grad_norm": 1.7486777305603027, "learning_rate": 4.899550994227069e-06, "loss": 0.9247, "step": 5670 }, { "epoch": 0.06916261440776084, "grad_norm": 2.305426836013794, "learning_rate": 4.899230275817833e-06, "loss": 0.9582, "step": 5675 }, { "epoch": 0.06922355063190865, "grad_norm": 2.074845314025879, "learning_rate": 4.898909557408596e-06, "loss": 0.947, "step": 5680 }, { "epoch": 0.06928448685605645, "grad_norm": 2.0844264030456543, "learning_rate": 4.8985888389993595e-06, "loss": 1.0151, "step": 5685 }, { "epoch": 0.06934542308020426, "grad_norm": 1.9699323177337646, "learning_rate": 4.8982681205901225e-06, "loss": 0.952, "step": 5690 }, { "epoch": 0.06940635930435206, "grad_norm": 1.9822020530700684, "learning_rate": 4.8979474021808855e-06, "loss": 0.9161, "step": 5695 }, { "epoch": 0.06946729552849987, "grad_norm": 1.9311578273773193, "learning_rate": 4.897626683771649e-06, "loss": 0.8651, "step": 5700 }, { "epoch": 0.06952823175264768, "grad_norm": 1.9454327821731567, "learning_rate": 4.897305965362412e-06, "loss": 1.0286, "step": 5705 }, { "epoch": 0.06958916797679549, "grad_norm": 1.9975162744522095, "learning_rate": 4.896985246953175e-06, "loss": 0.8929, "step": 5710 }, { "epoch": 0.0696501042009433, "grad_norm": 1.8072513341903687, "learning_rate": 4.896664528543939e-06, "loss": 0.9171, "step": 5715 }, { "epoch": 0.0697110404250911, "grad_norm": 1.9945982694625854, "learning_rate": 4.896343810134702e-06, "loss": 1.026, "step": 5720 }, { "epoch": 0.06977197664923891, "grad_norm": 1.9636404514312744, "learning_rate": 4.896023091725465e-06, "loss": 0.9216, "step": 5725 }, { "epoch": 0.06983291287338671, "grad_norm": 2.102193593978882, "learning_rate": 4.895702373316229e-06, "loss": 0.9728, "step": 5730 }, { "epoch": 0.06989384909753452, "grad_norm": 2.459519386291504, "learning_rate": 4.895381654906992e-06, "loss": 0.9826, "step": 5735 }, { "epoch": 0.06995478532168233, "grad_norm": 2.0420408248901367, "learning_rate": 4.895060936497755e-06, "loss": 0.9309, "step": 5740 }, { "epoch": 0.07001572154583013, "grad_norm": 1.8987085819244385, "learning_rate": 4.894740218088519e-06, "loss": 0.871, "step": 5745 }, { "epoch": 0.07007665776997794, "grad_norm": 1.9259001016616821, "learning_rate": 4.894419499679282e-06, "loss": 0.9288, "step": 5750 }, { "epoch": 0.07013759399412575, "grad_norm": 2.6884870529174805, "learning_rate": 4.894098781270045e-06, "loss": 0.9723, "step": 5755 }, { "epoch": 0.07019853021827356, "grad_norm": 1.7844552993774414, "learning_rate": 4.893778062860808e-06, "loss": 0.922, "step": 5760 }, { "epoch": 0.07025946644242136, "grad_norm": 2.226775884628296, "learning_rate": 4.893457344451572e-06, "loss": 0.9425, "step": 5765 }, { "epoch": 0.07032040266656917, "grad_norm": 1.7698386907577515, "learning_rate": 4.893136626042335e-06, "loss": 0.9328, "step": 5770 }, { "epoch": 0.07038133889071697, "grad_norm": 2.1828625202178955, "learning_rate": 4.892815907633098e-06, "loss": 0.9292, "step": 5775 }, { "epoch": 0.07044227511486478, "grad_norm": 1.822723627090454, "learning_rate": 4.892495189223862e-06, "loss": 1.0227, "step": 5780 }, { "epoch": 0.07050321133901259, "grad_norm": 2.2697975635528564, "learning_rate": 4.892174470814625e-06, "loss": 0.9999, "step": 5785 }, { "epoch": 0.0705641475631604, "grad_norm": 2.0370447635650635, "learning_rate": 4.891853752405388e-06, "loss": 0.9301, "step": 5790 }, { "epoch": 0.07062508378730821, "grad_norm": 2.026326894760132, "learning_rate": 4.891533033996152e-06, "loss": 1.0286, "step": 5795 }, { "epoch": 0.070686020011456, "grad_norm": 1.9394038915634155, "learning_rate": 4.891212315586915e-06, "loss": 0.8572, "step": 5800 }, { "epoch": 0.07074695623560381, "grad_norm": 1.9203824996948242, "learning_rate": 4.890891597177679e-06, "loss": 0.9657, "step": 5805 }, { "epoch": 0.07080789245975162, "grad_norm": 2.1651012897491455, "learning_rate": 4.890570878768442e-06, "loss": 0.9893, "step": 5810 }, { "epoch": 0.07086882868389943, "grad_norm": 1.9795219898223877, "learning_rate": 4.890250160359205e-06, "loss": 0.9064, "step": 5815 }, { "epoch": 0.07092976490804724, "grad_norm": 2.2890713214874268, "learning_rate": 4.8899294419499686e-06, "loss": 0.916, "step": 5820 }, { "epoch": 0.07099070113219505, "grad_norm": 2.68293833732605, "learning_rate": 4.889608723540732e-06, "loss": 0.905, "step": 5825 }, { "epoch": 0.07105163735634284, "grad_norm": 1.9203087091445923, "learning_rate": 4.8892880051314954e-06, "loss": 0.8787, "step": 5830 }, { "epoch": 0.07111257358049065, "grad_norm": 1.9521187543869019, "learning_rate": 4.8889672867222585e-06, "loss": 0.9047, "step": 5835 }, { "epoch": 0.07117350980463846, "grad_norm": 2.0679123401641846, "learning_rate": 4.8886465683130215e-06, "loss": 0.9909, "step": 5840 }, { "epoch": 0.07123444602878627, "grad_norm": 2.3581197261810303, "learning_rate": 4.888325849903785e-06, "loss": 0.9909, "step": 5845 }, { "epoch": 0.07129538225293408, "grad_norm": 2.2985730171203613, "learning_rate": 4.888005131494548e-06, "loss": 0.9711, "step": 5850 }, { "epoch": 0.07135631847708189, "grad_norm": 1.7822095155715942, "learning_rate": 4.887684413085311e-06, "loss": 0.93, "step": 5855 }, { "epoch": 0.0714172547012297, "grad_norm": 1.9766901731491089, "learning_rate": 4.887363694676075e-06, "loss": 0.9254, "step": 5860 }, { "epoch": 0.0714781909253775, "grad_norm": 2.4586021900177, "learning_rate": 4.887042976266838e-06, "loss": 0.9536, "step": 5865 }, { "epoch": 0.0715391271495253, "grad_norm": 2.36570405960083, "learning_rate": 4.886722257857601e-06, "loss": 0.928, "step": 5870 }, { "epoch": 0.07160006337367311, "grad_norm": 1.7185827493667603, "learning_rate": 4.886401539448365e-06, "loss": 0.9646, "step": 5875 }, { "epoch": 0.07166099959782092, "grad_norm": 2.1109673976898193, "learning_rate": 4.886080821039128e-06, "loss": 0.9592, "step": 5880 }, { "epoch": 0.07172193582196873, "grad_norm": 1.752276062965393, "learning_rate": 4.885760102629891e-06, "loss": 0.9362, "step": 5885 }, { "epoch": 0.07178287204611654, "grad_norm": 2.2327980995178223, "learning_rate": 4.885439384220655e-06, "loss": 0.9325, "step": 5890 }, { "epoch": 0.07184380827026435, "grad_norm": 1.9795997142791748, "learning_rate": 4.885118665811418e-06, "loss": 0.986, "step": 5895 }, { "epoch": 0.07190474449441214, "grad_norm": 1.9070727825164795, "learning_rate": 4.884797947402181e-06, "loss": 0.9931, "step": 5900 }, { "epoch": 0.07196568071855995, "grad_norm": 2.0061910152435303, "learning_rate": 4.884477228992945e-06, "loss": 0.9347, "step": 5905 }, { "epoch": 0.07202661694270776, "grad_norm": 1.7485777139663696, "learning_rate": 4.884156510583708e-06, "loss": 0.9123, "step": 5910 }, { "epoch": 0.07208755316685557, "grad_norm": 1.7906681299209595, "learning_rate": 4.883835792174471e-06, "loss": 0.8619, "step": 5915 }, { "epoch": 0.07214848939100338, "grad_norm": 1.8957687616348267, "learning_rate": 4.883515073765234e-06, "loss": 0.8782, "step": 5920 }, { "epoch": 0.07220942561515119, "grad_norm": 2.2623887062072754, "learning_rate": 4.883194355355998e-06, "loss": 0.9554, "step": 5925 }, { "epoch": 0.072270361839299, "grad_norm": 2.5576794147491455, "learning_rate": 4.882873636946761e-06, "loss": 0.9848, "step": 5930 }, { "epoch": 0.07233129806344679, "grad_norm": 2.332691192626953, "learning_rate": 4.882552918537524e-06, "loss": 0.9718, "step": 5935 }, { "epoch": 0.0723922342875946, "grad_norm": 1.9942837953567505, "learning_rate": 4.882232200128288e-06, "loss": 0.8842, "step": 5940 }, { "epoch": 0.07245317051174241, "grad_norm": 1.9793899059295654, "learning_rate": 4.881911481719051e-06, "loss": 1.0072, "step": 5945 }, { "epoch": 0.07251410673589022, "grad_norm": 1.9229289293289185, "learning_rate": 4.881590763309814e-06, "loss": 0.8948, "step": 5950 }, { "epoch": 0.07257504296003803, "grad_norm": 1.7639293670654297, "learning_rate": 4.881270044900578e-06, "loss": 0.9109, "step": 5955 }, { "epoch": 0.07263597918418584, "grad_norm": 2.7629237174987793, "learning_rate": 4.880949326491341e-06, "loss": 0.9707, "step": 5960 }, { "epoch": 0.07269691540833363, "grad_norm": 2.552983522415161, "learning_rate": 4.8806286080821045e-06, "loss": 0.9713, "step": 5965 }, { "epoch": 0.07275785163248144, "grad_norm": 1.9225667715072632, "learning_rate": 4.8803078896728675e-06, "loss": 0.923, "step": 5970 }, { "epoch": 0.07281878785662925, "grad_norm": 2.091099262237549, "learning_rate": 4.8799871712636306e-06, "loss": 0.9206, "step": 5975 }, { "epoch": 0.07287972408077706, "grad_norm": 2.7206573486328125, "learning_rate": 4.879666452854394e-06, "loss": 0.9792, "step": 5980 }, { "epoch": 0.07294066030492487, "grad_norm": 2.0040769577026367, "learning_rate": 4.8793457344451574e-06, "loss": 0.949, "step": 5985 }, { "epoch": 0.07300159652907268, "grad_norm": 2.147705078125, "learning_rate": 4.879025016035921e-06, "loss": 1.0076, "step": 5990 }, { "epoch": 0.07306253275322049, "grad_norm": 2.0962088108062744, "learning_rate": 4.878704297626684e-06, "loss": 0.9423, "step": 5995 }, { "epoch": 0.07312346897736828, "grad_norm": 2.1763482093811035, "learning_rate": 4.878383579217447e-06, "loss": 0.8745, "step": 6000 }, { "epoch": 0.07318440520151609, "grad_norm": 1.9363750219345093, "learning_rate": 4.878062860808211e-06, "loss": 0.9332, "step": 6005 }, { "epoch": 0.0732453414256639, "grad_norm": 2.3881053924560547, "learning_rate": 4.877742142398974e-06, "loss": 0.9426, "step": 6010 }, { "epoch": 0.07330627764981171, "grad_norm": 2.295728921890259, "learning_rate": 4.877421423989737e-06, "loss": 0.997, "step": 6015 }, { "epoch": 0.07336721387395952, "grad_norm": 1.8541845083236694, "learning_rate": 4.877100705580501e-06, "loss": 0.9444, "step": 6020 }, { "epoch": 0.07342815009810733, "grad_norm": 2.178030252456665, "learning_rate": 4.876779987171264e-06, "loss": 1.0054, "step": 6025 }, { "epoch": 0.07348908632225513, "grad_norm": 2.285686731338501, "learning_rate": 4.876459268762027e-06, "loss": 0.9248, "step": 6030 }, { "epoch": 0.07355002254640293, "grad_norm": 2.312490463256836, "learning_rate": 4.876138550352791e-06, "loss": 1.0118, "step": 6035 }, { "epoch": 0.07361095877055074, "grad_norm": 2.101527214050293, "learning_rate": 4.875817831943554e-06, "loss": 0.8669, "step": 6040 }, { "epoch": 0.07367189499469855, "grad_norm": 2.4846630096435547, "learning_rate": 4.875497113534317e-06, "loss": 0.9621, "step": 6045 }, { "epoch": 0.07373283121884636, "grad_norm": 2.0172345638275146, "learning_rate": 4.875176395125081e-06, "loss": 0.9209, "step": 6050 }, { "epoch": 0.07379376744299417, "grad_norm": 2.1219921112060547, "learning_rate": 4.874855676715844e-06, "loss": 0.89, "step": 6055 }, { "epoch": 0.07385470366714197, "grad_norm": 1.8583046197891235, "learning_rate": 4.874534958306607e-06, "loss": 0.9903, "step": 6060 }, { "epoch": 0.07391563989128977, "grad_norm": 2.0557310581207275, "learning_rate": 4.874214239897371e-06, "loss": 0.9917, "step": 6065 }, { "epoch": 0.07397657611543758, "grad_norm": 2.1951565742492676, "learning_rate": 4.873893521488134e-06, "loss": 0.9855, "step": 6070 }, { "epoch": 0.07403751233958539, "grad_norm": 1.9902844429016113, "learning_rate": 4.873572803078897e-06, "loss": 0.9379, "step": 6075 }, { "epoch": 0.0740984485637332, "grad_norm": 1.956467866897583, "learning_rate": 4.873252084669661e-06, "loss": 0.9669, "step": 6080 }, { "epoch": 0.074159384787881, "grad_norm": 1.7598921060562134, "learning_rate": 4.872931366260424e-06, "loss": 0.9617, "step": 6085 }, { "epoch": 0.07422032101202881, "grad_norm": 1.7081522941589355, "learning_rate": 4.872610647851187e-06, "loss": 0.9248, "step": 6090 }, { "epoch": 0.07428125723617662, "grad_norm": 1.9040896892547607, "learning_rate": 4.87228992944195e-06, "loss": 0.8954, "step": 6095 }, { "epoch": 0.07434219346032442, "grad_norm": 2.1636576652526855, "learning_rate": 4.871969211032714e-06, "loss": 0.8585, "step": 6100 }, { "epoch": 0.07440312968447223, "grad_norm": 2.0964553356170654, "learning_rate": 4.871648492623477e-06, "loss": 0.8774, "step": 6105 }, { "epoch": 0.07446406590862004, "grad_norm": 1.7752571105957031, "learning_rate": 4.8713277742142405e-06, "loss": 0.9399, "step": 6110 }, { "epoch": 0.07452500213276785, "grad_norm": 1.7716045379638672, "learning_rate": 4.8710070558050035e-06, "loss": 0.9369, "step": 6115 }, { "epoch": 0.07458593835691565, "grad_norm": 1.9126569032669067, "learning_rate": 4.8706863373957665e-06, "loss": 0.9077, "step": 6120 }, { "epoch": 0.07464687458106346, "grad_norm": 2.101668119430542, "learning_rate": 4.87036561898653e-06, "loss": 0.8688, "step": 6125 }, { "epoch": 0.07470781080521127, "grad_norm": 1.9651577472686768, "learning_rate": 4.870044900577293e-06, "loss": 0.8938, "step": 6130 }, { "epoch": 0.07476874702935907, "grad_norm": 2.0396313667297363, "learning_rate": 4.869724182168057e-06, "loss": 0.8993, "step": 6135 }, { "epoch": 0.07482968325350688, "grad_norm": 1.9107922315597534, "learning_rate": 4.86940346375882e-06, "loss": 0.891, "step": 6140 }, { "epoch": 0.07489061947765469, "grad_norm": 2.314258575439453, "learning_rate": 4.869082745349583e-06, "loss": 0.9513, "step": 6145 }, { "epoch": 0.0749515557018025, "grad_norm": 1.949492335319519, "learning_rate": 4.868762026940347e-06, "loss": 0.939, "step": 6150 }, { "epoch": 0.0750124919259503, "grad_norm": 2.385714054107666, "learning_rate": 4.86844130853111e-06, "loss": 1.0077, "step": 6155 }, { "epoch": 0.07507342815009811, "grad_norm": 2.0329267978668213, "learning_rate": 4.868120590121874e-06, "loss": 1.0058, "step": 6160 }, { "epoch": 0.07513436437424592, "grad_norm": 1.776684045791626, "learning_rate": 4.867799871712637e-06, "loss": 0.9608, "step": 6165 }, { "epoch": 0.07519530059839372, "grad_norm": 2.753244161605835, "learning_rate": 4.8674791533034e-06, "loss": 0.976, "step": 6170 }, { "epoch": 0.07525623682254153, "grad_norm": 2.256103992462158, "learning_rate": 4.867158434894163e-06, "loss": 0.9285, "step": 6175 }, { "epoch": 0.07531717304668933, "grad_norm": 1.9951237440109253, "learning_rate": 4.866837716484927e-06, "loss": 1.0384, "step": 6180 }, { "epoch": 0.07537810927083714, "grad_norm": 2.359471559524536, "learning_rate": 4.86651699807569e-06, "loss": 0.9742, "step": 6185 }, { "epoch": 0.07543904549498495, "grad_norm": 2.2458739280700684, "learning_rate": 4.866196279666453e-06, "loss": 0.9292, "step": 6190 }, { "epoch": 0.07549998171913276, "grad_norm": 2.044781446456909, "learning_rate": 4.865875561257217e-06, "loss": 1.004, "step": 6195 }, { "epoch": 0.07556091794328056, "grad_norm": 1.927573800086975, "learning_rate": 4.86555484284798e-06, "loss": 0.9056, "step": 6200 }, { "epoch": 0.07562185416742837, "grad_norm": 1.935996174812317, "learning_rate": 4.865234124438743e-06, "loss": 0.9785, "step": 6205 }, { "epoch": 0.07568279039157617, "grad_norm": 2.260227680206299, "learning_rate": 4.864913406029507e-06, "loss": 0.9509, "step": 6210 }, { "epoch": 0.07574372661572398, "grad_norm": 1.8646361827850342, "learning_rate": 4.86459268762027e-06, "loss": 0.9208, "step": 6215 }, { "epoch": 0.07580466283987179, "grad_norm": 2.142771005630493, "learning_rate": 4.864271969211033e-06, "loss": 0.9558, "step": 6220 }, { "epoch": 0.0758655990640196, "grad_norm": 1.9547452926635742, "learning_rate": 4.863951250801797e-06, "loss": 0.946, "step": 6225 }, { "epoch": 0.07592653528816741, "grad_norm": 2.1109044551849365, "learning_rate": 4.86363053239256e-06, "loss": 0.8859, "step": 6230 }, { "epoch": 0.0759874715123152, "grad_norm": 2.853337526321411, "learning_rate": 4.863309813983323e-06, "loss": 0.8775, "step": 6235 }, { "epoch": 0.07604840773646301, "grad_norm": 1.6962800025939941, "learning_rate": 4.8629890955740865e-06, "loss": 0.9359, "step": 6240 }, { "epoch": 0.07610934396061082, "grad_norm": 1.9592900276184082, "learning_rate": 4.8626683771648496e-06, "loss": 0.8783, "step": 6245 }, { "epoch": 0.07617028018475863, "grad_norm": 2.3133881092071533, "learning_rate": 4.8623476587556126e-06, "loss": 0.9558, "step": 6250 }, { "epoch": 0.07623121640890644, "grad_norm": 2.0683751106262207, "learning_rate": 4.8620269403463764e-06, "loss": 0.9101, "step": 6255 }, { "epoch": 0.07629215263305425, "grad_norm": 2.073871374130249, "learning_rate": 4.8617062219371395e-06, "loss": 0.9599, "step": 6260 }, { "epoch": 0.07635308885720206, "grad_norm": 2.080519914627075, "learning_rate": 4.8613855035279025e-06, "loss": 0.9978, "step": 6265 }, { "epoch": 0.07641402508134985, "grad_norm": 2.2446465492248535, "learning_rate": 4.861064785118666e-06, "loss": 0.936, "step": 6270 }, { "epoch": 0.07647496130549766, "grad_norm": 1.9061129093170166, "learning_rate": 4.860744066709429e-06, "loss": 0.9016, "step": 6275 }, { "epoch": 0.07653589752964547, "grad_norm": 2.419060707092285, "learning_rate": 4.860423348300193e-06, "loss": 0.9155, "step": 6280 }, { "epoch": 0.07659683375379328, "grad_norm": 2.2243871688842773, "learning_rate": 4.860102629890956e-06, "loss": 0.9399, "step": 6285 }, { "epoch": 0.07665776997794109, "grad_norm": 2.1127240657806396, "learning_rate": 4.859781911481719e-06, "loss": 0.9081, "step": 6290 }, { "epoch": 0.0767187062020889, "grad_norm": 2.085336446762085, "learning_rate": 4.859461193072483e-06, "loss": 0.9515, "step": 6295 }, { "epoch": 0.0767796424262367, "grad_norm": 2.2834887504577637, "learning_rate": 4.859140474663246e-06, "loss": 0.8944, "step": 6300 }, { "epoch": 0.0768405786503845, "grad_norm": 2.1130266189575195, "learning_rate": 4.85881975625401e-06, "loss": 0.9491, "step": 6305 }, { "epoch": 0.07690151487453231, "grad_norm": 1.9348957538604736, "learning_rate": 4.858499037844773e-06, "loss": 0.8961, "step": 6310 }, { "epoch": 0.07696245109868012, "grad_norm": 2.2834999561309814, "learning_rate": 4.858178319435536e-06, "loss": 0.9811, "step": 6315 }, { "epoch": 0.07702338732282793, "grad_norm": 1.935605764389038, "learning_rate": 4.8578576010263e-06, "loss": 0.9568, "step": 6320 }, { "epoch": 0.07708432354697574, "grad_norm": 2.053457021713257, "learning_rate": 4.857536882617063e-06, "loss": 0.8919, "step": 6325 }, { "epoch": 0.07714525977112355, "grad_norm": 1.9197912216186523, "learning_rate": 4.857216164207826e-06, "loss": 0.9992, "step": 6330 }, { "epoch": 0.07720619599527134, "grad_norm": 2.094545602798462, "learning_rate": 4.856895445798589e-06, "loss": 0.9462, "step": 6335 }, { "epoch": 0.07726713221941915, "grad_norm": 2.452425718307495, "learning_rate": 4.856574727389353e-06, "loss": 0.9557, "step": 6340 }, { "epoch": 0.07732806844356696, "grad_norm": 2.0239148139953613, "learning_rate": 4.856254008980116e-06, "loss": 0.947, "step": 6345 }, { "epoch": 0.07738900466771477, "grad_norm": 1.9416303634643555, "learning_rate": 4.855933290570879e-06, "loss": 0.8751, "step": 6350 }, { "epoch": 0.07744994089186258, "grad_norm": 1.7068225145339966, "learning_rate": 4.855612572161643e-06, "loss": 0.9495, "step": 6355 }, { "epoch": 0.07751087711601039, "grad_norm": 2.1844303607940674, "learning_rate": 4.855291853752406e-06, "loss": 0.8704, "step": 6360 }, { "epoch": 0.0775718133401582, "grad_norm": 1.8966670036315918, "learning_rate": 4.854971135343169e-06, "loss": 0.9007, "step": 6365 }, { "epoch": 0.07763274956430599, "grad_norm": 2.3655343055725098, "learning_rate": 4.854650416933933e-06, "loss": 0.8974, "step": 6370 }, { "epoch": 0.0776936857884538, "grad_norm": 1.8604155778884888, "learning_rate": 4.854329698524696e-06, "loss": 0.8451, "step": 6375 }, { "epoch": 0.07775462201260161, "grad_norm": 2.0433120727539062, "learning_rate": 4.854008980115459e-06, "loss": 0.9776, "step": 6380 }, { "epoch": 0.07781555823674942, "grad_norm": 2.47220516204834, "learning_rate": 4.8536882617062225e-06, "loss": 0.9442, "step": 6385 }, { "epoch": 0.07787649446089723, "grad_norm": 2.2032322883605957, "learning_rate": 4.8533675432969855e-06, "loss": 0.918, "step": 6390 }, { "epoch": 0.07793743068504504, "grad_norm": 2.2888479232788086, "learning_rate": 4.8530468248877485e-06, "loss": 1.0182, "step": 6395 }, { "epoch": 0.07799836690919285, "grad_norm": 2.9597890377044678, "learning_rate": 4.852726106478512e-06, "loss": 0.9395, "step": 6400 }, { "epoch": 0.07805930313334064, "grad_norm": 2.164687156677246, "learning_rate": 4.852405388069275e-06, "loss": 0.9222, "step": 6405 }, { "epoch": 0.07812023935748845, "grad_norm": 1.971567153930664, "learning_rate": 4.8520846696600384e-06, "loss": 0.8611, "step": 6410 }, { "epoch": 0.07818117558163626, "grad_norm": 2.1082561016082764, "learning_rate": 4.851763951250802e-06, "loss": 0.929, "step": 6415 }, { "epoch": 0.07824211180578407, "grad_norm": 2.1643593311309814, "learning_rate": 4.851443232841565e-06, "loss": 0.848, "step": 6420 }, { "epoch": 0.07830304802993188, "grad_norm": 2.5203871726989746, "learning_rate": 4.851122514432328e-06, "loss": 0.9274, "step": 6425 }, { "epoch": 0.07836398425407969, "grad_norm": 1.8904608488082886, "learning_rate": 4.850801796023092e-06, "loss": 0.9734, "step": 6430 }, { "epoch": 0.07842492047822748, "grad_norm": 2.2424135208129883, "learning_rate": 4.850481077613855e-06, "loss": 0.9762, "step": 6435 }, { "epoch": 0.07848585670237529, "grad_norm": 1.8320666551589966, "learning_rate": 4.850160359204619e-06, "loss": 1.0201, "step": 6440 }, { "epoch": 0.0785467929265231, "grad_norm": 2.0666117668151855, "learning_rate": 4.849839640795382e-06, "loss": 0.9091, "step": 6445 }, { "epoch": 0.07860772915067091, "grad_norm": 2.1600966453552246, "learning_rate": 4.849518922386145e-06, "loss": 1.003, "step": 6450 }, { "epoch": 0.07866866537481872, "grad_norm": 2.1416003704071045, "learning_rate": 4.849198203976909e-06, "loss": 0.9649, "step": 6455 }, { "epoch": 0.07872960159896653, "grad_norm": 2.3314433097839355, "learning_rate": 4.848877485567672e-06, "loss": 0.9349, "step": 6460 }, { "epoch": 0.07879053782311433, "grad_norm": 2.205967903137207, "learning_rate": 4.848556767158436e-06, "loss": 0.9449, "step": 6465 }, { "epoch": 0.07885147404726213, "grad_norm": 2.190028667449951, "learning_rate": 4.848236048749199e-06, "loss": 0.9399, "step": 6470 }, { "epoch": 0.07891241027140994, "grad_norm": 2.0024070739746094, "learning_rate": 4.847915330339962e-06, "loss": 0.9617, "step": 6475 }, { "epoch": 0.07897334649555775, "grad_norm": 1.8203140497207642, "learning_rate": 4.847594611930726e-06, "loss": 1.026, "step": 6480 }, { "epoch": 0.07903428271970556, "grad_norm": 2.300349235534668, "learning_rate": 4.847273893521489e-06, "loss": 0.8968, "step": 6485 }, { "epoch": 0.07909521894385337, "grad_norm": 2.199732780456543, "learning_rate": 4.846953175112252e-06, "loss": 0.891, "step": 6490 }, { "epoch": 0.07915615516800117, "grad_norm": 2.346160411834717, "learning_rate": 4.846632456703016e-06, "loss": 1.0018, "step": 6495 }, { "epoch": 0.07921709139214898, "grad_norm": 1.989732027053833, "learning_rate": 4.846311738293779e-06, "loss": 0.8912, "step": 6500 }, { "epoch": 0.07927802761629678, "grad_norm": 2.176313638687134, "learning_rate": 4.845991019884542e-06, "loss": 0.9126, "step": 6505 }, { "epoch": 0.07933896384044459, "grad_norm": 2.303056240081787, "learning_rate": 4.845670301475305e-06, "loss": 0.9772, "step": 6510 }, { "epoch": 0.0793999000645924, "grad_norm": 1.9584097862243652, "learning_rate": 4.8453495830660686e-06, "loss": 0.944, "step": 6515 }, { "epoch": 0.0794608362887402, "grad_norm": 2.005720853805542, "learning_rate": 4.8450288646568316e-06, "loss": 0.9551, "step": 6520 }, { "epoch": 0.07952177251288801, "grad_norm": 1.9843183755874634, "learning_rate": 4.844708146247595e-06, "loss": 1.0006, "step": 6525 }, { "epoch": 0.07958270873703582, "grad_norm": 1.8179017305374146, "learning_rate": 4.8443874278383585e-06, "loss": 0.7923, "step": 6530 }, { "epoch": 0.07964364496118362, "grad_norm": 2.0517923831939697, "learning_rate": 4.8440667094291215e-06, "loss": 0.9293, "step": 6535 }, { "epoch": 0.07970458118533143, "grad_norm": 1.7186590433120728, "learning_rate": 4.8437459910198845e-06, "loss": 0.9844, "step": 6540 }, { "epoch": 0.07976551740947924, "grad_norm": 1.820568561553955, "learning_rate": 4.843425272610648e-06, "loss": 0.8182, "step": 6545 }, { "epoch": 0.07982645363362705, "grad_norm": 2.0808608531951904, "learning_rate": 4.843104554201411e-06, "loss": 0.9717, "step": 6550 }, { "epoch": 0.07988738985777485, "grad_norm": 1.975653886795044, "learning_rate": 4.842783835792174e-06, "loss": 0.9399, "step": 6555 }, { "epoch": 0.07994832608192266, "grad_norm": 1.9600152969360352, "learning_rate": 4.842463117382938e-06, "loss": 0.9083, "step": 6560 }, { "epoch": 0.08000926230607047, "grad_norm": 2.1788017749786377, "learning_rate": 4.842142398973701e-06, "loss": 0.9157, "step": 6565 }, { "epoch": 0.08007019853021827, "grad_norm": 1.9819215536117554, "learning_rate": 4.841821680564464e-06, "loss": 0.929, "step": 6570 }, { "epoch": 0.08013113475436608, "grad_norm": 2.1030771732330322, "learning_rate": 4.841500962155228e-06, "loss": 0.9465, "step": 6575 }, { "epoch": 0.08019207097851389, "grad_norm": 1.9042936563491821, "learning_rate": 4.841180243745991e-06, "loss": 0.9482, "step": 6580 }, { "epoch": 0.0802530072026617, "grad_norm": 1.9491890668869019, "learning_rate": 4.840859525336755e-06, "loss": 0.9074, "step": 6585 }, { "epoch": 0.0803139434268095, "grad_norm": 2.2509360313415527, "learning_rate": 4.840538806927518e-06, "loss": 0.9529, "step": 6590 }, { "epoch": 0.08037487965095731, "grad_norm": 1.8506810665130615, "learning_rate": 4.840218088518281e-06, "loss": 0.9163, "step": 6595 }, { "epoch": 0.08043581587510512, "grad_norm": 2.0568716526031494, "learning_rate": 4.839897370109045e-06, "loss": 1.008, "step": 6600 }, { "epoch": 0.08049675209925292, "grad_norm": 2.2628414630889893, "learning_rate": 4.839576651699808e-06, "loss": 0.9111, "step": 6605 }, { "epoch": 0.08055768832340073, "grad_norm": 2.1777124404907227, "learning_rate": 4.839255933290572e-06, "loss": 1.0054, "step": 6610 }, { "epoch": 0.08061862454754853, "grad_norm": 2.125645160675049, "learning_rate": 4.838935214881335e-06, "loss": 0.9574, "step": 6615 }, { "epoch": 0.08067956077169634, "grad_norm": 1.926742672920227, "learning_rate": 4.838614496472098e-06, "loss": 1.0016, "step": 6620 }, { "epoch": 0.08074049699584415, "grad_norm": 1.9015440940856934, "learning_rate": 4.838293778062862e-06, "loss": 0.9624, "step": 6625 }, { "epoch": 0.08080143321999196, "grad_norm": 2.4735395908355713, "learning_rate": 4.837973059653625e-06, "loss": 0.8831, "step": 6630 }, { "epoch": 0.08086236944413977, "grad_norm": 2.3859009742736816, "learning_rate": 4.837652341244388e-06, "loss": 0.8978, "step": 6635 }, { "epoch": 0.08092330566828757, "grad_norm": 1.8988410234451294, "learning_rate": 4.837331622835152e-06, "loss": 0.9534, "step": 6640 }, { "epoch": 0.08098424189243537, "grad_norm": 1.866252064704895, "learning_rate": 4.837010904425915e-06, "loss": 0.918, "step": 6645 }, { "epoch": 0.08104517811658318, "grad_norm": 2.2162303924560547, "learning_rate": 4.836690186016678e-06, "loss": 0.9264, "step": 6650 }, { "epoch": 0.08110611434073099, "grad_norm": 2.0846850872039795, "learning_rate": 4.8363694676074415e-06, "loss": 0.8618, "step": 6655 }, { "epoch": 0.0811670505648788, "grad_norm": 2.8374390602111816, "learning_rate": 4.8360487491982045e-06, "loss": 0.9557, "step": 6660 }, { "epoch": 0.08122798678902661, "grad_norm": 1.8604798316955566, "learning_rate": 4.8357280307889675e-06, "loss": 0.9311, "step": 6665 }, { "epoch": 0.0812889230131744, "grad_norm": 1.9897760152816772, "learning_rate": 4.835407312379731e-06, "loss": 0.9181, "step": 6670 }, { "epoch": 0.08134985923732221, "grad_norm": 2.035599946975708, "learning_rate": 4.835086593970494e-06, "loss": 0.939, "step": 6675 }, { "epoch": 0.08141079546147002, "grad_norm": 1.836288332939148, "learning_rate": 4.8347658755612574e-06, "loss": 1.0051, "step": 6680 }, { "epoch": 0.08147173168561783, "grad_norm": 2.1404671669006348, "learning_rate": 4.8344451571520204e-06, "loss": 0.9206, "step": 6685 }, { "epoch": 0.08153266790976564, "grad_norm": 2.1909029483795166, "learning_rate": 4.834124438742784e-06, "loss": 0.9261, "step": 6690 }, { "epoch": 0.08159360413391345, "grad_norm": 1.8499631881713867, "learning_rate": 4.833803720333547e-06, "loss": 0.929, "step": 6695 }, { "epoch": 0.08165454035806126, "grad_norm": 2.023073673248291, "learning_rate": 4.83348300192431e-06, "loss": 0.9605, "step": 6700 }, { "epoch": 0.08171547658220905, "grad_norm": 1.9184857606887817, "learning_rate": 4.833162283515074e-06, "loss": 0.9333, "step": 6705 }, { "epoch": 0.08177641280635686, "grad_norm": 1.9246106147766113, "learning_rate": 4.832841565105837e-06, "loss": 1.0297, "step": 6710 }, { "epoch": 0.08183734903050467, "grad_norm": 2.1264681816101074, "learning_rate": 4.8325208466966e-06, "loss": 1.007, "step": 6715 }, { "epoch": 0.08189828525465248, "grad_norm": 2.0781240463256836, "learning_rate": 4.832200128287364e-06, "loss": 0.9298, "step": 6720 }, { "epoch": 0.08195922147880029, "grad_norm": 1.8630543947219849, "learning_rate": 4.831879409878127e-06, "loss": 0.978, "step": 6725 }, { "epoch": 0.0820201577029481, "grad_norm": 2.0776751041412354, "learning_rate": 4.831558691468891e-06, "loss": 0.9492, "step": 6730 }, { "epoch": 0.08208109392709591, "grad_norm": 2.1292319297790527, "learning_rate": 4.831237973059654e-06, "loss": 0.9119, "step": 6735 }, { "epoch": 0.0821420301512437, "grad_norm": 2.0286993980407715, "learning_rate": 4.830917254650417e-06, "loss": 1.0102, "step": 6740 }, { "epoch": 0.08220296637539151, "grad_norm": 2.1906161308288574, "learning_rate": 4.830596536241181e-06, "loss": 0.9493, "step": 6745 }, { "epoch": 0.08226390259953932, "grad_norm": 2.0219569206237793, "learning_rate": 4.830275817831944e-06, "loss": 0.9461, "step": 6750 }, { "epoch": 0.08232483882368713, "grad_norm": 2.259857177734375, "learning_rate": 4.829955099422708e-06, "loss": 0.8681, "step": 6755 }, { "epoch": 0.08238577504783494, "grad_norm": 2.005310297012329, "learning_rate": 4.829634381013471e-06, "loss": 0.8268, "step": 6760 }, { "epoch": 0.08244671127198275, "grad_norm": 2.002131700515747, "learning_rate": 4.829313662604234e-06, "loss": 0.9997, "step": 6765 }, { "epoch": 0.08250764749613054, "grad_norm": 1.9874975681304932, "learning_rate": 4.828992944194998e-06, "loss": 0.9303, "step": 6770 }, { "epoch": 0.08256858372027835, "grad_norm": 2.607520818710327, "learning_rate": 4.828672225785761e-06, "loss": 0.9192, "step": 6775 }, { "epoch": 0.08262951994442616, "grad_norm": 1.9687776565551758, "learning_rate": 4.828351507376524e-06, "loss": 0.9226, "step": 6780 }, { "epoch": 0.08269045616857397, "grad_norm": 2.0982048511505127, "learning_rate": 4.8280307889672876e-06, "loss": 1.0145, "step": 6785 }, { "epoch": 0.08275139239272178, "grad_norm": 2.272130012512207, "learning_rate": 4.8277100705580506e-06, "loss": 1.0008, "step": 6790 }, { "epoch": 0.08281232861686959, "grad_norm": 2.0021095275878906, "learning_rate": 4.827389352148814e-06, "loss": 1.0029, "step": 6795 }, { "epoch": 0.0828732648410174, "grad_norm": 2.047048807144165, "learning_rate": 4.8270686337395774e-06, "loss": 0.9435, "step": 6800 }, { "epoch": 0.08293420106516519, "grad_norm": 1.803330421447754, "learning_rate": 4.8267479153303405e-06, "loss": 0.9188, "step": 6805 }, { "epoch": 0.082995137289313, "grad_norm": 1.958532452583313, "learning_rate": 4.8264271969211035e-06, "loss": 0.9401, "step": 6810 }, { "epoch": 0.08305607351346081, "grad_norm": 2.256840705871582, "learning_rate": 4.826106478511867e-06, "loss": 0.9876, "step": 6815 }, { "epoch": 0.08311700973760862, "grad_norm": 2.127922534942627, "learning_rate": 4.82578576010263e-06, "loss": 0.9026, "step": 6820 }, { "epoch": 0.08317794596175643, "grad_norm": 1.9300429821014404, "learning_rate": 4.825465041693393e-06, "loss": 1.0304, "step": 6825 }, { "epoch": 0.08323888218590424, "grad_norm": 1.6210451126098633, "learning_rate": 4.825144323284157e-06, "loss": 0.9997, "step": 6830 }, { "epoch": 0.08329981841005205, "grad_norm": 2.147120952606201, "learning_rate": 4.82482360487492e-06, "loss": 0.9247, "step": 6835 }, { "epoch": 0.08336075463419984, "grad_norm": 2.223076343536377, "learning_rate": 4.824502886465683e-06, "loss": 0.8884, "step": 6840 }, { "epoch": 0.08342169085834765, "grad_norm": 2.0280697345733643, "learning_rate": 4.824182168056446e-06, "loss": 0.9265, "step": 6845 }, { "epoch": 0.08348262708249546, "grad_norm": 1.8725453615188599, "learning_rate": 4.82386144964721e-06, "loss": 0.9532, "step": 6850 }, { "epoch": 0.08354356330664327, "grad_norm": 2.098389148712158, "learning_rate": 4.823540731237973e-06, "loss": 0.9415, "step": 6855 }, { "epoch": 0.08360449953079108, "grad_norm": 2.304896831512451, "learning_rate": 4.823220012828736e-06, "loss": 0.9173, "step": 6860 }, { "epoch": 0.08366543575493889, "grad_norm": 1.8739991188049316, "learning_rate": 4.8228992944195e-06, "loss": 0.9026, "step": 6865 }, { "epoch": 0.0837263719790867, "grad_norm": 2.0766959190368652, "learning_rate": 4.822578576010263e-06, "loss": 0.8932, "step": 6870 }, { "epoch": 0.08378730820323449, "grad_norm": 1.8863236904144287, "learning_rate": 4.822257857601026e-06, "loss": 0.9049, "step": 6875 }, { "epoch": 0.0838482444273823, "grad_norm": 2.046523332595825, "learning_rate": 4.82193713919179e-06, "loss": 0.9392, "step": 6880 }, { "epoch": 0.08390918065153011, "grad_norm": 2.533127784729004, "learning_rate": 4.821616420782553e-06, "loss": 1.0062, "step": 6885 }, { "epoch": 0.08397011687567792, "grad_norm": 3.9309749603271484, "learning_rate": 4.821295702373317e-06, "loss": 0.9183, "step": 6890 }, { "epoch": 0.08403105309982573, "grad_norm": 2.158825159072876, "learning_rate": 4.82097498396408e-06, "loss": 0.8893, "step": 6895 }, { "epoch": 0.08409198932397353, "grad_norm": 2.085160255432129, "learning_rate": 4.820654265554843e-06, "loss": 0.9788, "step": 6900 }, { "epoch": 0.08415292554812133, "grad_norm": 2.0617053508758545, "learning_rate": 4.820333547145607e-06, "loss": 0.9726, "step": 6905 }, { "epoch": 0.08421386177226914, "grad_norm": 2.2503397464752197, "learning_rate": 4.82001282873637e-06, "loss": 0.9334, "step": 6910 }, { "epoch": 0.08427479799641695, "grad_norm": 2.3371682167053223, "learning_rate": 4.819692110327134e-06, "loss": 0.8757, "step": 6915 }, { "epoch": 0.08433573422056476, "grad_norm": 2.2563772201538086, "learning_rate": 4.819371391917897e-06, "loss": 0.9836, "step": 6920 }, { "epoch": 0.08439667044471257, "grad_norm": 2.0311968326568604, "learning_rate": 4.81905067350866e-06, "loss": 0.9449, "step": 6925 }, { "epoch": 0.08445760666886037, "grad_norm": 2.3209314346313477, "learning_rate": 4.8187299550994235e-06, "loss": 0.9442, "step": 6930 }, { "epoch": 0.08451854289300818, "grad_norm": 1.913816213607788, "learning_rate": 4.8184092366901865e-06, "loss": 0.8495, "step": 6935 }, { "epoch": 0.08457947911715598, "grad_norm": 1.995356559753418, "learning_rate": 4.8180885182809495e-06, "loss": 0.9561, "step": 6940 }, { "epoch": 0.08464041534130379, "grad_norm": 1.801537275314331, "learning_rate": 4.817767799871713e-06, "loss": 0.9261, "step": 6945 }, { "epoch": 0.0847013515654516, "grad_norm": 2.6798839569091797, "learning_rate": 4.8174470814624764e-06, "loss": 0.8827, "step": 6950 }, { "epoch": 0.0847622877895994, "grad_norm": 1.7612385749816895, "learning_rate": 4.8171263630532394e-06, "loss": 0.8418, "step": 6955 }, { "epoch": 0.08482322401374721, "grad_norm": 2.2119243144989014, "learning_rate": 4.816805644644003e-06, "loss": 0.9782, "step": 6960 }, { "epoch": 0.08488416023789502, "grad_norm": 1.9775493144989014, "learning_rate": 4.816484926234766e-06, "loss": 0.9578, "step": 6965 }, { "epoch": 0.08494509646204283, "grad_norm": 2.13533091545105, "learning_rate": 4.816164207825529e-06, "loss": 0.8675, "step": 6970 }, { "epoch": 0.08500603268619063, "grad_norm": 1.7837802171707153, "learning_rate": 4.815843489416293e-06, "loss": 0.8706, "step": 6975 }, { "epoch": 0.08506696891033844, "grad_norm": 1.8827228546142578, "learning_rate": 4.815522771007056e-06, "loss": 0.8749, "step": 6980 }, { "epoch": 0.08512790513448625, "grad_norm": 1.9937866926193237, "learning_rate": 4.815202052597819e-06, "loss": 0.9861, "step": 6985 }, { "epoch": 0.08518884135863405, "grad_norm": 2.369575262069702, "learning_rate": 4.814881334188583e-06, "loss": 0.9679, "step": 6990 }, { "epoch": 0.08524977758278186, "grad_norm": 1.9273473024368286, "learning_rate": 4.814560615779346e-06, "loss": 0.9434, "step": 6995 }, { "epoch": 0.08531071380692967, "grad_norm": 1.9399508237838745, "learning_rate": 4.814239897370109e-06, "loss": 1.0243, "step": 7000 }, { "epoch": 0.08537165003107748, "grad_norm": 2.240410804748535, "learning_rate": 4.813919178960873e-06, "loss": 0.9298, "step": 7005 }, { "epoch": 0.08543258625522528, "grad_norm": 2.1479990482330322, "learning_rate": 4.813598460551636e-06, "loss": 0.9476, "step": 7010 }, { "epoch": 0.08549352247937309, "grad_norm": 1.8833796977996826, "learning_rate": 4.813277742142399e-06, "loss": 0.9234, "step": 7015 }, { "epoch": 0.0855544587035209, "grad_norm": 2.0111441612243652, "learning_rate": 4.812957023733162e-06, "loss": 0.9193, "step": 7020 }, { "epoch": 0.0856153949276687, "grad_norm": 2.1026954650878906, "learning_rate": 4.812636305323926e-06, "loss": 0.9646, "step": 7025 }, { "epoch": 0.08567633115181651, "grad_norm": 1.9978861808776855, "learning_rate": 4.812315586914689e-06, "loss": 0.9295, "step": 7030 }, { "epoch": 0.08573726737596432, "grad_norm": 2.606916904449463, "learning_rate": 4.811994868505453e-06, "loss": 0.9624, "step": 7035 }, { "epoch": 0.08579820360011212, "grad_norm": 1.979007601737976, "learning_rate": 4.811674150096216e-06, "loss": 0.911, "step": 7040 }, { "epoch": 0.08585913982425993, "grad_norm": 2.080368995666504, "learning_rate": 4.811353431686979e-06, "loss": 0.9265, "step": 7045 }, { "epoch": 0.08592007604840773, "grad_norm": 1.968972086906433, "learning_rate": 4.811032713277743e-06, "loss": 0.9741, "step": 7050 }, { "epoch": 0.08598101227255554, "grad_norm": 1.7645843029022217, "learning_rate": 4.810711994868506e-06, "loss": 0.9414, "step": 7055 }, { "epoch": 0.08604194849670335, "grad_norm": 1.9203622341156006, "learning_rate": 4.8103912764592696e-06, "loss": 0.9265, "step": 7060 }, { "epoch": 0.08610288472085116, "grad_norm": 2.1524224281311035, "learning_rate": 4.810070558050033e-06, "loss": 0.9571, "step": 7065 }, { "epoch": 0.08616382094499897, "grad_norm": 2.2659592628479004, "learning_rate": 4.809749839640796e-06, "loss": 0.9798, "step": 7070 }, { "epoch": 0.08622475716914677, "grad_norm": 2.1412947177886963, "learning_rate": 4.8094291212315595e-06, "loss": 0.943, "step": 7075 }, { "epoch": 0.08628569339329457, "grad_norm": 2.15057373046875, "learning_rate": 4.8091084028223225e-06, "loss": 1.0446, "step": 7080 }, { "epoch": 0.08634662961744238, "grad_norm": 1.8152341842651367, "learning_rate": 4.808787684413086e-06, "loss": 0.9268, "step": 7085 }, { "epoch": 0.08640756584159019, "grad_norm": 2.045264482498169, "learning_rate": 4.808466966003849e-06, "loss": 1.0339, "step": 7090 }, { "epoch": 0.086468502065738, "grad_norm": 1.918103575706482, "learning_rate": 4.808146247594612e-06, "loss": 0.8814, "step": 7095 }, { "epoch": 0.08652943828988581, "grad_norm": 2.0492961406707764, "learning_rate": 4.807825529185375e-06, "loss": 0.8957, "step": 7100 }, { "epoch": 0.08659037451403362, "grad_norm": 2.0213563442230225, "learning_rate": 4.807504810776139e-06, "loss": 0.9426, "step": 7105 }, { "epoch": 0.08665131073818141, "grad_norm": 2.1699202060699463, "learning_rate": 4.807184092366902e-06, "loss": 0.9215, "step": 7110 }, { "epoch": 0.08671224696232922, "grad_norm": 2.030061721801758, "learning_rate": 4.806863373957665e-06, "loss": 0.8768, "step": 7115 }, { "epoch": 0.08677318318647703, "grad_norm": 2.064182758331299, "learning_rate": 4.806542655548429e-06, "loss": 0.9863, "step": 7120 }, { "epoch": 0.08683411941062484, "grad_norm": 2.4626317024230957, "learning_rate": 4.806221937139192e-06, "loss": 0.998, "step": 7125 }, { "epoch": 0.08689505563477265, "grad_norm": 2.010493755340576, "learning_rate": 4.805901218729955e-06, "loss": 0.9528, "step": 7130 }, { "epoch": 0.08695599185892046, "grad_norm": 2.316166400909424, "learning_rate": 4.805580500320719e-06, "loss": 0.9919, "step": 7135 }, { "epoch": 0.08701692808306825, "grad_norm": 2.232994556427002, "learning_rate": 4.805259781911482e-06, "loss": 0.9721, "step": 7140 }, { "epoch": 0.08707786430721606, "grad_norm": 2.201192855834961, "learning_rate": 4.804939063502245e-06, "loss": 0.8649, "step": 7145 }, { "epoch": 0.08713880053136387, "grad_norm": 1.9804211854934692, "learning_rate": 4.804618345093009e-06, "loss": 0.8755, "step": 7150 }, { "epoch": 0.08719973675551168, "grad_norm": 2.115044355392456, "learning_rate": 4.804297626683772e-06, "loss": 0.912, "step": 7155 }, { "epoch": 0.08726067297965949, "grad_norm": 1.9226917028427124, "learning_rate": 4.803976908274535e-06, "loss": 0.8996, "step": 7160 }, { "epoch": 0.0873216092038073, "grad_norm": 2.469843864440918, "learning_rate": 4.803656189865299e-06, "loss": 0.9265, "step": 7165 }, { "epoch": 0.08738254542795511, "grad_norm": 1.862473487854004, "learning_rate": 4.803335471456062e-06, "loss": 0.9069, "step": 7170 }, { "epoch": 0.0874434816521029, "grad_norm": 2.1908648014068604, "learning_rate": 4.803014753046825e-06, "loss": 0.9108, "step": 7175 }, { "epoch": 0.08750441787625071, "grad_norm": 1.9259998798370361, "learning_rate": 4.802694034637588e-06, "loss": 0.9424, "step": 7180 }, { "epoch": 0.08756535410039852, "grad_norm": 1.9923855066299438, "learning_rate": 4.802373316228352e-06, "loss": 0.977, "step": 7185 }, { "epoch": 0.08762629032454633, "grad_norm": 2.2348594665527344, "learning_rate": 4.802052597819115e-06, "loss": 0.9438, "step": 7190 }, { "epoch": 0.08768722654869414, "grad_norm": 2.5709354877471924, "learning_rate": 4.801731879409879e-06, "loss": 0.861, "step": 7195 }, { "epoch": 0.08774816277284195, "grad_norm": 2.3195202350616455, "learning_rate": 4.801411161000642e-06, "loss": 0.9483, "step": 7200 }, { "epoch": 0.08780909899698976, "grad_norm": 2.03287410736084, "learning_rate": 4.8010904425914055e-06, "loss": 0.8875, "step": 7205 }, { "epoch": 0.08787003522113755, "grad_norm": 1.8566420078277588, "learning_rate": 4.8007697241821685e-06, "loss": 1.0144, "step": 7210 }, { "epoch": 0.08793097144528536, "grad_norm": 1.7437676191329956, "learning_rate": 4.8004490057729316e-06, "loss": 0.942, "step": 7215 }, { "epoch": 0.08799190766943317, "grad_norm": 2.264672040939331, "learning_rate": 4.800128287363695e-06, "loss": 0.9234, "step": 7220 }, { "epoch": 0.08805284389358098, "grad_norm": 1.9979230165481567, "learning_rate": 4.7998075689544584e-06, "loss": 0.9703, "step": 7225 }, { "epoch": 0.08811378011772879, "grad_norm": 2.232041835784912, "learning_rate": 4.799486850545222e-06, "loss": 0.8951, "step": 7230 }, { "epoch": 0.0881747163418766, "grad_norm": 1.929734230041504, "learning_rate": 4.799166132135985e-06, "loss": 0.8287, "step": 7235 }, { "epoch": 0.0882356525660244, "grad_norm": 2.5515799522399902, "learning_rate": 4.798845413726748e-06, "loss": 0.9935, "step": 7240 }, { "epoch": 0.0882965887901722, "grad_norm": 2.0635592937469482, "learning_rate": 4.798524695317512e-06, "loss": 0.9554, "step": 7245 }, { "epoch": 0.08835752501432001, "grad_norm": 1.8852143287658691, "learning_rate": 4.798203976908275e-06, "loss": 0.9404, "step": 7250 }, { "epoch": 0.08841846123846782, "grad_norm": 1.988961935043335, "learning_rate": 4.797883258499038e-06, "loss": 0.947, "step": 7255 }, { "epoch": 0.08847939746261563, "grad_norm": 1.9010993242263794, "learning_rate": 4.797562540089802e-06, "loss": 0.9721, "step": 7260 }, { "epoch": 0.08854033368676344, "grad_norm": 1.9704500436782837, "learning_rate": 4.797241821680565e-06, "loss": 0.9831, "step": 7265 }, { "epoch": 0.08860126991091125, "grad_norm": 1.7750638723373413, "learning_rate": 4.796921103271328e-06, "loss": 0.9347, "step": 7270 }, { "epoch": 0.08866220613505904, "grad_norm": 2.259902238845825, "learning_rate": 4.796600384862091e-06, "loss": 0.8963, "step": 7275 }, { "epoch": 0.08872314235920685, "grad_norm": 2.3083865642547607, "learning_rate": 4.796279666452855e-06, "loss": 0.9615, "step": 7280 }, { "epoch": 0.08878407858335466, "grad_norm": 1.662541389465332, "learning_rate": 4.795958948043618e-06, "loss": 1.0297, "step": 7285 }, { "epoch": 0.08884501480750247, "grad_norm": 2.17964243888855, "learning_rate": 4.795638229634381e-06, "loss": 0.9333, "step": 7290 }, { "epoch": 0.08890595103165028, "grad_norm": 1.9623513221740723, "learning_rate": 4.795317511225145e-06, "loss": 0.8575, "step": 7295 }, { "epoch": 0.08896688725579809, "grad_norm": 2.552565813064575, "learning_rate": 4.794996792815908e-06, "loss": 0.8584, "step": 7300 }, { "epoch": 0.0890278234799459, "grad_norm": 1.9918886423110962, "learning_rate": 4.794676074406671e-06, "loss": 0.9252, "step": 7305 }, { "epoch": 0.08908875970409369, "grad_norm": 1.8891286849975586, "learning_rate": 4.794355355997435e-06, "loss": 0.9816, "step": 7310 }, { "epoch": 0.0891496959282415, "grad_norm": 1.948836088180542, "learning_rate": 4.794034637588198e-06, "loss": 0.9149, "step": 7315 }, { "epoch": 0.08921063215238931, "grad_norm": 2.093383312225342, "learning_rate": 4.793713919178961e-06, "loss": 0.8837, "step": 7320 }, { "epoch": 0.08927156837653712, "grad_norm": 1.8534992933273315, "learning_rate": 4.793393200769725e-06, "loss": 0.9868, "step": 7325 }, { "epoch": 0.08933250460068493, "grad_norm": 1.8200798034667969, "learning_rate": 4.793072482360488e-06, "loss": 0.9674, "step": 7330 }, { "epoch": 0.08939344082483273, "grad_norm": 2.7880172729492188, "learning_rate": 4.792751763951251e-06, "loss": 0.911, "step": 7335 }, { "epoch": 0.08945437704898054, "grad_norm": 1.8843952417373657, "learning_rate": 4.792431045542015e-06, "loss": 0.9647, "step": 7340 }, { "epoch": 0.08951531327312834, "grad_norm": 1.9503439664840698, "learning_rate": 4.792110327132778e-06, "loss": 0.9082, "step": 7345 }, { "epoch": 0.08957624949727615, "grad_norm": 2.1398580074310303, "learning_rate": 4.791789608723541e-06, "loss": 0.9661, "step": 7350 }, { "epoch": 0.08963718572142396, "grad_norm": 2.228052854537964, "learning_rate": 4.7914688903143045e-06, "loss": 0.9547, "step": 7355 }, { "epoch": 0.08969812194557177, "grad_norm": 2.178101062774658, "learning_rate": 4.7911481719050675e-06, "loss": 0.9321, "step": 7360 }, { "epoch": 0.08975905816971957, "grad_norm": 1.9091296195983887, "learning_rate": 4.790827453495831e-06, "loss": 0.8839, "step": 7365 }, { "epoch": 0.08981999439386738, "grad_norm": 2.036616325378418, "learning_rate": 4.790506735086594e-06, "loss": 0.9618, "step": 7370 }, { "epoch": 0.08988093061801518, "grad_norm": 1.8935574293136597, "learning_rate": 4.790186016677357e-06, "loss": 0.9196, "step": 7375 }, { "epoch": 0.08994186684216299, "grad_norm": 2.1481430530548096, "learning_rate": 4.789865298268121e-06, "loss": 1.0012, "step": 7380 }, { "epoch": 0.0900028030663108, "grad_norm": 1.9660571813583374, "learning_rate": 4.789544579858884e-06, "loss": 0.9383, "step": 7385 }, { "epoch": 0.0900637392904586, "grad_norm": 1.9712305068969727, "learning_rate": 4.789223861449648e-06, "loss": 0.9258, "step": 7390 }, { "epoch": 0.09012467551460641, "grad_norm": 2.0245754718780518, "learning_rate": 4.788903143040411e-06, "loss": 0.944, "step": 7395 }, { "epoch": 0.09018561173875422, "grad_norm": 1.764752984046936, "learning_rate": 4.788582424631174e-06, "loss": 0.8894, "step": 7400 }, { "epoch": 0.09024654796290203, "grad_norm": 1.7972642183303833, "learning_rate": 4.788261706221938e-06, "loss": 0.9141, "step": 7405 }, { "epoch": 0.09030748418704983, "grad_norm": 1.695796012878418, "learning_rate": 4.787940987812701e-06, "loss": 0.8916, "step": 7410 }, { "epoch": 0.09036842041119764, "grad_norm": 2.307032346725464, "learning_rate": 4.787620269403464e-06, "loss": 0.9779, "step": 7415 }, { "epoch": 0.09042935663534545, "grad_norm": 1.9759291410446167, "learning_rate": 4.787299550994228e-06, "loss": 0.8752, "step": 7420 }, { "epoch": 0.09049029285949325, "grad_norm": 2.1299140453338623, "learning_rate": 4.786978832584991e-06, "loss": 0.8818, "step": 7425 }, { "epoch": 0.09055122908364106, "grad_norm": 2.145289421081543, "learning_rate": 4.786658114175754e-06, "loss": 0.8695, "step": 7430 }, { "epoch": 0.09061216530778887, "grad_norm": 1.712414264678955, "learning_rate": 4.786337395766517e-06, "loss": 0.9529, "step": 7435 }, { "epoch": 0.09067310153193668, "grad_norm": 2.1111371517181396, "learning_rate": 4.786016677357281e-06, "loss": 1.0121, "step": 7440 }, { "epoch": 0.09073403775608448, "grad_norm": 2.2585930824279785, "learning_rate": 4.785695958948044e-06, "loss": 0.9543, "step": 7445 }, { "epoch": 0.09079497398023229, "grad_norm": 1.9005712270736694, "learning_rate": 4.785375240538807e-06, "loss": 0.8967, "step": 7450 }, { "epoch": 0.0908559102043801, "grad_norm": 1.816818356513977, "learning_rate": 4.785054522129571e-06, "loss": 0.9299, "step": 7455 }, { "epoch": 0.0909168464285279, "grad_norm": 1.7387731075286865, "learning_rate": 4.784733803720334e-06, "loss": 0.9008, "step": 7460 }, { "epoch": 0.09097778265267571, "grad_norm": 1.9858990907669067, "learning_rate": 4.784413085311097e-06, "loss": 0.9326, "step": 7465 }, { "epoch": 0.09103871887682352, "grad_norm": 2.012162923812866, "learning_rate": 4.784092366901861e-06, "loss": 0.9306, "step": 7470 }, { "epoch": 0.09109965510097133, "grad_norm": 2.296046257019043, "learning_rate": 4.783771648492624e-06, "loss": 0.9259, "step": 7475 }, { "epoch": 0.09116059132511913, "grad_norm": 1.9344017505645752, "learning_rate": 4.783450930083387e-06, "loss": 1.0413, "step": 7480 }, { "epoch": 0.09122152754926693, "grad_norm": 1.8740172386169434, "learning_rate": 4.7831302116741506e-06, "loss": 0.9214, "step": 7485 }, { "epoch": 0.09128246377341474, "grad_norm": 1.9415215253829956, "learning_rate": 4.7828094932649136e-06, "loss": 0.9384, "step": 7490 }, { "epoch": 0.09134339999756255, "grad_norm": 2.087634563446045, "learning_rate": 4.782488774855677e-06, "loss": 0.958, "step": 7495 }, { "epoch": 0.09140433622171036, "grad_norm": 2.1337504386901855, "learning_rate": 4.7821680564464405e-06, "loss": 0.9191, "step": 7500 }, { "epoch": 0.09146527244585817, "grad_norm": 2.1419808864593506, "learning_rate": 4.7818473380372035e-06, "loss": 0.9644, "step": 7505 }, { "epoch": 0.09152620867000597, "grad_norm": 1.9303083419799805, "learning_rate": 4.781526619627967e-06, "loss": 0.8933, "step": 7510 }, { "epoch": 0.09158714489415377, "grad_norm": 2.114201545715332, "learning_rate": 4.78120590121873e-06, "loss": 0.8703, "step": 7515 }, { "epoch": 0.09164808111830158, "grad_norm": 1.8774518966674805, "learning_rate": 4.780885182809493e-06, "loss": 0.9047, "step": 7520 }, { "epoch": 0.09170901734244939, "grad_norm": 2.0034146308898926, "learning_rate": 4.780564464400257e-06, "loss": 0.9415, "step": 7525 }, { "epoch": 0.0917699535665972, "grad_norm": 1.9107784032821655, "learning_rate": 4.78024374599102e-06, "loss": 0.9348, "step": 7530 }, { "epoch": 0.09183088979074501, "grad_norm": 2.7788245677948, "learning_rate": 4.779923027581784e-06, "loss": 0.8856, "step": 7535 }, { "epoch": 0.09189182601489282, "grad_norm": 2.1568024158477783, "learning_rate": 4.779602309172547e-06, "loss": 0.9656, "step": 7540 }, { "epoch": 0.09195276223904061, "grad_norm": 2.3137993812561035, "learning_rate": 4.77928159076331e-06, "loss": 0.9639, "step": 7545 }, { "epoch": 0.09201369846318842, "grad_norm": 2.0391459465026855, "learning_rate": 4.778960872354074e-06, "loss": 0.9702, "step": 7550 }, { "epoch": 0.09207463468733623, "grad_norm": 2.3775007724761963, "learning_rate": 4.778640153944837e-06, "loss": 0.9219, "step": 7555 }, { "epoch": 0.09213557091148404, "grad_norm": 2.3394289016723633, "learning_rate": 4.7783194355356e-06, "loss": 0.9266, "step": 7560 }, { "epoch": 0.09219650713563185, "grad_norm": 1.9215071201324463, "learning_rate": 4.777998717126364e-06, "loss": 0.968, "step": 7565 }, { "epoch": 0.09225744335977966, "grad_norm": 2.4627113342285156, "learning_rate": 4.777677998717127e-06, "loss": 0.9582, "step": 7570 }, { "epoch": 0.09231837958392747, "grad_norm": 2.063096046447754, "learning_rate": 4.77735728030789e-06, "loss": 0.8718, "step": 7575 }, { "epoch": 0.09237931580807526, "grad_norm": 1.9310442209243774, "learning_rate": 4.777036561898654e-06, "loss": 0.896, "step": 7580 }, { "epoch": 0.09244025203222307, "grad_norm": 2.0023815631866455, "learning_rate": 4.776715843489417e-06, "loss": 0.9391, "step": 7585 }, { "epoch": 0.09250118825637088, "grad_norm": 2.039250135421753, "learning_rate": 4.77639512508018e-06, "loss": 0.9318, "step": 7590 }, { "epoch": 0.09256212448051869, "grad_norm": 2.2031846046447754, "learning_rate": 4.776074406670944e-06, "loss": 0.9068, "step": 7595 }, { "epoch": 0.0926230607046665, "grad_norm": 2.0693371295928955, "learning_rate": 4.775753688261707e-06, "loss": 1.0018, "step": 7600 }, { "epoch": 0.09268399692881431, "grad_norm": 1.874010682106018, "learning_rate": 4.77543296985247e-06, "loss": 0.9287, "step": 7605 }, { "epoch": 0.0927449331529621, "grad_norm": 2.509889602661133, "learning_rate": 4.775112251443233e-06, "loss": 0.9274, "step": 7610 }, { "epoch": 0.09280586937710991, "grad_norm": 1.6892116069793701, "learning_rate": 4.774791533033997e-06, "loss": 0.8933, "step": 7615 }, { "epoch": 0.09286680560125772, "grad_norm": 1.9980014562606812, "learning_rate": 4.77447081462476e-06, "loss": 0.9043, "step": 7620 }, { "epoch": 0.09292774182540553, "grad_norm": 1.9919480085372925, "learning_rate": 4.774150096215523e-06, "loss": 0.9664, "step": 7625 }, { "epoch": 0.09298867804955334, "grad_norm": 1.9175097942352295, "learning_rate": 4.7738293778062865e-06, "loss": 0.7896, "step": 7630 }, { "epoch": 0.09304961427370115, "grad_norm": 2.050696611404419, "learning_rate": 4.7735086593970495e-06, "loss": 0.9997, "step": 7635 }, { "epoch": 0.09311055049784896, "grad_norm": 1.9436743259429932, "learning_rate": 4.7731879409878125e-06, "loss": 0.9563, "step": 7640 }, { "epoch": 0.09317148672199675, "grad_norm": 1.8851969242095947, "learning_rate": 4.772867222578576e-06, "loss": 0.8627, "step": 7645 }, { "epoch": 0.09323242294614456, "grad_norm": 2.2604520320892334, "learning_rate": 4.7725465041693394e-06, "loss": 1.0101, "step": 7650 }, { "epoch": 0.09329335917029237, "grad_norm": 2.0388846397399902, "learning_rate": 4.7722257857601024e-06, "loss": 0.925, "step": 7655 }, { "epoch": 0.09335429539444018, "grad_norm": 2.3255746364593506, "learning_rate": 4.771905067350866e-06, "loss": 0.9189, "step": 7660 }, { "epoch": 0.09341523161858799, "grad_norm": 2.3324596881866455, "learning_rate": 4.771584348941629e-06, "loss": 0.9179, "step": 7665 }, { "epoch": 0.0934761678427358, "grad_norm": 2.131633996963501, "learning_rate": 4.771263630532393e-06, "loss": 0.9498, "step": 7670 }, { "epoch": 0.0935371040668836, "grad_norm": 1.9440830945968628, "learning_rate": 4.770942912123156e-06, "loss": 0.9398, "step": 7675 }, { "epoch": 0.0935980402910314, "grad_norm": 2.046832799911499, "learning_rate": 4.77062219371392e-06, "loss": 0.8824, "step": 7680 }, { "epoch": 0.09365897651517921, "grad_norm": 1.9470250606536865, "learning_rate": 4.770301475304683e-06, "loss": 0.9341, "step": 7685 }, { "epoch": 0.09371991273932702, "grad_norm": 2.802237033843994, "learning_rate": 4.769980756895446e-06, "loss": 0.9616, "step": 7690 }, { "epoch": 0.09378084896347483, "grad_norm": 2.2245025634765625, "learning_rate": 4.76966003848621e-06, "loss": 0.945, "step": 7695 }, { "epoch": 0.09384178518762264, "grad_norm": 2.4269654750823975, "learning_rate": 4.769339320076973e-06, "loss": 0.9817, "step": 7700 }, { "epoch": 0.09390272141177045, "grad_norm": 1.7477675676345825, "learning_rate": 4.769018601667736e-06, "loss": 0.8647, "step": 7705 }, { "epoch": 0.09396365763591825, "grad_norm": 2.1790032386779785, "learning_rate": 4.7686978832585e-06, "loss": 0.9095, "step": 7710 }, { "epoch": 0.09402459386006605, "grad_norm": 1.8828861713409424, "learning_rate": 4.768377164849263e-06, "loss": 0.9425, "step": 7715 }, { "epoch": 0.09408553008421386, "grad_norm": 1.9986685514450073, "learning_rate": 4.768056446440026e-06, "loss": 0.9922, "step": 7720 }, { "epoch": 0.09414646630836167, "grad_norm": 2.0289125442504883, "learning_rate": 4.76773572803079e-06, "loss": 0.9324, "step": 7725 }, { "epoch": 0.09420740253250948, "grad_norm": 2.800323247909546, "learning_rate": 4.767415009621553e-06, "loss": 0.9455, "step": 7730 }, { "epoch": 0.09426833875665729, "grad_norm": 1.8607292175292969, "learning_rate": 4.767094291212316e-06, "loss": 0.8636, "step": 7735 }, { "epoch": 0.0943292749808051, "grad_norm": 2.039998769760132, "learning_rate": 4.76677357280308e-06, "loss": 0.9222, "step": 7740 }, { "epoch": 0.09439021120495289, "grad_norm": 2.0676393508911133, "learning_rate": 4.766452854393843e-06, "loss": 0.9879, "step": 7745 }, { "epoch": 0.0944511474291007, "grad_norm": 1.8667700290679932, "learning_rate": 4.766132135984606e-06, "loss": 0.8277, "step": 7750 }, { "epoch": 0.09451208365324851, "grad_norm": 2.386425733566284, "learning_rate": 4.7658114175753696e-06, "loss": 0.9428, "step": 7755 }, { "epoch": 0.09457301987739632, "grad_norm": 2.339238405227661, "learning_rate": 4.7654906991661326e-06, "loss": 0.9573, "step": 7760 }, { "epoch": 0.09463395610154413, "grad_norm": 1.9945268630981445, "learning_rate": 4.765169980756896e-06, "loss": 0.9589, "step": 7765 }, { "epoch": 0.09469489232569193, "grad_norm": 2.328235626220703, "learning_rate": 4.764849262347659e-06, "loss": 0.9408, "step": 7770 }, { "epoch": 0.09475582854983974, "grad_norm": 2.4777190685272217, "learning_rate": 4.7645285439384225e-06, "loss": 0.9322, "step": 7775 }, { "epoch": 0.09481676477398754, "grad_norm": 1.7701034545898438, "learning_rate": 4.7642078255291855e-06, "loss": 0.9504, "step": 7780 }, { "epoch": 0.09487770099813535, "grad_norm": 1.9291515350341797, "learning_rate": 4.7638871071199485e-06, "loss": 0.9343, "step": 7785 }, { "epoch": 0.09493863722228316, "grad_norm": 2.085003614425659, "learning_rate": 4.763566388710712e-06, "loss": 1.0029, "step": 7790 }, { "epoch": 0.09499957344643097, "grad_norm": 1.9069958925247192, "learning_rate": 4.763245670301475e-06, "loss": 0.9362, "step": 7795 }, { "epoch": 0.09506050967057877, "grad_norm": 1.968677282333374, "learning_rate": 4.762924951892238e-06, "loss": 0.926, "step": 7800 }, { "epoch": 0.09512144589472658, "grad_norm": 2.6942532062530518, "learning_rate": 4.762604233483002e-06, "loss": 0.955, "step": 7805 }, { "epoch": 0.09518238211887439, "grad_norm": 1.788994550704956, "learning_rate": 4.762283515073765e-06, "loss": 0.9481, "step": 7810 }, { "epoch": 0.09524331834302219, "grad_norm": 2.0349502563476562, "learning_rate": 4.761962796664529e-06, "loss": 0.8999, "step": 7815 }, { "epoch": 0.09530425456717, "grad_norm": 2.2127668857574463, "learning_rate": 4.761642078255292e-06, "loss": 0.9298, "step": 7820 }, { "epoch": 0.0953651907913178, "grad_norm": 1.834019660949707, "learning_rate": 4.761321359846055e-06, "loss": 0.9614, "step": 7825 }, { "epoch": 0.09542612701546561, "grad_norm": 1.9361318349838257, "learning_rate": 4.761000641436819e-06, "loss": 0.9215, "step": 7830 }, { "epoch": 0.09548706323961342, "grad_norm": 1.8366612195968628, "learning_rate": 4.760679923027582e-06, "loss": 0.913, "step": 7835 }, { "epoch": 0.09554799946376123, "grad_norm": 2.0292234420776367, "learning_rate": 4.760359204618346e-06, "loss": 0.867, "step": 7840 }, { "epoch": 0.09560893568790904, "grad_norm": 2.0118727684020996, "learning_rate": 4.760038486209109e-06, "loss": 0.8915, "step": 7845 }, { "epoch": 0.09566987191205684, "grad_norm": 2.1455886363983154, "learning_rate": 4.759717767799872e-06, "loss": 0.9006, "step": 7850 }, { "epoch": 0.09573080813620465, "grad_norm": 2.0479836463928223, "learning_rate": 4.759397049390636e-06, "loss": 0.9329, "step": 7855 }, { "epoch": 0.09579174436035245, "grad_norm": 2.0135562419891357, "learning_rate": 4.759076330981399e-06, "loss": 0.9377, "step": 7860 }, { "epoch": 0.09585268058450026, "grad_norm": 2.192251443862915, "learning_rate": 4.758755612572162e-06, "loss": 0.9248, "step": 7865 }, { "epoch": 0.09591361680864807, "grad_norm": 1.8468612432479858, "learning_rate": 4.758434894162926e-06, "loss": 0.8941, "step": 7870 }, { "epoch": 0.09597455303279588, "grad_norm": 1.8939534425735474, "learning_rate": 4.758114175753689e-06, "loss": 0.9306, "step": 7875 }, { "epoch": 0.09603548925694368, "grad_norm": 1.7973227500915527, "learning_rate": 4.757793457344452e-06, "loss": 0.9296, "step": 7880 }, { "epoch": 0.09609642548109149, "grad_norm": 2.0273401737213135, "learning_rate": 4.757472738935216e-06, "loss": 0.9726, "step": 7885 }, { "epoch": 0.0961573617052393, "grad_norm": 1.879355788230896, "learning_rate": 4.757152020525979e-06, "loss": 0.9444, "step": 7890 }, { "epoch": 0.0962182979293871, "grad_norm": 2.0944130420684814, "learning_rate": 4.756831302116742e-06, "loss": 1.0072, "step": 7895 }, { "epoch": 0.09627923415353491, "grad_norm": 1.8287577629089355, "learning_rate": 4.7565105837075055e-06, "loss": 0.9492, "step": 7900 }, { "epoch": 0.09634017037768272, "grad_norm": 2.052328586578369, "learning_rate": 4.7561898652982685e-06, "loss": 0.9685, "step": 7905 }, { "epoch": 0.09640110660183053, "grad_norm": 2.0835893154144287, "learning_rate": 4.7558691468890315e-06, "loss": 0.9739, "step": 7910 }, { "epoch": 0.09646204282597833, "grad_norm": 2.044435977935791, "learning_rate": 4.755548428479795e-06, "loss": 0.8989, "step": 7915 }, { "epoch": 0.09652297905012613, "grad_norm": 2.5506880283355713, "learning_rate": 4.7552277100705584e-06, "loss": 0.9254, "step": 7920 }, { "epoch": 0.09658391527427394, "grad_norm": 2.6179862022399902, "learning_rate": 4.7549069916613214e-06, "loss": 0.9625, "step": 7925 }, { "epoch": 0.09664485149842175, "grad_norm": 2.035508394241333, "learning_rate": 4.754586273252085e-06, "loss": 0.8931, "step": 7930 }, { "epoch": 0.09670578772256956, "grad_norm": 2.3074281215667725, "learning_rate": 4.754265554842848e-06, "loss": 0.9566, "step": 7935 }, { "epoch": 0.09676672394671737, "grad_norm": 2.231227397918701, "learning_rate": 4.753944836433611e-06, "loss": 0.9724, "step": 7940 }, { "epoch": 0.09682766017086518, "grad_norm": 2.014248847961426, "learning_rate": 4.753624118024374e-06, "loss": 0.856, "step": 7945 }, { "epoch": 0.09688859639501297, "grad_norm": 1.8635444641113281, "learning_rate": 4.753303399615138e-06, "loss": 0.934, "step": 7950 }, { "epoch": 0.09694953261916078, "grad_norm": 1.9935200214385986, "learning_rate": 4.752982681205901e-06, "loss": 0.8592, "step": 7955 }, { "epoch": 0.09701046884330859, "grad_norm": 1.9657137393951416, "learning_rate": 4.752661962796665e-06, "loss": 0.9721, "step": 7960 }, { "epoch": 0.0970714050674564, "grad_norm": 2.453711748123169, "learning_rate": 4.752341244387428e-06, "loss": 0.8848, "step": 7965 }, { "epoch": 0.09713234129160421, "grad_norm": 1.7992454767227173, "learning_rate": 4.752020525978191e-06, "loss": 0.8806, "step": 7970 }, { "epoch": 0.09719327751575202, "grad_norm": 2.0806386470794678, "learning_rate": 4.751699807568955e-06, "loss": 0.8862, "step": 7975 }, { "epoch": 0.09725421373989981, "grad_norm": 2.00010085105896, "learning_rate": 4.751379089159718e-06, "loss": 0.8547, "step": 7980 }, { "epoch": 0.09731514996404762, "grad_norm": 2.1465208530426025, "learning_rate": 4.751058370750482e-06, "loss": 0.8616, "step": 7985 }, { "epoch": 0.09737608618819543, "grad_norm": 2.1473915576934814, "learning_rate": 4.750737652341245e-06, "loss": 0.958, "step": 7990 }, { "epoch": 0.09743702241234324, "grad_norm": 1.8751198053359985, "learning_rate": 4.750416933932008e-06, "loss": 0.9461, "step": 7995 }, { "epoch": 0.09749795863649105, "grad_norm": 2.240013837814331, "learning_rate": 4.750096215522772e-06, "loss": 0.9478, "step": 8000 }, { "epoch": 0.09755889486063886, "grad_norm": 2.051497459411621, "learning_rate": 4.749775497113535e-06, "loss": 0.9445, "step": 8005 }, { "epoch": 0.09761983108478667, "grad_norm": 2.0940582752227783, "learning_rate": 4.749454778704299e-06, "loss": 0.9122, "step": 8010 }, { "epoch": 0.09768076730893446, "grad_norm": 1.943544626235962, "learning_rate": 4.749134060295062e-06, "loss": 0.9138, "step": 8015 }, { "epoch": 0.09774170353308227, "grad_norm": 1.9337201118469238, "learning_rate": 4.748813341885825e-06, "loss": 0.9471, "step": 8020 }, { "epoch": 0.09780263975723008, "grad_norm": 1.9981952905654907, "learning_rate": 4.748492623476588e-06, "loss": 0.8799, "step": 8025 }, { "epoch": 0.09786357598137789, "grad_norm": 1.9739201068878174, "learning_rate": 4.7481719050673516e-06, "loss": 0.9339, "step": 8030 }, { "epoch": 0.0979245122055257, "grad_norm": 2.1532156467437744, "learning_rate": 4.747851186658115e-06, "loss": 0.9163, "step": 8035 }, { "epoch": 0.09798544842967351, "grad_norm": 2.5503170490264893, "learning_rate": 4.747530468248878e-06, "loss": 0.9939, "step": 8040 }, { "epoch": 0.09804638465382132, "grad_norm": 2.2300260066986084, "learning_rate": 4.7472097498396415e-06, "loss": 0.9251, "step": 8045 }, { "epoch": 0.09810732087796911, "grad_norm": 1.907518982887268, "learning_rate": 4.7468890314304045e-06, "loss": 0.8997, "step": 8050 }, { "epoch": 0.09816825710211692, "grad_norm": 1.9880061149597168, "learning_rate": 4.7465683130211675e-06, "loss": 0.945, "step": 8055 }, { "epoch": 0.09822919332626473, "grad_norm": 2.1005167961120605, "learning_rate": 4.746247594611931e-06, "loss": 0.9307, "step": 8060 }, { "epoch": 0.09829012955041254, "grad_norm": 1.977311134338379, "learning_rate": 4.745926876202694e-06, "loss": 0.9764, "step": 8065 }, { "epoch": 0.09835106577456035, "grad_norm": 2.2148396968841553, "learning_rate": 4.745606157793457e-06, "loss": 0.9612, "step": 8070 }, { "epoch": 0.09841200199870816, "grad_norm": 1.9558004140853882, "learning_rate": 4.745285439384221e-06, "loss": 0.9243, "step": 8075 }, { "epoch": 0.09847293822285597, "grad_norm": 2.092973470687866, "learning_rate": 4.744964720974984e-06, "loss": 0.8852, "step": 8080 }, { "epoch": 0.09853387444700376, "grad_norm": 2.068937063217163, "learning_rate": 4.744644002565747e-06, "loss": 0.8795, "step": 8085 }, { "epoch": 0.09859481067115157, "grad_norm": 2.284715175628662, "learning_rate": 4.744323284156511e-06, "loss": 0.9748, "step": 8090 }, { "epoch": 0.09865574689529938, "grad_norm": 1.8317261934280396, "learning_rate": 4.744002565747274e-06, "loss": 0.9279, "step": 8095 }, { "epoch": 0.09871668311944719, "grad_norm": 1.7052876949310303, "learning_rate": 4.743681847338037e-06, "loss": 0.8786, "step": 8100 }, { "epoch": 0.098777619343595, "grad_norm": 1.7708340883255005, "learning_rate": 4.7433611289288e-06, "loss": 0.8757, "step": 8105 }, { "epoch": 0.0988385555677428, "grad_norm": 1.8371254205703735, "learning_rate": 4.743040410519564e-06, "loss": 0.926, "step": 8110 }, { "epoch": 0.0988994917918906, "grad_norm": 2.1609303951263428, "learning_rate": 4.742719692110327e-06, "loss": 0.9644, "step": 8115 }, { "epoch": 0.09896042801603841, "grad_norm": 2.453162670135498, "learning_rate": 4.742398973701091e-06, "loss": 0.9982, "step": 8120 }, { "epoch": 0.09902136424018622, "grad_norm": 2.12156081199646, "learning_rate": 4.742078255291854e-06, "loss": 0.8854, "step": 8125 }, { "epoch": 0.09908230046433403, "grad_norm": 2.1669681072235107, "learning_rate": 4.741757536882618e-06, "loss": 0.9831, "step": 8130 }, { "epoch": 0.09914323668848184, "grad_norm": 1.9174309968948364, "learning_rate": 4.741436818473381e-06, "loss": 0.9587, "step": 8135 }, { "epoch": 0.09920417291262965, "grad_norm": 2.142359733581543, "learning_rate": 4.741116100064144e-06, "loss": 0.9817, "step": 8140 }, { "epoch": 0.09926510913677745, "grad_norm": 1.910460114479065, "learning_rate": 4.740795381654908e-06, "loss": 0.9199, "step": 8145 }, { "epoch": 0.09932604536092525, "grad_norm": 1.7850617170333862, "learning_rate": 4.740474663245671e-06, "loss": 1.0097, "step": 8150 }, { "epoch": 0.09938698158507306, "grad_norm": 1.9688884019851685, "learning_rate": 4.740153944836435e-06, "loss": 0.9237, "step": 8155 }, { "epoch": 0.09944791780922087, "grad_norm": 1.808931827545166, "learning_rate": 4.739833226427198e-06, "loss": 0.9247, "step": 8160 }, { "epoch": 0.09950885403336868, "grad_norm": 2.1794910430908203, "learning_rate": 4.739512508017961e-06, "loss": 0.9264, "step": 8165 }, { "epoch": 0.09956979025751649, "grad_norm": 1.9130667448043823, "learning_rate": 4.7391917896087245e-06, "loss": 0.9731, "step": 8170 }, { "epoch": 0.0996307264816643, "grad_norm": 1.9818557500839233, "learning_rate": 4.7388710711994875e-06, "loss": 0.9205, "step": 8175 }, { "epoch": 0.0996916627058121, "grad_norm": 1.848726511001587, "learning_rate": 4.7385503527902505e-06, "loss": 0.907, "step": 8180 }, { "epoch": 0.0997525989299599, "grad_norm": 1.8059109449386597, "learning_rate": 4.738229634381014e-06, "loss": 0.8405, "step": 8185 }, { "epoch": 0.09981353515410771, "grad_norm": 1.829579472541809, "learning_rate": 4.7379089159717774e-06, "loss": 0.8735, "step": 8190 }, { "epoch": 0.09987447137825552, "grad_norm": 2.108583688735962, "learning_rate": 4.7375881975625404e-06, "loss": 0.8996, "step": 8195 }, { "epoch": 0.09993540760240333, "grad_norm": 1.9747000932693481, "learning_rate": 4.7372674791533035e-06, "loss": 0.9006, "step": 8200 }, { "epoch": 0.09999634382655113, "grad_norm": 1.7163729667663574, "learning_rate": 4.736946760744067e-06, "loss": 0.8801, "step": 8205 }, { "epoch": 0.10005728005069894, "grad_norm": 2.7396254539489746, "learning_rate": 4.73662604233483e-06, "loss": 1.0181, "step": 8210 }, { "epoch": 0.10011821627484674, "grad_norm": 1.8210437297821045, "learning_rate": 4.736305323925593e-06, "loss": 0.9266, "step": 8215 }, { "epoch": 0.10017915249899455, "grad_norm": 2.2399251461029053, "learning_rate": 4.735984605516357e-06, "loss": 0.9118, "step": 8220 }, { "epoch": 0.10024008872314236, "grad_norm": 2.1350021362304688, "learning_rate": 4.73566388710712e-06, "loss": 1.0263, "step": 8225 }, { "epoch": 0.10030102494729017, "grad_norm": 2.651933431625366, "learning_rate": 4.735343168697883e-06, "loss": 0.9436, "step": 8230 }, { "epoch": 0.10036196117143797, "grad_norm": 2.0300865173339844, "learning_rate": 4.735022450288647e-06, "loss": 0.9293, "step": 8235 }, { "epoch": 0.10042289739558578, "grad_norm": 2.4126877784729004, "learning_rate": 4.73470173187941e-06, "loss": 0.9545, "step": 8240 }, { "epoch": 0.10048383361973359, "grad_norm": 2.0946855545043945, "learning_rate": 4.734381013470173e-06, "loss": 0.9226, "step": 8245 }, { "epoch": 0.10054476984388139, "grad_norm": 1.912548303604126, "learning_rate": 4.734060295060937e-06, "loss": 0.9411, "step": 8250 }, { "epoch": 0.1006057060680292, "grad_norm": 1.9393367767333984, "learning_rate": 4.7337395766517e-06, "loss": 0.8489, "step": 8255 }, { "epoch": 0.100666642292177, "grad_norm": 1.9224495887756348, "learning_rate": 4.733418858242463e-06, "loss": 0.9675, "step": 8260 }, { "epoch": 0.10072757851632481, "grad_norm": 1.8809266090393066, "learning_rate": 4.733098139833227e-06, "loss": 0.9509, "step": 8265 }, { "epoch": 0.10078851474047262, "grad_norm": 1.9752098321914673, "learning_rate": 4.73277742142399e-06, "loss": 0.9521, "step": 8270 }, { "epoch": 0.10084945096462043, "grad_norm": 2.281622886657715, "learning_rate": 4.732456703014753e-06, "loss": 0.9094, "step": 8275 }, { "epoch": 0.10091038718876824, "grad_norm": 1.8813058137893677, "learning_rate": 4.732135984605517e-06, "loss": 0.9427, "step": 8280 }, { "epoch": 0.10097132341291604, "grad_norm": 2.137038230895996, "learning_rate": 4.73181526619628e-06, "loss": 0.9183, "step": 8285 }, { "epoch": 0.10103225963706385, "grad_norm": 2.013803243637085, "learning_rate": 4.731494547787044e-06, "loss": 0.8682, "step": 8290 }, { "epoch": 0.10109319586121165, "grad_norm": 2.1299915313720703, "learning_rate": 4.731173829377807e-06, "loss": 0.9129, "step": 8295 }, { "epoch": 0.10115413208535946, "grad_norm": 1.7628681659698486, "learning_rate": 4.73085311096857e-06, "loss": 0.9425, "step": 8300 }, { "epoch": 0.10121506830950727, "grad_norm": 2.2011988162994385, "learning_rate": 4.730532392559334e-06, "loss": 0.9456, "step": 8305 }, { "epoch": 0.10127600453365508, "grad_norm": 2.090008020401001, "learning_rate": 4.730211674150097e-06, "loss": 0.8824, "step": 8310 }, { "epoch": 0.10133694075780289, "grad_norm": 2.1633684635162354, "learning_rate": 4.7298909557408605e-06, "loss": 0.9289, "step": 8315 }, { "epoch": 0.10139787698195069, "grad_norm": 1.915521502494812, "learning_rate": 4.7295702373316235e-06, "loss": 0.9418, "step": 8320 }, { "epoch": 0.1014588132060985, "grad_norm": 2.534162759780884, "learning_rate": 4.7292495189223865e-06, "loss": 0.9973, "step": 8325 }, { "epoch": 0.1015197494302463, "grad_norm": 2.0982794761657715, "learning_rate": 4.72892880051315e-06, "loss": 0.9422, "step": 8330 }, { "epoch": 0.10158068565439411, "grad_norm": 1.723221778869629, "learning_rate": 4.728608082103913e-06, "loss": 0.9044, "step": 8335 }, { "epoch": 0.10164162187854192, "grad_norm": 2.0464704036712646, "learning_rate": 4.728287363694676e-06, "loss": 0.8624, "step": 8340 }, { "epoch": 0.10170255810268973, "grad_norm": 2.0106399059295654, "learning_rate": 4.72796664528544e-06, "loss": 0.8766, "step": 8345 }, { "epoch": 0.10176349432683753, "grad_norm": 1.7106642723083496, "learning_rate": 4.727645926876203e-06, "loss": 0.9186, "step": 8350 }, { "epoch": 0.10182443055098533, "grad_norm": 1.933763861656189, "learning_rate": 4.727325208466966e-06, "loss": 0.9405, "step": 8355 }, { "epoch": 0.10188536677513314, "grad_norm": 1.952106237411499, "learning_rate": 4.727004490057729e-06, "loss": 0.9308, "step": 8360 }, { "epoch": 0.10194630299928095, "grad_norm": 1.8675868511199951, "learning_rate": 4.726683771648493e-06, "loss": 0.8666, "step": 8365 }, { "epoch": 0.10200723922342876, "grad_norm": 1.9538604021072388, "learning_rate": 4.726363053239256e-06, "loss": 0.9626, "step": 8370 }, { "epoch": 0.10206817544757657, "grad_norm": 2.164177417755127, "learning_rate": 4.726042334830019e-06, "loss": 0.9281, "step": 8375 }, { "epoch": 0.10212911167172438, "grad_norm": 2.079716205596924, "learning_rate": 4.725721616420783e-06, "loss": 0.9062, "step": 8380 }, { "epoch": 0.10219004789587217, "grad_norm": 2.1122231483459473, "learning_rate": 4.725400898011546e-06, "loss": 0.8971, "step": 8385 }, { "epoch": 0.10225098412001998, "grad_norm": 1.94412100315094, "learning_rate": 4.725080179602309e-06, "loss": 0.9775, "step": 8390 }, { "epoch": 0.10231192034416779, "grad_norm": 1.8804377317428589, "learning_rate": 4.724759461193073e-06, "loss": 0.8966, "step": 8395 }, { "epoch": 0.1023728565683156, "grad_norm": 2.111886501312256, "learning_rate": 4.724438742783836e-06, "loss": 0.9226, "step": 8400 }, { "epoch": 0.10243379279246341, "grad_norm": 1.907268762588501, "learning_rate": 4.724118024374599e-06, "loss": 0.9056, "step": 8405 }, { "epoch": 0.10249472901661122, "grad_norm": 2.2375073432922363, "learning_rate": 4.723797305965363e-06, "loss": 0.8765, "step": 8410 }, { "epoch": 0.10255566524075903, "grad_norm": 1.7985740900039673, "learning_rate": 4.723476587556126e-06, "loss": 0.9213, "step": 8415 }, { "epoch": 0.10261660146490682, "grad_norm": 2.4621880054473877, "learning_rate": 4.723155869146889e-06, "loss": 0.8999, "step": 8420 }, { "epoch": 0.10267753768905463, "grad_norm": 2.0128042697906494, "learning_rate": 4.722835150737653e-06, "loss": 0.9362, "step": 8425 }, { "epoch": 0.10273847391320244, "grad_norm": 2.057873249053955, "learning_rate": 4.722514432328416e-06, "loss": 0.8582, "step": 8430 }, { "epoch": 0.10279941013735025, "grad_norm": 2.1493334770202637, "learning_rate": 4.72219371391918e-06, "loss": 0.9137, "step": 8435 }, { "epoch": 0.10286034636149806, "grad_norm": 2.013162612915039, "learning_rate": 4.721872995509943e-06, "loss": 0.9461, "step": 8440 }, { "epoch": 0.10292128258564587, "grad_norm": 2.1264898777008057, "learning_rate": 4.721552277100706e-06, "loss": 0.9174, "step": 8445 }, { "epoch": 0.10298221880979366, "grad_norm": 1.7873125076293945, "learning_rate": 4.7212315586914695e-06, "loss": 0.953, "step": 8450 }, { "epoch": 0.10304315503394147, "grad_norm": 1.9829106330871582, "learning_rate": 4.7209108402822326e-06, "loss": 0.9002, "step": 8455 }, { "epoch": 0.10310409125808928, "grad_norm": 1.9847782850265503, "learning_rate": 4.7205901218729964e-06, "loss": 0.883, "step": 8460 }, { "epoch": 0.10316502748223709, "grad_norm": 1.9709986448287964, "learning_rate": 4.7202694034637594e-06, "loss": 0.8996, "step": 8465 }, { "epoch": 0.1032259637063849, "grad_norm": 2.243861675262451, "learning_rate": 4.7199486850545225e-06, "loss": 0.8688, "step": 8470 }, { "epoch": 0.10328689993053271, "grad_norm": 2.0623607635498047, "learning_rate": 4.719627966645286e-06, "loss": 0.9445, "step": 8475 }, { "epoch": 0.10334783615468052, "grad_norm": 2.3858582973480225, "learning_rate": 4.719307248236049e-06, "loss": 0.9482, "step": 8480 }, { "epoch": 0.10340877237882831, "grad_norm": 1.9949631690979004, "learning_rate": 4.718986529826812e-06, "loss": 1.0407, "step": 8485 }, { "epoch": 0.10346970860297612, "grad_norm": 1.771828532218933, "learning_rate": 4.718665811417576e-06, "loss": 0.8925, "step": 8490 }, { "epoch": 0.10353064482712393, "grad_norm": 1.801328182220459, "learning_rate": 4.718345093008339e-06, "loss": 0.9454, "step": 8495 }, { "epoch": 0.10359158105127174, "grad_norm": 1.9129337072372437, "learning_rate": 4.718024374599102e-06, "loss": 0.8845, "step": 8500 }, { "epoch": 0.10365251727541955, "grad_norm": 1.7861639261245728, "learning_rate": 4.717703656189866e-06, "loss": 0.8786, "step": 8505 }, { "epoch": 0.10371345349956736, "grad_norm": 2.0920839309692383, "learning_rate": 4.717382937780629e-06, "loss": 0.8753, "step": 8510 }, { "epoch": 0.10377438972371517, "grad_norm": 2.259150981903076, "learning_rate": 4.717062219371392e-06, "loss": 0.901, "step": 8515 }, { "epoch": 0.10383532594786296, "grad_norm": 2.0308563709259033, "learning_rate": 4.716741500962156e-06, "loss": 0.8956, "step": 8520 }, { "epoch": 0.10389626217201077, "grad_norm": 2.2257537841796875, "learning_rate": 4.716420782552919e-06, "loss": 0.9338, "step": 8525 }, { "epoch": 0.10395719839615858, "grad_norm": 2.4234044551849365, "learning_rate": 4.716100064143682e-06, "loss": 0.9623, "step": 8530 }, { "epoch": 0.10401813462030639, "grad_norm": 1.7508103847503662, "learning_rate": 4.715779345734445e-06, "loss": 0.9441, "step": 8535 }, { "epoch": 0.1040790708444542, "grad_norm": 2.0962331295013428, "learning_rate": 4.715458627325209e-06, "loss": 0.9369, "step": 8540 }, { "epoch": 0.104140007068602, "grad_norm": 2.57608699798584, "learning_rate": 4.715137908915972e-06, "loss": 0.878, "step": 8545 }, { "epoch": 0.10420094329274981, "grad_norm": 1.4122627973556519, "learning_rate": 4.714817190506735e-06, "loss": 0.948, "step": 8550 }, { "epoch": 0.10426187951689761, "grad_norm": 2.0487003326416016, "learning_rate": 4.714496472097499e-06, "loss": 0.8674, "step": 8555 }, { "epoch": 0.10432281574104542, "grad_norm": 2.05422306060791, "learning_rate": 4.714175753688262e-06, "loss": 0.947, "step": 8560 }, { "epoch": 0.10438375196519323, "grad_norm": 2.2921335697174072, "learning_rate": 4.713855035279025e-06, "loss": 0.9365, "step": 8565 }, { "epoch": 0.10444468818934104, "grad_norm": 1.9454784393310547, "learning_rate": 4.713534316869789e-06, "loss": 0.9512, "step": 8570 }, { "epoch": 0.10450562441348885, "grad_norm": 2.273024320602417, "learning_rate": 4.713213598460552e-06, "loss": 0.9612, "step": 8575 }, { "epoch": 0.10456656063763665, "grad_norm": 2.373622179031372, "learning_rate": 4.712892880051315e-06, "loss": 0.9814, "step": 8580 }, { "epoch": 0.10462749686178445, "grad_norm": 2.14728045463562, "learning_rate": 4.712572161642079e-06, "loss": 0.9376, "step": 8585 }, { "epoch": 0.10468843308593226, "grad_norm": 2.0130231380462646, "learning_rate": 4.712251443232842e-06, "loss": 0.964, "step": 8590 }, { "epoch": 0.10474936931008007, "grad_norm": 2.24383544921875, "learning_rate": 4.7119307248236055e-06, "loss": 0.9583, "step": 8595 }, { "epoch": 0.10481030553422788, "grad_norm": 2.1340322494506836, "learning_rate": 4.7116100064143685e-06, "loss": 0.9356, "step": 8600 }, { "epoch": 0.10487124175837569, "grad_norm": 2.0383927822113037, "learning_rate": 4.711289288005132e-06, "loss": 0.8983, "step": 8605 }, { "epoch": 0.1049321779825235, "grad_norm": 1.886786699295044, "learning_rate": 4.710968569595895e-06, "loss": 0.8821, "step": 8610 }, { "epoch": 0.1049931142066713, "grad_norm": 2.8415093421936035, "learning_rate": 4.710647851186658e-06, "loss": 1.0096, "step": 8615 }, { "epoch": 0.1050540504308191, "grad_norm": 1.9301505088806152, "learning_rate": 4.710327132777422e-06, "loss": 0.9409, "step": 8620 }, { "epoch": 0.10511498665496691, "grad_norm": 1.9805537462234497, "learning_rate": 4.710006414368185e-06, "loss": 0.9074, "step": 8625 }, { "epoch": 0.10517592287911472, "grad_norm": 1.8591296672821045, "learning_rate": 4.709685695958948e-06, "loss": 0.9034, "step": 8630 }, { "epoch": 0.10523685910326253, "grad_norm": 1.7955665588378906, "learning_rate": 4.709364977549712e-06, "loss": 0.9127, "step": 8635 }, { "epoch": 0.10529779532741033, "grad_norm": 2.5051748752593994, "learning_rate": 4.709044259140475e-06, "loss": 0.9178, "step": 8640 }, { "epoch": 0.10535873155155814, "grad_norm": 2.084951639175415, "learning_rate": 4.708723540731238e-06, "loss": 0.9538, "step": 8645 }, { "epoch": 0.10541966777570595, "grad_norm": 1.8253487348556519, "learning_rate": 4.708402822322002e-06, "loss": 0.9624, "step": 8650 }, { "epoch": 0.10548060399985375, "grad_norm": 1.9926048517227173, "learning_rate": 4.708082103912765e-06, "loss": 0.8531, "step": 8655 }, { "epoch": 0.10554154022400156, "grad_norm": 1.608812689781189, "learning_rate": 4.707761385503528e-06, "loss": 0.9197, "step": 8660 }, { "epoch": 0.10560247644814937, "grad_norm": 2.028993606567383, "learning_rate": 4.707440667094292e-06, "loss": 0.8848, "step": 8665 }, { "epoch": 0.10566341267229717, "grad_norm": 1.7788608074188232, "learning_rate": 4.707119948685055e-06, "loss": 0.8961, "step": 8670 }, { "epoch": 0.10572434889644498, "grad_norm": 2.062516689300537, "learning_rate": 4.706799230275818e-06, "loss": 0.9206, "step": 8675 }, { "epoch": 0.10578528512059279, "grad_norm": 2.3905508518218994, "learning_rate": 4.706478511866582e-06, "loss": 0.9224, "step": 8680 }, { "epoch": 0.1058462213447406, "grad_norm": 2.0313804149627686, "learning_rate": 4.706157793457345e-06, "loss": 0.8911, "step": 8685 }, { "epoch": 0.1059071575688884, "grad_norm": 2.1226439476013184, "learning_rate": 4.705837075048108e-06, "loss": 0.8701, "step": 8690 }, { "epoch": 0.1059680937930362, "grad_norm": 1.8921107053756714, "learning_rate": 4.705516356638871e-06, "loss": 0.8865, "step": 8695 }, { "epoch": 0.10602903001718401, "grad_norm": 2.1945528984069824, "learning_rate": 4.705195638229635e-06, "loss": 0.8622, "step": 8700 }, { "epoch": 0.10608996624133182, "grad_norm": 1.9721648693084717, "learning_rate": 4.704874919820398e-06, "loss": 0.9495, "step": 8705 }, { "epoch": 0.10615090246547963, "grad_norm": 2.362914800643921, "learning_rate": 4.704554201411161e-06, "loss": 0.9749, "step": 8710 }, { "epoch": 0.10621183868962744, "grad_norm": 2.0176494121551514, "learning_rate": 4.704233483001925e-06, "loss": 0.9392, "step": 8715 }, { "epoch": 0.10627277491377524, "grad_norm": 1.9091452360153198, "learning_rate": 4.703912764592688e-06, "loss": 0.9511, "step": 8720 }, { "epoch": 0.10633371113792305, "grad_norm": 2.334660530090332, "learning_rate": 4.703592046183451e-06, "loss": 0.9379, "step": 8725 }, { "epoch": 0.10639464736207085, "grad_norm": 1.8132244348526, "learning_rate": 4.703271327774215e-06, "loss": 0.9295, "step": 8730 }, { "epoch": 0.10645558358621866, "grad_norm": 1.9874327182769775, "learning_rate": 4.702950609364978e-06, "loss": 1.0099, "step": 8735 }, { "epoch": 0.10651651981036647, "grad_norm": 2.3487184047698975, "learning_rate": 4.7026298909557415e-06, "loss": 0.8699, "step": 8740 }, { "epoch": 0.10657745603451428, "grad_norm": 1.9554133415222168, "learning_rate": 4.7023091725465045e-06, "loss": 0.8675, "step": 8745 }, { "epoch": 0.10663839225866209, "grad_norm": 2.011171817779541, "learning_rate": 4.7019884541372675e-06, "loss": 0.9071, "step": 8750 }, { "epoch": 0.10669932848280989, "grad_norm": 2.0520031452178955, "learning_rate": 4.701667735728031e-06, "loss": 0.8319, "step": 8755 }, { "epoch": 0.1067602647069577, "grad_norm": 1.8349335193634033, "learning_rate": 4.701347017318794e-06, "loss": 0.9501, "step": 8760 }, { "epoch": 0.1068212009311055, "grad_norm": 2.1500792503356934, "learning_rate": 4.701026298909558e-06, "loss": 0.9643, "step": 8765 }, { "epoch": 0.10688213715525331, "grad_norm": 2.229980945587158, "learning_rate": 4.700705580500321e-06, "loss": 0.9771, "step": 8770 }, { "epoch": 0.10694307337940112, "grad_norm": 1.972834587097168, "learning_rate": 4.700384862091084e-06, "loss": 0.9233, "step": 8775 }, { "epoch": 0.10700400960354893, "grad_norm": 1.9759653806686401, "learning_rate": 4.700064143681848e-06, "loss": 0.9168, "step": 8780 }, { "epoch": 0.10706494582769674, "grad_norm": 2.271766424179077, "learning_rate": 4.699743425272611e-06, "loss": 0.9794, "step": 8785 }, { "epoch": 0.10712588205184453, "grad_norm": 2.166882276535034, "learning_rate": 4.699422706863374e-06, "loss": 0.9148, "step": 8790 }, { "epoch": 0.10718681827599234, "grad_norm": 1.9247188568115234, "learning_rate": 4.699101988454138e-06, "loss": 0.8595, "step": 8795 }, { "epoch": 0.10724775450014015, "grad_norm": 1.9505727291107178, "learning_rate": 4.698781270044901e-06, "loss": 0.9009, "step": 8800 }, { "epoch": 0.10730869072428796, "grad_norm": 1.8629591464996338, "learning_rate": 4.698460551635664e-06, "loss": 0.896, "step": 8805 }, { "epoch": 0.10736962694843577, "grad_norm": 1.9186738729476929, "learning_rate": 4.698139833226428e-06, "loss": 0.8621, "step": 8810 }, { "epoch": 0.10743056317258358, "grad_norm": 1.8040310144424438, "learning_rate": 4.697819114817191e-06, "loss": 0.938, "step": 8815 }, { "epoch": 0.10749149939673137, "grad_norm": 2.1483869552612305, "learning_rate": 4.697498396407954e-06, "loss": 0.9993, "step": 8820 }, { "epoch": 0.10755243562087918, "grad_norm": 2.3203861713409424, "learning_rate": 4.697177677998718e-06, "loss": 0.9322, "step": 8825 }, { "epoch": 0.10761337184502699, "grad_norm": 2.322564125061035, "learning_rate": 4.696856959589481e-06, "loss": 0.9818, "step": 8830 }, { "epoch": 0.1076743080691748, "grad_norm": 2.2600574493408203, "learning_rate": 4.696536241180244e-06, "loss": 0.9023, "step": 8835 }, { "epoch": 0.10773524429332261, "grad_norm": 2.2328736782073975, "learning_rate": 4.696215522771008e-06, "loss": 0.9291, "step": 8840 }, { "epoch": 0.10779618051747042, "grad_norm": 1.9144799709320068, "learning_rate": 4.695894804361771e-06, "loss": 0.8776, "step": 8845 }, { "epoch": 0.10785711674161823, "grad_norm": 2.228687286376953, "learning_rate": 4.695574085952534e-06, "loss": 0.8966, "step": 8850 }, { "epoch": 0.10791805296576602, "grad_norm": 2.216179370880127, "learning_rate": 4.695253367543298e-06, "loss": 0.9976, "step": 8855 }, { "epoch": 0.10797898918991383, "grad_norm": 2.129688024520874, "learning_rate": 4.694932649134061e-06, "loss": 0.912, "step": 8860 }, { "epoch": 0.10803992541406164, "grad_norm": 2.51928448677063, "learning_rate": 4.694611930724824e-06, "loss": 0.8829, "step": 8865 }, { "epoch": 0.10810086163820945, "grad_norm": 1.7868096828460693, "learning_rate": 4.694291212315587e-06, "loss": 0.9392, "step": 8870 }, { "epoch": 0.10816179786235726, "grad_norm": 2.2969648838043213, "learning_rate": 4.6939704939063505e-06, "loss": 0.9355, "step": 8875 }, { "epoch": 0.10822273408650507, "grad_norm": 2.531519889831543, "learning_rate": 4.6936497754971136e-06, "loss": 0.925, "step": 8880 }, { "epoch": 0.10828367031065288, "grad_norm": 1.9414974451065063, "learning_rate": 4.693329057087877e-06, "loss": 0.935, "step": 8885 }, { "epoch": 0.10834460653480067, "grad_norm": 2.0161936283111572, "learning_rate": 4.6930083386786404e-06, "loss": 0.8683, "step": 8890 }, { "epoch": 0.10840554275894848, "grad_norm": 2.134080648422241, "learning_rate": 4.6926876202694035e-06, "loss": 0.8908, "step": 8895 }, { "epoch": 0.10846647898309629, "grad_norm": 2.0823206901550293, "learning_rate": 4.692366901860167e-06, "loss": 1.0312, "step": 8900 }, { "epoch": 0.1085274152072441, "grad_norm": 2.5434093475341797, "learning_rate": 4.69204618345093e-06, "loss": 0.8927, "step": 8905 }, { "epoch": 0.10858835143139191, "grad_norm": 1.8958841562271118, "learning_rate": 4.691725465041694e-06, "loss": 0.9106, "step": 8910 }, { "epoch": 0.10864928765553972, "grad_norm": 2.0120837688446045, "learning_rate": 4.691404746632457e-06, "loss": 0.9412, "step": 8915 }, { "epoch": 0.10871022387968753, "grad_norm": 2.5118181705474854, "learning_rate": 4.69108402822322e-06, "loss": 0.9118, "step": 8920 }, { "epoch": 0.10877116010383532, "grad_norm": 2.21207857131958, "learning_rate": 4.690763309813984e-06, "loss": 0.9596, "step": 8925 }, { "epoch": 0.10883209632798313, "grad_norm": 1.9944136142730713, "learning_rate": 4.690442591404747e-06, "loss": 0.883, "step": 8930 }, { "epoch": 0.10889303255213094, "grad_norm": 2.596531629562378, "learning_rate": 4.690121872995511e-06, "loss": 0.8879, "step": 8935 }, { "epoch": 0.10895396877627875, "grad_norm": 1.8541334867477417, "learning_rate": 4.689801154586274e-06, "loss": 0.9456, "step": 8940 }, { "epoch": 0.10901490500042656, "grad_norm": 1.9205018281936646, "learning_rate": 4.689480436177037e-06, "loss": 0.8843, "step": 8945 }, { "epoch": 0.10907584122457437, "grad_norm": 1.8966398239135742, "learning_rate": 4.6891597177678e-06, "loss": 0.9182, "step": 8950 }, { "epoch": 0.10913677744872216, "grad_norm": 1.982514500617981, "learning_rate": 4.688838999358564e-06, "loss": 0.9616, "step": 8955 }, { "epoch": 0.10919771367286997, "grad_norm": 1.9001080989837646, "learning_rate": 4.688518280949327e-06, "loss": 0.9336, "step": 8960 }, { "epoch": 0.10925864989701778, "grad_norm": 2.1538751125335693, "learning_rate": 4.68819756254009e-06, "loss": 0.9369, "step": 8965 }, { "epoch": 0.10931958612116559, "grad_norm": 1.8569443225860596, "learning_rate": 4.687876844130854e-06, "loss": 0.9464, "step": 8970 }, { "epoch": 0.1093805223453134, "grad_norm": 1.9878102540969849, "learning_rate": 4.687556125721617e-06, "loss": 0.9578, "step": 8975 }, { "epoch": 0.1094414585694612, "grad_norm": 1.9622222185134888, "learning_rate": 4.68723540731238e-06, "loss": 0.9452, "step": 8980 }, { "epoch": 0.10950239479360901, "grad_norm": 2.2841594219207764, "learning_rate": 4.686914688903144e-06, "loss": 0.9049, "step": 8985 }, { "epoch": 0.10956333101775681, "grad_norm": 2.296674966812134, "learning_rate": 4.686593970493907e-06, "loss": 0.899, "step": 8990 }, { "epoch": 0.10962426724190462, "grad_norm": 1.8626673221588135, "learning_rate": 4.68627325208467e-06, "loss": 0.9214, "step": 8995 }, { "epoch": 0.10968520346605243, "grad_norm": 2.091069459915161, "learning_rate": 4.685952533675434e-06, "loss": 0.7908, "step": 9000 }, { "epoch": 0.10974613969020024, "grad_norm": 2.104238748550415, "learning_rate": 4.685631815266197e-06, "loss": 0.8986, "step": 9005 }, { "epoch": 0.10980707591434805, "grad_norm": 1.8643653392791748, "learning_rate": 4.68531109685696e-06, "loss": 0.9476, "step": 9010 }, { "epoch": 0.10986801213849585, "grad_norm": 2.2153775691986084, "learning_rate": 4.6849903784477235e-06, "loss": 0.9271, "step": 9015 }, { "epoch": 0.10992894836264366, "grad_norm": 2.354647159576416, "learning_rate": 4.6846696600384865e-06, "loss": 0.949, "step": 9020 }, { "epoch": 0.10998988458679146, "grad_norm": 1.9722691774368286, "learning_rate": 4.6843489416292495e-06, "loss": 0.9375, "step": 9025 }, { "epoch": 0.11005082081093927, "grad_norm": 1.8722853660583496, "learning_rate": 4.6840282232200125e-06, "loss": 0.9361, "step": 9030 }, { "epoch": 0.11011175703508708, "grad_norm": 1.9679877758026123, "learning_rate": 4.683707504810776e-06, "loss": 0.9409, "step": 9035 }, { "epoch": 0.11017269325923489, "grad_norm": 2.148055076599121, "learning_rate": 4.683386786401539e-06, "loss": 0.9988, "step": 9040 }, { "epoch": 0.1102336294833827, "grad_norm": 2.187908887863159, "learning_rate": 4.683066067992303e-06, "loss": 0.8777, "step": 9045 }, { "epoch": 0.1102945657075305, "grad_norm": 2.1598691940307617, "learning_rate": 4.682745349583066e-06, "loss": 0.9972, "step": 9050 }, { "epoch": 0.1103555019316783, "grad_norm": 2.1870830059051514, "learning_rate": 4.682424631173829e-06, "loss": 0.9472, "step": 9055 }, { "epoch": 0.11041643815582611, "grad_norm": 2.0024218559265137, "learning_rate": 4.682103912764593e-06, "loss": 0.9458, "step": 9060 }, { "epoch": 0.11047737437997392, "grad_norm": 2.1309988498687744, "learning_rate": 4.681783194355356e-06, "loss": 0.9614, "step": 9065 }, { "epoch": 0.11053831060412173, "grad_norm": 2.16911244392395, "learning_rate": 4.68146247594612e-06, "loss": 0.8717, "step": 9070 }, { "epoch": 0.11059924682826953, "grad_norm": 1.9830567836761475, "learning_rate": 4.681141757536883e-06, "loss": 0.8951, "step": 9075 }, { "epoch": 0.11066018305241734, "grad_norm": 1.9164162874221802, "learning_rate": 4.680821039127647e-06, "loss": 0.8759, "step": 9080 }, { "epoch": 0.11072111927656515, "grad_norm": 1.8835424184799194, "learning_rate": 4.68050032071841e-06, "loss": 0.8522, "step": 9085 }, { "epoch": 0.11078205550071295, "grad_norm": 2.0209243297576904, "learning_rate": 4.680179602309173e-06, "loss": 0.8784, "step": 9090 }, { "epoch": 0.11084299172486076, "grad_norm": 1.9588443040847778, "learning_rate": 4.679858883899937e-06, "loss": 0.8782, "step": 9095 }, { "epoch": 0.11090392794900857, "grad_norm": 3.047943353652954, "learning_rate": 4.6795381654907e-06, "loss": 0.9327, "step": 9100 }, { "epoch": 0.11096486417315637, "grad_norm": 1.9266809225082397, "learning_rate": 4.679217447081463e-06, "loss": 0.896, "step": 9105 }, { "epoch": 0.11102580039730418, "grad_norm": 2.1279735565185547, "learning_rate": 4.678896728672227e-06, "loss": 0.9468, "step": 9110 }, { "epoch": 0.11108673662145199, "grad_norm": 1.9506089687347412, "learning_rate": 4.67857601026299e-06, "loss": 1.0036, "step": 9115 }, { "epoch": 0.1111476728455998, "grad_norm": 2.0431432723999023, "learning_rate": 4.678255291853753e-06, "loss": 0.9106, "step": 9120 }, { "epoch": 0.1112086090697476, "grad_norm": 2.672241687774658, "learning_rate": 4.677934573444516e-06, "loss": 1.0053, "step": 9125 }, { "epoch": 0.1112695452938954, "grad_norm": 2.221660614013672, "learning_rate": 4.67761385503528e-06, "loss": 0.967, "step": 9130 }, { "epoch": 0.11133048151804321, "grad_norm": 2.272958993911743, "learning_rate": 4.677293136626043e-06, "loss": 0.8787, "step": 9135 }, { "epoch": 0.11139141774219102, "grad_norm": 2.143483877182007, "learning_rate": 4.676972418216806e-06, "loss": 0.9197, "step": 9140 }, { "epoch": 0.11145235396633883, "grad_norm": 2.276283025741577, "learning_rate": 4.6766516998075695e-06, "loss": 0.8678, "step": 9145 }, { "epoch": 0.11151329019048664, "grad_norm": 1.7546840906143188, "learning_rate": 4.6763309813983326e-06, "loss": 0.9093, "step": 9150 }, { "epoch": 0.11157422641463445, "grad_norm": 1.925848126411438, "learning_rate": 4.6760102629890956e-06, "loss": 0.9249, "step": 9155 }, { "epoch": 0.11163516263878225, "grad_norm": 2.0787601470947266, "learning_rate": 4.6756895445798594e-06, "loss": 0.8966, "step": 9160 }, { "epoch": 0.11169609886293005, "grad_norm": 2.062063694000244, "learning_rate": 4.6753688261706225e-06, "loss": 0.8691, "step": 9165 }, { "epoch": 0.11175703508707786, "grad_norm": 2.0217626094818115, "learning_rate": 4.6750481077613855e-06, "loss": 0.9077, "step": 9170 }, { "epoch": 0.11181797131122567, "grad_norm": 1.8872931003570557, "learning_rate": 4.674727389352149e-06, "loss": 0.9269, "step": 9175 }, { "epoch": 0.11187890753537348, "grad_norm": 2.5690670013427734, "learning_rate": 4.674406670942912e-06, "loss": 1.0057, "step": 9180 }, { "epoch": 0.11193984375952129, "grad_norm": 2.168888568878174, "learning_rate": 4.674085952533675e-06, "loss": 0.9176, "step": 9185 }, { "epoch": 0.11200077998366909, "grad_norm": 2.272639274597168, "learning_rate": 4.673765234124439e-06, "loss": 0.8694, "step": 9190 }, { "epoch": 0.1120617162078169, "grad_norm": 1.8720009326934814, "learning_rate": 4.673444515715202e-06, "loss": 0.8707, "step": 9195 }, { "epoch": 0.1121226524319647, "grad_norm": 1.9712108373641968, "learning_rate": 4.673123797305965e-06, "loss": 0.9728, "step": 9200 }, { "epoch": 0.11218358865611251, "grad_norm": 1.9777642488479614, "learning_rate": 4.672803078896729e-06, "loss": 0.9101, "step": 9205 }, { "epoch": 0.11224452488026032, "grad_norm": 1.9033690690994263, "learning_rate": 4.672482360487492e-06, "loss": 0.9168, "step": 9210 }, { "epoch": 0.11230546110440813, "grad_norm": 2.10374116897583, "learning_rate": 4.672161642078256e-06, "loss": 0.9206, "step": 9215 }, { "epoch": 0.11236639732855594, "grad_norm": 2.031608819961548, "learning_rate": 4.671840923669019e-06, "loss": 0.9603, "step": 9220 }, { "epoch": 0.11242733355270373, "grad_norm": 1.8009127378463745, "learning_rate": 4.671520205259782e-06, "loss": 0.8772, "step": 9225 }, { "epoch": 0.11248826977685154, "grad_norm": 1.927183747291565, "learning_rate": 4.671199486850546e-06, "loss": 0.9071, "step": 9230 }, { "epoch": 0.11254920600099935, "grad_norm": 2.0184876918792725, "learning_rate": 4.670878768441309e-06, "loss": 0.9197, "step": 9235 }, { "epoch": 0.11261014222514716, "grad_norm": 1.8451043367385864, "learning_rate": 4.670558050032073e-06, "loss": 0.8791, "step": 9240 }, { "epoch": 0.11267107844929497, "grad_norm": 2.3858132362365723, "learning_rate": 4.670237331622836e-06, "loss": 0.932, "step": 9245 }, { "epoch": 0.11273201467344278, "grad_norm": 2.046093702316284, "learning_rate": 4.669916613213599e-06, "loss": 0.9136, "step": 9250 }, { "epoch": 0.11279295089759059, "grad_norm": 1.820855736732483, "learning_rate": 4.669595894804363e-06, "loss": 0.9631, "step": 9255 }, { "epoch": 0.11285388712173838, "grad_norm": 2.009413719177246, "learning_rate": 4.669275176395126e-06, "loss": 1.0024, "step": 9260 }, { "epoch": 0.11291482334588619, "grad_norm": 1.9854352474212646, "learning_rate": 4.668954457985889e-06, "loss": 0.9197, "step": 9265 }, { "epoch": 0.112975759570034, "grad_norm": 2.000741720199585, "learning_rate": 4.668633739576653e-06, "loss": 1.0011, "step": 9270 }, { "epoch": 0.11303669579418181, "grad_norm": 1.8249249458312988, "learning_rate": 4.668313021167416e-06, "loss": 1.0124, "step": 9275 }, { "epoch": 0.11309763201832962, "grad_norm": 2.2201225757598877, "learning_rate": 4.667992302758179e-06, "loss": 0.8756, "step": 9280 }, { "epoch": 0.11315856824247743, "grad_norm": 2.1351702213287354, "learning_rate": 4.667671584348942e-06, "loss": 0.9641, "step": 9285 }, { "epoch": 0.11321950446662522, "grad_norm": 1.9767404794692993, "learning_rate": 4.6673508659397055e-06, "loss": 0.876, "step": 9290 }, { "epoch": 0.11328044069077303, "grad_norm": 1.9406790733337402, "learning_rate": 4.6670301475304685e-06, "loss": 0.8821, "step": 9295 }, { "epoch": 0.11334137691492084, "grad_norm": 1.779618501663208, "learning_rate": 4.6667094291212315e-06, "loss": 0.9478, "step": 9300 }, { "epoch": 0.11340231313906865, "grad_norm": 2.259201765060425, "learning_rate": 4.666388710711995e-06, "loss": 0.8566, "step": 9305 }, { "epoch": 0.11346324936321646, "grad_norm": 1.838748812675476, "learning_rate": 4.666067992302758e-06, "loss": 0.9328, "step": 9310 }, { "epoch": 0.11352418558736427, "grad_norm": 2.035979747772217, "learning_rate": 4.6657472738935214e-06, "loss": 0.8948, "step": 9315 }, { "epoch": 0.11358512181151208, "grad_norm": 1.9157731533050537, "learning_rate": 4.665426555484285e-06, "loss": 0.9934, "step": 9320 }, { "epoch": 0.11364605803565987, "grad_norm": 2.020784378051758, "learning_rate": 4.665105837075048e-06, "loss": 0.9233, "step": 9325 }, { "epoch": 0.11370699425980768, "grad_norm": 2.1390113830566406, "learning_rate": 4.664785118665811e-06, "loss": 0.9375, "step": 9330 }, { "epoch": 0.11376793048395549, "grad_norm": 1.959855079650879, "learning_rate": 4.664464400256575e-06, "loss": 0.924, "step": 9335 }, { "epoch": 0.1138288667081033, "grad_norm": 1.97223961353302, "learning_rate": 4.664143681847338e-06, "loss": 0.8793, "step": 9340 }, { "epoch": 0.11388980293225111, "grad_norm": 2.327636480331421, "learning_rate": 4.663822963438101e-06, "loss": 0.8706, "step": 9345 }, { "epoch": 0.11395073915639892, "grad_norm": 2.082658052444458, "learning_rate": 4.663502245028865e-06, "loss": 0.8575, "step": 9350 }, { "epoch": 0.11401167538054673, "grad_norm": 1.8172781467437744, "learning_rate": 4.663181526619628e-06, "loss": 0.934, "step": 9355 }, { "epoch": 0.11407261160469452, "grad_norm": 1.700541377067566, "learning_rate": 4.662860808210392e-06, "loss": 0.8789, "step": 9360 }, { "epoch": 0.11413354782884233, "grad_norm": 2.193244457244873, "learning_rate": 4.662540089801155e-06, "loss": 0.9106, "step": 9365 }, { "epoch": 0.11419448405299014, "grad_norm": 1.9993181228637695, "learning_rate": 4.662219371391918e-06, "loss": 0.8906, "step": 9370 }, { "epoch": 0.11425542027713795, "grad_norm": 1.853687047958374, "learning_rate": 4.661898652982682e-06, "loss": 0.9824, "step": 9375 }, { "epoch": 0.11431635650128576, "grad_norm": 1.8884377479553223, "learning_rate": 4.661577934573445e-06, "loss": 0.8926, "step": 9380 }, { "epoch": 0.11437729272543357, "grad_norm": 1.7072685956954956, "learning_rate": 4.661257216164209e-06, "loss": 0.8991, "step": 9385 }, { "epoch": 0.11443822894958137, "grad_norm": 2.2759056091308594, "learning_rate": 4.660936497754972e-06, "loss": 0.9413, "step": 9390 }, { "epoch": 0.11449916517372917, "grad_norm": 2.0962724685668945, "learning_rate": 4.660615779345735e-06, "loss": 0.8852, "step": 9395 }, { "epoch": 0.11456010139787698, "grad_norm": 2.149770975112915, "learning_rate": 4.660295060936499e-06, "loss": 0.8945, "step": 9400 }, { "epoch": 0.11462103762202479, "grad_norm": 2.4796433448791504, "learning_rate": 4.659974342527262e-06, "loss": 0.9667, "step": 9405 }, { "epoch": 0.1146819738461726, "grad_norm": 2.122176170349121, "learning_rate": 4.659653624118025e-06, "loss": 0.9241, "step": 9410 }, { "epoch": 0.1147429100703204, "grad_norm": 2.2734627723693848, "learning_rate": 4.6593329057087885e-06, "loss": 0.8584, "step": 9415 }, { "epoch": 0.11480384629446821, "grad_norm": 2.147123336791992, "learning_rate": 4.6590121872995516e-06, "loss": 0.9612, "step": 9420 }, { "epoch": 0.11486478251861601, "grad_norm": 2.1479368209838867, "learning_rate": 4.6586914688903146e-06, "loss": 0.8682, "step": 9425 }, { "epoch": 0.11492571874276382, "grad_norm": 2.313847541809082, "learning_rate": 4.6583707504810784e-06, "loss": 0.9069, "step": 9430 }, { "epoch": 0.11498665496691163, "grad_norm": 1.8596222400665283, "learning_rate": 4.6580500320718415e-06, "loss": 0.9618, "step": 9435 }, { "epoch": 0.11504759119105944, "grad_norm": 2.715813398361206, "learning_rate": 4.6577293136626045e-06, "loss": 0.8994, "step": 9440 }, { "epoch": 0.11510852741520725, "grad_norm": 2.141094923019409, "learning_rate": 4.657408595253368e-06, "loss": 0.9242, "step": 9445 }, { "epoch": 0.11516946363935505, "grad_norm": 1.8335769176483154, "learning_rate": 4.657087876844131e-06, "loss": 0.8679, "step": 9450 }, { "epoch": 0.11523039986350286, "grad_norm": 1.9384739398956299, "learning_rate": 4.656767158434894e-06, "loss": 0.9281, "step": 9455 }, { "epoch": 0.11529133608765066, "grad_norm": 2.091001272201538, "learning_rate": 4.656446440025657e-06, "loss": 0.9078, "step": 9460 }, { "epoch": 0.11535227231179847, "grad_norm": 1.7194536924362183, "learning_rate": 4.656125721616421e-06, "loss": 0.9044, "step": 9465 }, { "epoch": 0.11541320853594628, "grad_norm": 2.1150667667388916, "learning_rate": 4.655805003207184e-06, "loss": 0.9235, "step": 9470 }, { "epoch": 0.11547414476009409, "grad_norm": 1.8742140531539917, "learning_rate": 4.655484284797947e-06, "loss": 0.8588, "step": 9475 }, { "epoch": 0.1155350809842419, "grad_norm": 2.4011621475219727, "learning_rate": 4.655163566388711e-06, "loss": 0.9173, "step": 9480 }, { "epoch": 0.1155960172083897, "grad_norm": 2.322740077972412, "learning_rate": 4.654842847979474e-06, "loss": 0.9429, "step": 9485 }, { "epoch": 0.11565695343253751, "grad_norm": 2.0117080211639404, "learning_rate": 4.654522129570237e-06, "loss": 0.9041, "step": 9490 }, { "epoch": 0.11571788965668531, "grad_norm": 1.9279499053955078, "learning_rate": 4.654201411161001e-06, "loss": 0.8724, "step": 9495 }, { "epoch": 0.11577882588083312, "grad_norm": 2.10063099861145, "learning_rate": 4.653880692751764e-06, "loss": 0.8645, "step": 9500 }, { "epoch": 0.11583976210498093, "grad_norm": 1.5365526676177979, "learning_rate": 4.653559974342527e-06, "loss": 0.8829, "step": 9505 }, { "epoch": 0.11590069832912873, "grad_norm": 1.8723514080047607, "learning_rate": 4.653239255933291e-06, "loss": 0.8607, "step": 9510 }, { "epoch": 0.11596163455327654, "grad_norm": 2.3740344047546387, "learning_rate": 4.652918537524054e-06, "loss": 0.9334, "step": 9515 }, { "epoch": 0.11602257077742435, "grad_norm": 2.996413230895996, "learning_rate": 4.652597819114818e-06, "loss": 0.9186, "step": 9520 }, { "epoch": 0.11608350700157216, "grad_norm": 1.9581416845321655, "learning_rate": 4.652277100705581e-06, "loss": 0.9618, "step": 9525 }, { "epoch": 0.11614444322571996, "grad_norm": 2.502641439437866, "learning_rate": 4.651956382296344e-06, "loss": 0.8921, "step": 9530 }, { "epoch": 0.11620537944986777, "grad_norm": 1.7355599403381348, "learning_rate": 4.651635663887108e-06, "loss": 0.9323, "step": 9535 }, { "epoch": 0.11626631567401557, "grad_norm": 2.027146339416504, "learning_rate": 4.651314945477871e-06, "loss": 0.9119, "step": 9540 }, { "epoch": 0.11632725189816338, "grad_norm": 2.016232490539551, "learning_rate": 4.650994227068635e-06, "loss": 0.9406, "step": 9545 }, { "epoch": 0.11638818812231119, "grad_norm": 2.0111441612243652, "learning_rate": 4.650673508659398e-06, "loss": 0.9016, "step": 9550 }, { "epoch": 0.116449124346459, "grad_norm": 2.3136672973632812, "learning_rate": 4.650352790250161e-06, "loss": 0.9687, "step": 9555 }, { "epoch": 0.1165100605706068, "grad_norm": 2.2347192764282227, "learning_rate": 4.6500320718409245e-06, "loss": 0.9061, "step": 9560 }, { "epoch": 0.1165709967947546, "grad_norm": 2.1329286098480225, "learning_rate": 4.6497113534316875e-06, "loss": 0.9608, "step": 9565 }, { "epoch": 0.11663193301890241, "grad_norm": 2.337070941925049, "learning_rate": 4.6493906350224505e-06, "loss": 0.8683, "step": 9570 }, { "epoch": 0.11669286924305022, "grad_norm": 1.8464391231536865, "learning_rate": 4.649069916613214e-06, "loss": 0.9629, "step": 9575 }, { "epoch": 0.11675380546719803, "grad_norm": 2.161700487136841, "learning_rate": 4.648749198203977e-06, "loss": 0.973, "step": 9580 }, { "epoch": 0.11681474169134584, "grad_norm": 2.094895839691162, "learning_rate": 4.6484284797947404e-06, "loss": 0.935, "step": 9585 }, { "epoch": 0.11687567791549365, "grad_norm": 2.372197151184082, "learning_rate": 4.648107761385504e-06, "loss": 0.9694, "step": 9590 }, { "epoch": 0.11693661413964145, "grad_norm": 1.7688498497009277, "learning_rate": 4.647787042976267e-06, "loss": 0.9803, "step": 9595 }, { "epoch": 0.11699755036378925, "grad_norm": 1.6277568340301514, "learning_rate": 4.64746632456703e-06, "loss": 0.8991, "step": 9600 }, { "epoch": 0.11705848658793706, "grad_norm": 2.299746513366699, "learning_rate": 4.647145606157794e-06, "loss": 0.8984, "step": 9605 }, { "epoch": 0.11711942281208487, "grad_norm": 1.8147737979888916, "learning_rate": 4.646824887748557e-06, "loss": 0.9218, "step": 9610 }, { "epoch": 0.11718035903623268, "grad_norm": 2.2960970401763916, "learning_rate": 4.64650416933932e-06, "loss": 0.8473, "step": 9615 }, { "epoch": 0.11724129526038049, "grad_norm": 1.9032535552978516, "learning_rate": 4.646183450930083e-06, "loss": 0.8788, "step": 9620 }, { "epoch": 0.1173022314845283, "grad_norm": 2.0414741039276123, "learning_rate": 4.645862732520847e-06, "loss": 0.8864, "step": 9625 }, { "epoch": 0.1173631677086761, "grad_norm": 2.2064154148101807, "learning_rate": 4.64554201411161e-06, "loss": 0.8876, "step": 9630 }, { "epoch": 0.1174241039328239, "grad_norm": 2.139953851699829, "learning_rate": 4.645221295702373e-06, "loss": 0.9813, "step": 9635 }, { "epoch": 0.11748504015697171, "grad_norm": 2.3454840183258057, "learning_rate": 4.644900577293137e-06, "loss": 0.9318, "step": 9640 }, { "epoch": 0.11754597638111952, "grad_norm": 1.9104124307632446, "learning_rate": 4.6445798588839e-06, "loss": 0.9309, "step": 9645 }, { "epoch": 0.11760691260526733, "grad_norm": 2.1715333461761475, "learning_rate": 4.644259140474663e-06, "loss": 0.9792, "step": 9650 }, { "epoch": 0.11766784882941514, "grad_norm": 2.0834548473358154, "learning_rate": 4.643938422065427e-06, "loss": 0.9432, "step": 9655 }, { "epoch": 0.11772878505356293, "grad_norm": 2.314192533493042, "learning_rate": 4.64361770365619e-06, "loss": 0.945, "step": 9660 }, { "epoch": 0.11778972127771074, "grad_norm": 2.532228469848633, "learning_rate": 4.643296985246954e-06, "loss": 0.9794, "step": 9665 }, { "epoch": 0.11785065750185855, "grad_norm": 2.2144947052001953, "learning_rate": 4.642976266837717e-06, "loss": 0.8863, "step": 9670 }, { "epoch": 0.11791159372600636, "grad_norm": 1.9660550355911255, "learning_rate": 4.64265554842848e-06, "loss": 0.9418, "step": 9675 }, { "epoch": 0.11797252995015417, "grad_norm": 2.047976016998291, "learning_rate": 4.642334830019244e-06, "loss": 0.9072, "step": 9680 }, { "epoch": 0.11803346617430198, "grad_norm": 1.8889036178588867, "learning_rate": 4.642014111610007e-06, "loss": 0.8703, "step": 9685 }, { "epoch": 0.11809440239844979, "grad_norm": 1.8919200897216797, "learning_rate": 4.6416933932007706e-06, "loss": 0.8704, "step": 9690 }, { "epoch": 0.11815533862259758, "grad_norm": 2.288588762283325, "learning_rate": 4.6413726747915336e-06, "loss": 0.8455, "step": 9695 }, { "epoch": 0.11821627484674539, "grad_norm": 1.8190162181854248, "learning_rate": 4.641051956382297e-06, "loss": 0.9152, "step": 9700 }, { "epoch": 0.1182772110708932, "grad_norm": 2.2887990474700928, "learning_rate": 4.6407312379730605e-06, "loss": 0.947, "step": 9705 }, { "epoch": 0.11833814729504101, "grad_norm": 2.275200843811035, "learning_rate": 4.6404105195638235e-06, "loss": 0.9611, "step": 9710 }, { "epoch": 0.11839908351918882, "grad_norm": 1.9478960037231445, "learning_rate": 4.6400898011545865e-06, "loss": 0.925, "step": 9715 }, { "epoch": 0.11846001974333663, "grad_norm": 2.027615547180176, "learning_rate": 4.63976908274535e-06, "loss": 0.9212, "step": 9720 }, { "epoch": 0.11852095596748444, "grad_norm": 1.7558996677398682, "learning_rate": 4.639448364336113e-06, "loss": 0.8566, "step": 9725 }, { "epoch": 0.11858189219163223, "grad_norm": 1.7815730571746826, "learning_rate": 4.639127645926876e-06, "loss": 0.912, "step": 9730 }, { "epoch": 0.11864282841578004, "grad_norm": 1.768723726272583, "learning_rate": 4.63880692751764e-06, "loss": 0.9602, "step": 9735 }, { "epoch": 0.11870376463992785, "grad_norm": 1.734099268913269, "learning_rate": 4.638486209108403e-06, "loss": 0.903, "step": 9740 }, { "epoch": 0.11876470086407566, "grad_norm": 2.0783421993255615, "learning_rate": 4.638165490699166e-06, "loss": 0.8621, "step": 9745 }, { "epoch": 0.11882563708822347, "grad_norm": 2.0612406730651855, "learning_rate": 4.63784477228993e-06, "loss": 0.9173, "step": 9750 }, { "epoch": 0.11888657331237128, "grad_norm": 1.857866644859314, "learning_rate": 4.637524053880693e-06, "loss": 0.8539, "step": 9755 }, { "epoch": 0.11894750953651909, "grad_norm": 2.10736083984375, "learning_rate": 4.637203335471456e-06, "loss": 0.9962, "step": 9760 }, { "epoch": 0.11900844576066688, "grad_norm": 1.9815529584884644, "learning_rate": 4.63688261706222e-06, "loss": 0.9507, "step": 9765 }, { "epoch": 0.11906938198481469, "grad_norm": 1.9940251111984253, "learning_rate": 4.636561898652983e-06, "loss": 0.9814, "step": 9770 }, { "epoch": 0.1191303182089625, "grad_norm": 1.9258790016174316, "learning_rate": 4.636241180243746e-06, "loss": 0.8945, "step": 9775 }, { "epoch": 0.11919125443311031, "grad_norm": 2.1698687076568604, "learning_rate": 4.63592046183451e-06, "loss": 0.8689, "step": 9780 }, { "epoch": 0.11925219065725812, "grad_norm": 1.9188456535339355, "learning_rate": 4.635599743425273e-06, "loss": 0.9486, "step": 9785 }, { "epoch": 0.11931312688140593, "grad_norm": 1.7158645391464233, "learning_rate": 4.635279025016036e-06, "loss": 0.9587, "step": 9790 }, { "epoch": 0.11937406310555372, "grad_norm": 2.060882806777954, "learning_rate": 4.634958306606799e-06, "loss": 0.905, "step": 9795 }, { "epoch": 0.11943499932970153, "grad_norm": 2.1571898460388184, "learning_rate": 4.634637588197563e-06, "loss": 0.9394, "step": 9800 }, { "epoch": 0.11949593555384934, "grad_norm": 1.9408878087997437, "learning_rate": 4.634316869788326e-06, "loss": 0.9908, "step": 9805 }, { "epoch": 0.11955687177799715, "grad_norm": 1.8022234439849854, "learning_rate": 4.63399615137909e-06, "loss": 0.9203, "step": 9810 }, { "epoch": 0.11961780800214496, "grad_norm": 2.1042816638946533, "learning_rate": 4.633675432969853e-06, "loss": 0.9387, "step": 9815 }, { "epoch": 0.11967874422629277, "grad_norm": 1.9106169939041138, "learning_rate": 4.633354714560616e-06, "loss": 0.9112, "step": 9820 }, { "epoch": 0.11973968045044057, "grad_norm": 2.404148817062378, "learning_rate": 4.63303399615138e-06, "loss": 1.0247, "step": 9825 }, { "epoch": 0.11980061667458837, "grad_norm": 2.3257367610931396, "learning_rate": 4.632713277742143e-06, "loss": 0.9366, "step": 9830 }, { "epoch": 0.11986155289873618, "grad_norm": 1.9008795022964478, "learning_rate": 4.6323925593329065e-06, "loss": 0.8477, "step": 9835 }, { "epoch": 0.11992248912288399, "grad_norm": 1.764387607574463, "learning_rate": 4.6320718409236695e-06, "loss": 0.8673, "step": 9840 }, { "epoch": 0.1199834253470318, "grad_norm": 1.8603031635284424, "learning_rate": 4.6317511225144325e-06, "loss": 0.8893, "step": 9845 }, { "epoch": 0.1200443615711796, "grad_norm": 1.9893853664398193, "learning_rate": 4.631430404105196e-06, "loss": 0.9219, "step": 9850 }, { "epoch": 0.12010529779532741, "grad_norm": 1.985435962677002, "learning_rate": 4.6311096856959594e-06, "loss": 0.935, "step": 9855 }, { "epoch": 0.12016623401947522, "grad_norm": 2.0517613887786865, "learning_rate": 4.630788967286723e-06, "loss": 0.8657, "step": 9860 }, { "epoch": 0.12022717024362302, "grad_norm": 1.7336171865463257, "learning_rate": 4.630468248877486e-06, "loss": 0.8911, "step": 9865 }, { "epoch": 0.12028810646777083, "grad_norm": 2.0548160076141357, "learning_rate": 4.630147530468249e-06, "loss": 0.9458, "step": 9870 }, { "epoch": 0.12034904269191864, "grad_norm": 2.0660431385040283, "learning_rate": 4.629826812059012e-06, "loss": 0.8682, "step": 9875 }, { "epoch": 0.12040997891606645, "grad_norm": 1.887703537940979, "learning_rate": 4.629506093649776e-06, "loss": 0.8143, "step": 9880 }, { "epoch": 0.12047091514021425, "grad_norm": 2.005872964859009, "learning_rate": 4.629185375240539e-06, "loss": 0.8988, "step": 9885 }, { "epoch": 0.12053185136436206, "grad_norm": 1.8557530641555786, "learning_rate": 4.628864656831302e-06, "loss": 0.8564, "step": 9890 }, { "epoch": 0.12059278758850986, "grad_norm": 2.15828800201416, "learning_rate": 4.628543938422066e-06, "loss": 0.8722, "step": 9895 }, { "epoch": 0.12065372381265767, "grad_norm": 2.1957826614379883, "learning_rate": 4.628223220012829e-06, "loss": 1.0658, "step": 9900 }, { "epoch": 0.12071466003680548, "grad_norm": 2.4592480659484863, "learning_rate": 4.627902501603592e-06, "loss": 0.9721, "step": 9905 }, { "epoch": 0.12077559626095329, "grad_norm": 2.1053216457366943, "learning_rate": 4.627581783194356e-06, "loss": 0.8995, "step": 9910 }, { "epoch": 0.1208365324851011, "grad_norm": 2.2507688999176025, "learning_rate": 4.627261064785119e-06, "loss": 0.9588, "step": 9915 }, { "epoch": 0.1208974687092489, "grad_norm": 2.1024017333984375, "learning_rate": 4.626940346375882e-06, "loss": 0.8747, "step": 9920 }, { "epoch": 0.12095840493339671, "grad_norm": 2.001866340637207, "learning_rate": 4.626619627966646e-06, "loss": 0.8384, "step": 9925 }, { "epoch": 0.12101934115754451, "grad_norm": 2.1138429641723633, "learning_rate": 4.626298909557409e-06, "loss": 0.9639, "step": 9930 }, { "epoch": 0.12108027738169232, "grad_norm": 2.0682284832000732, "learning_rate": 4.625978191148172e-06, "loss": 0.8982, "step": 9935 }, { "epoch": 0.12114121360584013, "grad_norm": 1.900878667831421, "learning_rate": 4.625657472738936e-06, "loss": 0.917, "step": 9940 }, { "epoch": 0.12120214982998793, "grad_norm": 2.036882162094116, "learning_rate": 4.625336754329699e-06, "loss": 0.8758, "step": 9945 }, { "epoch": 0.12126308605413574, "grad_norm": 1.9301472902297974, "learning_rate": 4.625016035920462e-06, "loss": 0.8975, "step": 9950 }, { "epoch": 0.12132402227828355, "grad_norm": 2.1314804553985596, "learning_rate": 4.624695317511225e-06, "loss": 0.8789, "step": 9955 }, { "epoch": 0.12138495850243136, "grad_norm": 1.9915263652801514, "learning_rate": 4.624374599101989e-06, "loss": 0.9721, "step": 9960 }, { "epoch": 0.12144589472657916, "grad_norm": 2.128394603729248, "learning_rate": 4.624053880692752e-06, "loss": 0.9253, "step": 9965 }, { "epoch": 0.12150683095072697, "grad_norm": 1.6206835508346558, "learning_rate": 4.623733162283516e-06, "loss": 0.9319, "step": 9970 }, { "epoch": 0.12156776717487477, "grad_norm": 2.271195411682129, "learning_rate": 4.623412443874279e-06, "loss": 0.8445, "step": 9975 }, { "epoch": 0.12162870339902258, "grad_norm": 1.9540413618087769, "learning_rate": 4.623091725465042e-06, "loss": 1.0021, "step": 9980 }, { "epoch": 0.12168963962317039, "grad_norm": 2.339052438735962, "learning_rate": 4.6227710070558055e-06, "loss": 0.9441, "step": 9985 }, { "epoch": 0.1217505758473182, "grad_norm": 1.95758855342865, "learning_rate": 4.6224502886465685e-06, "loss": 0.9346, "step": 9990 }, { "epoch": 0.12181151207146601, "grad_norm": 1.9800487756729126, "learning_rate": 4.622129570237332e-06, "loss": 0.9867, "step": 9995 }, { "epoch": 0.1218724482956138, "grad_norm": 1.8516639471054077, "learning_rate": 4.621808851828095e-06, "loss": 0.8522, "step": 10000 }, { "epoch": 0.12193338451976161, "grad_norm": 1.9457180500030518, "learning_rate": 4.621488133418859e-06, "loss": 0.8769, "step": 10005 }, { "epoch": 0.12199432074390942, "grad_norm": 1.7589353322982788, "learning_rate": 4.621167415009622e-06, "loss": 0.966, "step": 10010 }, { "epoch": 0.12205525696805723, "grad_norm": 1.9918323755264282, "learning_rate": 4.620846696600385e-06, "loss": 0.9061, "step": 10015 }, { "epoch": 0.12211619319220504, "grad_norm": 2.111151695251465, "learning_rate": 4.620525978191149e-06, "loss": 0.873, "step": 10020 }, { "epoch": 0.12217712941635285, "grad_norm": 2.070307493209839, "learning_rate": 4.620205259781912e-06, "loss": 0.9617, "step": 10025 }, { "epoch": 0.12223806564050065, "grad_norm": 2.0698699951171875, "learning_rate": 4.619884541372675e-06, "loss": 0.8817, "step": 10030 }, { "epoch": 0.12229900186464845, "grad_norm": 1.8406862020492554, "learning_rate": 4.619563822963439e-06, "loss": 0.909, "step": 10035 }, { "epoch": 0.12235993808879626, "grad_norm": 1.791804552078247, "learning_rate": 4.619243104554202e-06, "loss": 0.8917, "step": 10040 }, { "epoch": 0.12242087431294407, "grad_norm": 1.9738726615905762, "learning_rate": 4.618922386144965e-06, "loss": 0.9882, "step": 10045 }, { "epoch": 0.12248181053709188, "grad_norm": 2.166085958480835, "learning_rate": 4.618601667735728e-06, "loss": 0.9054, "step": 10050 }, { "epoch": 0.12254274676123969, "grad_norm": 1.9492043256759644, "learning_rate": 4.618280949326492e-06, "loss": 0.9356, "step": 10055 }, { "epoch": 0.1226036829853875, "grad_norm": 2.0838160514831543, "learning_rate": 4.617960230917255e-06, "loss": 0.9491, "step": 10060 }, { "epoch": 0.1226646192095353, "grad_norm": 1.7628496885299683, "learning_rate": 4.617639512508018e-06, "loss": 0.8973, "step": 10065 }, { "epoch": 0.1227255554336831, "grad_norm": 1.9072506427764893, "learning_rate": 4.617318794098782e-06, "loss": 0.9366, "step": 10070 }, { "epoch": 0.12278649165783091, "grad_norm": 1.806888222694397, "learning_rate": 4.616998075689545e-06, "loss": 0.8801, "step": 10075 }, { "epoch": 0.12284742788197872, "grad_norm": 2.0502583980560303, "learning_rate": 4.616677357280308e-06, "loss": 0.8892, "step": 10080 }, { "epoch": 0.12290836410612653, "grad_norm": 2.1881608963012695, "learning_rate": 4.616356638871072e-06, "loss": 0.9453, "step": 10085 }, { "epoch": 0.12296930033027434, "grad_norm": 2.491129159927368, "learning_rate": 4.616035920461835e-06, "loss": 0.9162, "step": 10090 }, { "epoch": 0.12303023655442215, "grad_norm": 1.942836880683899, "learning_rate": 4.615715202052598e-06, "loss": 0.9229, "step": 10095 }, { "epoch": 0.12309117277856994, "grad_norm": 1.7520784139633179, "learning_rate": 4.615394483643362e-06, "loss": 0.8922, "step": 10100 }, { "epoch": 0.12315210900271775, "grad_norm": 1.8546935319900513, "learning_rate": 4.615073765234125e-06, "loss": 0.8893, "step": 10105 }, { "epoch": 0.12321304522686556, "grad_norm": 1.840705156326294, "learning_rate": 4.614753046824888e-06, "loss": 0.926, "step": 10110 }, { "epoch": 0.12327398145101337, "grad_norm": 2.016296863555908, "learning_rate": 4.6144323284156515e-06, "loss": 0.947, "step": 10115 }, { "epoch": 0.12333491767516118, "grad_norm": 2.0941269397735596, "learning_rate": 4.6141116100064146e-06, "loss": 0.9236, "step": 10120 }, { "epoch": 0.12339585389930899, "grad_norm": 2.0755813121795654, "learning_rate": 4.613790891597178e-06, "loss": 0.9146, "step": 10125 }, { "epoch": 0.12345679012345678, "grad_norm": 2.092177391052246, "learning_rate": 4.6134701731879414e-06, "loss": 0.956, "step": 10130 }, { "epoch": 0.12351772634760459, "grad_norm": 2.40000319480896, "learning_rate": 4.6131494547787045e-06, "loss": 0.9135, "step": 10135 }, { "epoch": 0.1235786625717524, "grad_norm": 2.082205295562744, "learning_rate": 4.612828736369468e-06, "loss": 0.9004, "step": 10140 }, { "epoch": 0.12363959879590021, "grad_norm": 1.8626301288604736, "learning_rate": 4.612508017960231e-06, "loss": 0.9855, "step": 10145 }, { "epoch": 0.12370053502004802, "grad_norm": 1.918652892112732, "learning_rate": 4.612187299550994e-06, "loss": 0.9826, "step": 10150 }, { "epoch": 0.12376147124419583, "grad_norm": 1.7706081867218018, "learning_rate": 4.611866581141758e-06, "loss": 0.9113, "step": 10155 }, { "epoch": 0.12382240746834364, "grad_norm": 2.108441114425659, "learning_rate": 4.611545862732521e-06, "loss": 0.9376, "step": 10160 }, { "epoch": 0.12388334369249143, "grad_norm": 1.8182973861694336, "learning_rate": 4.611225144323285e-06, "loss": 0.9059, "step": 10165 }, { "epoch": 0.12394427991663924, "grad_norm": 2.1040031909942627, "learning_rate": 4.610904425914048e-06, "loss": 0.8901, "step": 10170 }, { "epoch": 0.12400521614078705, "grad_norm": 2.087430715560913, "learning_rate": 4.610583707504811e-06, "loss": 0.8394, "step": 10175 }, { "epoch": 0.12406615236493486, "grad_norm": 2.0977938175201416, "learning_rate": 4.610262989095575e-06, "loss": 0.9703, "step": 10180 }, { "epoch": 0.12412708858908267, "grad_norm": 2.0021495819091797, "learning_rate": 4.609942270686338e-06, "loss": 0.9535, "step": 10185 }, { "epoch": 0.12418802481323048, "grad_norm": 2.2689013481140137, "learning_rate": 4.609621552277101e-06, "loss": 0.9111, "step": 10190 }, { "epoch": 0.12424896103737829, "grad_norm": 1.790457844734192, "learning_rate": 4.609300833867865e-06, "loss": 0.9153, "step": 10195 }, { "epoch": 0.12430989726152608, "grad_norm": 2.0716750621795654, "learning_rate": 4.608980115458628e-06, "loss": 0.9179, "step": 10200 }, { "epoch": 0.12437083348567389, "grad_norm": 2.0029163360595703, "learning_rate": 4.608659397049391e-06, "loss": 0.9053, "step": 10205 }, { "epoch": 0.1244317697098217, "grad_norm": 1.9921780824661255, "learning_rate": 4.608338678640154e-06, "loss": 0.9183, "step": 10210 }, { "epoch": 0.12449270593396951, "grad_norm": 2.231397867202759, "learning_rate": 4.608017960230918e-06, "loss": 0.9226, "step": 10215 }, { "epoch": 0.12455364215811732, "grad_norm": 2.0380523204803467, "learning_rate": 4.607697241821681e-06, "loss": 0.8701, "step": 10220 }, { "epoch": 0.12461457838226513, "grad_norm": 1.5842950344085693, "learning_rate": 4.607376523412444e-06, "loss": 0.8917, "step": 10225 }, { "epoch": 0.12467551460641293, "grad_norm": 1.7479714155197144, "learning_rate": 4.607055805003208e-06, "loss": 0.8526, "step": 10230 }, { "epoch": 0.12473645083056073, "grad_norm": 1.736901879310608, "learning_rate": 4.606735086593971e-06, "loss": 0.8886, "step": 10235 }, { "epoch": 0.12479738705470854, "grad_norm": 1.7679253816604614, "learning_rate": 4.606414368184734e-06, "loss": 0.9087, "step": 10240 }, { "epoch": 0.12485832327885635, "grad_norm": 1.7412785291671753, "learning_rate": 4.606093649775498e-06, "loss": 0.9495, "step": 10245 }, { "epoch": 0.12491925950300416, "grad_norm": 2.0274181365966797, "learning_rate": 4.605772931366261e-06, "loss": 0.8919, "step": 10250 }, { "epoch": 0.12498019572715197, "grad_norm": 1.980260968208313, "learning_rate": 4.605452212957024e-06, "loss": 0.9668, "step": 10255 }, { "epoch": 0.12504113195129976, "grad_norm": 1.9239493608474731, "learning_rate": 4.6051314945477875e-06, "loss": 0.9429, "step": 10260 }, { "epoch": 0.12510206817544758, "grad_norm": 2.3943047523498535, "learning_rate": 4.6048107761385505e-06, "loss": 0.8883, "step": 10265 }, { "epoch": 0.12516300439959538, "grad_norm": 2.088216543197632, "learning_rate": 4.6044900577293135e-06, "loss": 0.9256, "step": 10270 }, { "epoch": 0.1252239406237432, "grad_norm": 1.9059959650039673, "learning_rate": 4.604169339320077e-06, "loss": 0.9178, "step": 10275 }, { "epoch": 0.125284876847891, "grad_norm": 2.1856820583343506, "learning_rate": 4.60384862091084e-06, "loss": 0.9394, "step": 10280 }, { "epoch": 0.1253458130720388, "grad_norm": 1.9420034885406494, "learning_rate": 4.603527902501604e-06, "loss": 0.9896, "step": 10285 }, { "epoch": 0.12540674929618661, "grad_norm": 2.168417453765869, "learning_rate": 4.603207184092367e-06, "loss": 0.8784, "step": 10290 }, { "epoch": 0.1254676855203344, "grad_norm": 2.039048671722412, "learning_rate": 4.60288646568313e-06, "loss": 0.9162, "step": 10295 }, { "epoch": 0.12552862174448223, "grad_norm": 2.3885185718536377, "learning_rate": 4.602565747273894e-06, "loss": 0.8908, "step": 10300 }, { "epoch": 0.12558955796863003, "grad_norm": 1.8062856197357178, "learning_rate": 4.602245028864657e-06, "loss": 0.8724, "step": 10305 }, { "epoch": 0.12565049419277785, "grad_norm": 2.018547296524048, "learning_rate": 4.601924310455421e-06, "loss": 0.9364, "step": 10310 }, { "epoch": 0.12571143041692565, "grad_norm": 1.8946781158447266, "learning_rate": 4.601603592046184e-06, "loss": 0.9302, "step": 10315 }, { "epoch": 0.12577236664107344, "grad_norm": 2.1848337650299072, "learning_rate": 4.601282873636947e-06, "loss": 0.9264, "step": 10320 }, { "epoch": 0.12583330286522126, "grad_norm": 2.3147103786468506, "learning_rate": 4.600962155227711e-06, "loss": 0.9501, "step": 10325 }, { "epoch": 0.12589423908936906, "grad_norm": 1.9070346355438232, "learning_rate": 4.600641436818474e-06, "loss": 0.9097, "step": 10330 }, { "epoch": 0.12595517531351688, "grad_norm": 2.0208613872528076, "learning_rate": 4.600320718409237e-06, "loss": 0.9238, "step": 10335 }, { "epoch": 0.12601611153766468, "grad_norm": 2.064154863357544, "learning_rate": 4.600000000000001e-06, "loss": 0.9783, "step": 10340 }, { "epoch": 0.1260770477618125, "grad_norm": 2.1802046298980713, "learning_rate": 4.599679281590764e-06, "loss": 0.889, "step": 10345 }, { "epoch": 0.1261379839859603, "grad_norm": 1.742607593536377, "learning_rate": 4.599358563181527e-06, "loss": 0.9103, "step": 10350 }, { "epoch": 0.1261989202101081, "grad_norm": 2.2119059562683105, "learning_rate": 4.599037844772291e-06, "loss": 0.9512, "step": 10355 }, { "epoch": 0.1262598564342559, "grad_norm": 2.355231523513794, "learning_rate": 4.598717126363054e-06, "loss": 0.8535, "step": 10360 }, { "epoch": 0.1263207926584037, "grad_norm": 1.8705954551696777, "learning_rate": 4.598396407953817e-06, "loss": 0.9339, "step": 10365 }, { "epoch": 0.12638172888255153, "grad_norm": 1.932748556137085, "learning_rate": 4.598075689544581e-06, "loss": 0.7999, "step": 10370 }, { "epoch": 0.12644266510669933, "grad_norm": 2.2156717777252197, "learning_rate": 4.597754971135344e-06, "loss": 0.929, "step": 10375 }, { "epoch": 0.12650360133084715, "grad_norm": 1.7882004976272583, "learning_rate": 4.597434252726107e-06, "loss": 0.9101, "step": 10380 }, { "epoch": 0.12656453755499494, "grad_norm": 2.0702383518218994, "learning_rate": 4.59711353431687e-06, "loss": 0.9802, "step": 10385 }, { "epoch": 0.12662547377914274, "grad_norm": 2.0226895809173584, "learning_rate": 4.5967928159076336e-06, "loss": 0.8528, "step": 10390 }, { "epoch": 0.12668641000329056, "grad_norm": 1.8343720436096191, "learning_rate": 4.5964720974983966e-06, "loss": 0.9124, "step": 10395 }, { "epoch": 0.12674734622743836, "grad_norm": 1.968092918395996, "learning_rate": 4.59615137908916e-06, "loss": 0.9554, "step": 10400 }, { "epoch": 0.12680828245158618, "grad_norm": 2.1424436569213867, "learning_rate": 4.5958306606799235e-06, "loss": 0.8535, "step": 10405 }, { "epoch": 0.12686921867573397, "grad_norm": 2.215585470199585, "learning_rate": 4.5955099422706865e-06, "loss": 0.8679, "step": 10410 }, { "epoch": 0.1269301548998818, "grad_norm": 1.8495893478393555, "learning_rate": 4.5951892238614495e-06, "loss": 0.8805, "step": 10415 }, { "epoch": 0.1269910911240296, "grad_norm": 2.090815782546997, "learning_rate": 4.594868505452213e-06, "loss": 0.8927, "step": 10420 }, { "epoch": 0.1270520273481774, "grad_norm": 3.0325889587402344, "learning_rate": 4.594547787042976e-06, "loss": 0.8377, "step": 10425 }, { "epoch": 0.1271129635723252, "grad_norm": 2.2691891193389893, "learning_rate": 4.594227068633739e-06, "loss": 0.9181, "step": 10430 }, { "epoch": 0.127173899796473, "grad_norm": 2.0842106342315674, "learning_rate": 4.593906350224503e-06, "loss": 0.8392, "step": 10435 }, { "epoch": 0.12723483602062083, "grad_norm": 1.944338321685791, "learning_rate": 4.593585631815266e-06, "loss": 0.9638, "step": 10440 }, { "epoch": 0.12729577224476862, "grad_norm": 1.7808096408843994, "learning_rate": 4.59326491340603e-06, "loss": 0.8791, "step": 10445 }, { "epoch": 0.12735670846891642, "grad_norm": 2.1487677097320557, "learning_rate": 4.592944194996793e-06, "loss": 0.8372, "step": 10450 }, { "epoch": 0.12741764469306424, "grad_norm": 2.122288465499878, "learning_rate": 4.592623476587556e-06, "loss": 0.8869, "step": 10455 }, { "epoch": 0.12747858091721204, "grad_norm": 1.8226710557937622, "learning_rate": 4.59230275817832e-06, "loss": 0.8748, "step": 10460 }, { "epoch": 0.12753951714135986, "grad_norm": 1.9219330549240112, "learning_rate": 4.591982039769083e-06, "loss": 1.002, "step": 10465 }, { "epoch": 0.12760045336550765, "grad_norm": 2.1463279724121094, "learning_rate": 4.591661321359847e-06, "loss": 0.9177, "step": 10470 }, { "epoch": 0.12766138958965548, "grad_norm": 2.173633575439453, "learning_rate": 4.59134060295061e-06, "loss": 0.8728, "step": 10475 }, { "epoch": 0.12772232581380327, "grad_norm": 2.153576612472534, "learning_rate": 4.591019884541373e-06, "loss": 0.9687, "step": 10480 }, { "epoch": 0.12778326203795107, "grad_norm": 2.2147839069366455, "learning_rate": 4.590699166132137e-06, "loss": 0.9468, "step": 10485 }, { "epoch": 0.1278441982620989, "grad_norm": 2.0484671592712402, "learning_rate": 4.5903784477229e-06, "loss": 0.8912, "step": 10490 }, { "epoch": 0.12790513448624669, "grad_norm": 2.0195915699005127, "learning_rate": 4.590057729313663e-06, "loss": 0.9354, "step": 10495 }, { "epoch": 0.1279660707103945, "grad_norm": 2.2045159339904785, "learning_rate": 4.589737010904427e-06, "loss": 0.8962, "step": 10500 }, { "epoch": 0.1280270069345423, "grad_norm": 2.1890084743499756, "learning_rate": 4.58941629249519e-06, "loss": 0.9274, "step": 10505 }, { "epoch": 0.12808794315869013, "grad_norm": 1.786597728729248, "learning_rate": 4.589095574085953e-06, "loss": 0.9081, "step": 10510 }, { "epoch": 0.12814887938283792, "grad_norm": 2.1068365573883057, "learning_rate": 4.588774855676717e-06, "loss": 0.9232, "step": 10515 }, { "epoch": 0.12820981560698572, "grad_norm": 1.6804473400115967, "learning_rate": 4.58845413726748e-06, "loss": 0.9442, "step": 10520 }, { "epoch": 0.12827075183113354, "grad_norm": 2.016129493713379, "learning_rate": 4.588133418858243e-06, "loss": 0.9064, "step": 10525 }, { "epoch": 0.12833168805528133, "grad_norm": 1.638826608657837, "learning_rate": 4.5878127004490065e-06, "loss": 0.9277, "step": 10530 }, { "epoch": 0.12839262427942916, "grad_norm": 1.7210028171539307, "learning_rate": 4.5874919820397695e-06, "loss": 0.9478, "step": 10535 }, { "epoch": 0.12845356050357695, "grad_norm": 1.9397623538970947, "learning_rate": 4.5871712636305325e-06, "loss": 0.9064, "step": 10540 }, { "epoch": 0.12851449672772478, "grad_norm": 1.969980001449585, "learning_rate": 4.5868505452212955e-06, "loss": 0.8707, "step": 10545 }, { "epoch": 0.12857543295187257, "grad_norm": 1.795667052268982, "learning_rate": 4.586529826812059e-06, "loss": 0.8564, "step": 10550 }, { "epoch": 0.12863636917602037, "grad_norm": 2.275844097137451, "learning_rate": 4.5862091084028224e-06, "loss": 0.8926, "step": 10555 }, { "epoch": 0.1286973054001682, "grad_norm": 1.9671939611434937, "learning_rate": 4.5858883899935854e-06, "loss": 0.9065, "step": 10560 }, { "epoch": 0.12875824162431598, "grad_norm": 1.8929020166397095, "learning_rate": 4.585567671584349e-06, "loss": 0.8975, "step": 10565 }, { "epoch": 0.1288191778484638, "grad_norm": 2.346123218536377, "learning_rate": 4.585246953175112e-06, "loss": 0.859, "step": 10570 }, { "epoch": 0.1288801140726116, "grad_norm": 2.1880452632904053, "learning_rate": 4.584926234765875e-06, "loss": 0.9697, "step": 10575 }, { "epoch": 0.12894105029675942, "grad_norm": 1.4554922580718994, "learning_rate": 4.584605516356639e-06, "loss": 0.8729, "step": 10580 }, { "epoch": 0.12900198652090722, "grad_norm": 2.0445046424865723, "learning_rate": 4.584284797947402e-06, "loss": 0.8608, "step": 10585 }, { "epoch": 0.12906292274505501, "grad_norm": 2.1308300495147705, "learning_rate": 4.583964079538166e-06, "loss": 0.9128, "step": 10590 }, { "epoch": 0.12912385896920284, "grad_norm": 1.7193968296051025, "learning_rate": 4.583643361128929e-06, "loss": 0.8849, "step": 10595 }, { "epoch": 0.12918479519335063, "grad_norm": 2.1879467964172363, "learning_rate": 4.583322642719692e-06, "loss": 0.9322, "step": 10600 }, { "epoch": 0.12924573141749846, "grad_norm": 1.8535059690475464, "learning_rate": 4.583001924310456e-06, "loss": 0.9584, "step": 10605 }, { "epoch": 0.12930666764164625, "grad_norm": 1.9556479454040527, "learning_rate": 4.582681205901219e-06, "loss": 1.0054, "step": 10610 }, { "epoch": 0.12936760386579407, "grad_norm": 1.9930729866027832, "learning_rate": 4.582360487491983e-06, "loss": 0.9211, "step": 10615 }, { "epoch": 0.12942854008994187, "grad_norm": 1.9220592975616455, "learning_rate": 4.582039769082746e-06, "loss": 0.8457, "step": 10620 }, { "epoch": 0.12948947631408966, "grad_norm": 1.9865992069244385, "learning_rate": 4.581719050673509e-06, "loss": 0.8566, "step": 10625 }, { "epoch": 0.12955041253823749, "grad_norm": 1.9651343822479248, "learning_rate": 4.581398332264273e-06, "loss": 0.9276, "step": 10630 }, { "epoch": 0.12961134876238528, "grad_norm": 1.992612600326538, "learning_rate": 4.581077613855036e-06, "loss": 0.8712, "step": 10635 }, { "epoch": 0.1296722849865331, "grad_norm": 1.9336646795272827, "learning_rate": 4.580756895445799e-06, "loss": 0.8979, "step": 10640 }, { "epoch": 0.1297332212106809, "grad_norm": 2.062478542327881, "learning_rate": 4.580436177036563e-06, "loss": 0.85, "step": 10645 }, { "epoch": 0.12979415743482872, "grad_norm": 2.3511276245117188, "learning_rate": 4.580115458627326e-06, "loss": 0.8847, "step": 10650 }, { "epoch": 0.12985509365897652, "grad_norm": 1.7669272422790527, "learning_rate": 4.579794740218089e-06, "loss": 0.9161, "step": 10655 }, { "epoch": 0.1299160298831243, "grad_norm": 2.011772394180298, "learning_rate": 4.5794740218088526e-06, "loss": 0.9707, "step": 10660 }, { "epoch": 0.12997696610727213, "grad_norm": 1.742249846458435, "learning_rate": 4.5791533033996156e-06, "loss": 0.9089, "step": 10665 }, { "epoch": 0.13003790233141993, "grad_norm": 1.8340671062469482, "learning_rate": 4.578832584990379e-06, "loss": 0.9707, "step": 10670 }, { "epoch": 0.13009883855556775, "grad_norm": 2.151184320449829, "learning_rate": 4.5785118665811425e-06, "loss": 0.8467, "step": 10675 }, { "epoch": 0.13015977477971555, "grad_norm": 1.9489266872406006, "learning_rate": 4.5781911481719055e-06, "loss": 1.0071, "step": 10680 }, { "epoch": 0.13022071100386334, "grad_norm": 1.877848744392395, "learning_rate": 4.5778704297626685e-06, "loss": 0.8931, "step": 10685 }, { "epoch": 0.13028164722801117, "grad_norm": 2.337952136993408, "learning_rate": 4.577549711353432e-06, "loss": 0.9828, "step": 10690 }, { "epoch": 0.13034258345215896, "grad_norm": 1.6296323537826538, "learning_rate": 4.577228992944195e-06, "loss": 0.9144, "step": 10695 }, { "epoch": 0.13040351967630678, "grad_norm": 1.8352097272872925, "learning_rate": 4.576908274534958e-06, "loss": 0.9076, "step": 10700 }, { "epoch": 0.13046445590045458, "grad_norm": 2.03897762298584, "learning_rate": 4.576587556125722e-06, "loss": 0.919, "step": 10705 }, { "epoch": 0.1305253921246024, "grad_norm": 2.5500998497009277, "learning_rate": 4.576266837716485e-06, "loss": 0.8752, "step": 10710 }, { "epoch": 0.1305863283487502, "grad_norm": 2.5031144618988037, "learning_rate": 4.575946119307248e-06, "loss": 0.9256, "step": 10715 }, { "epoch": 0.130647264572898, "grad_norm": 1.7840274572372437, "learning_rate": 4.575625400898011e-06, "loss": 0.9391, "step": 10720 }, { "epoch": 0.13070820079704581, "grad_norm": 2.2232115268707275, "learning_rate": 4.575304682488775e-06, "loss": 0.895, "step": 10725 }, { "epoch": 0.1307691370211936, "grad_norm": 2.1766903400421143, "learning_rate": 4.574983964079538e-06, "loss": 0.8688, "step": 10730 }, { "epoch": 0.13083007324534143, "grad_norm": 1.9880019426345825, "learning_rate": 4.574663245670302e-06, "loss": 0.8687, "step": 10735 }, { "epoch": 0.13089100946948923, "grad_norm": 1.8825746774673462, "learning_rate": 4.574342527261065e-06, "loss": 0.8999, "step": 10740 }, { "epoch": 0.13095194569363705, "grad_norm": 1.9464677572250366, "learning_rate": 4.574021808851828e-06, "loss": 0.9428, "step": 10745 }, { "epoch": 0.13101288191778485, "grad_norm": 2.1291890144348145, "learning_rate": 4.573701090442592e-06, "loss": 0.8299, "step": 10750 }, { "epoch": 0.13107381814193264, "grad_norm": 2.0608787536621094, "learning_rate": 4.573380372033355e-06, "loss": 0.8698, "step": 10755 }, { "epoch": 0.13113475436608046, "grad_norm": 2.503740072250366, "learning_rate": 4.573059653624119e-06, "loss": 0.8772, "step": 10760 }, { "epoch": 0.13119569059022826, "grad_norm": 2.033200979232788, "learning_rate": 4.572738935214882e-06, "loss": 0.9001, "step": 10765 }, { "epoch": 0.13125662681437608, "grad_norm": 1.753551959991455, "learning_rate": 4.572418216805645e-06, "loss": 0.9624, "step": 10770 }, { "epoch": 0.13131756303852388, "grad_norm": 2.1793651580810547, "learning_rate": 4.572097498396409e-06, "loss": 0.9033, "step": 10775 }, { "epoch": 0.1313784992626717, "grad_norm": 1.8572407960891724, "learning_rate": 4.571776779987172e-06, "loss": 0.9733, "step": 10780 }, { "epoch": 0.1314394354868195, "grad_norm": 2.0511538982391357, "learning_rate": 4.571456061577936e-06, "loss": 0.9011, "step": 10785 }, { "epoch": 0.1315003717109673, "grad_norm": 2.1400535106658936, "learning_rate": 4.571135343168699e-06, "loss": 0.9326, "step": 10790 }, { "epoch": 0.1315613079351151, "grad_norm": 2.4580626487731934, "learning_rate": 4.570814624759462e-06, "loss": 0.9105, "step": 10795 }, { "epoch": 0.1316222441592629, "grad_norm": 2.3705947399139404, "learning_rate": 4.570493906350225e-06, "loss": 0.896, "step": 10800 }, { "epoch": 0.13168318038341073, "grad_norm": 2.052828788757324, "learning_rate": 4.5701731879409885e-06, "loss": 0.9126, "step": 10805 }, { "epoch": 0.13174411660755853, "grad_norm": 1.9276022911071777, "learning_rate": 4.5698524695317515e-06, "loss": 0.8837, "step": 10810 }, { "epoch": 0.13180505283170635, "grad_norm": 1.99998140335083, "learning_rate": 4.5695317511225145e-06, "loss": 0.9297, "step": 10815 }, { "epoch": 0.13186598905585414, "grad_norm": 2.094860792160034, "learning_rate": 4.569211032713278e-06, "loss": 0.9127, "step": 10820 }, { "epoch": 0.13192692528000194, "grad_norm": 2.037956953048706, "learning_rate": 4.5688903143040414e-06, "loss": 0.8569, "step": 10825 }, { "epoch": 0.13198786150414976, "grad_norm": 1.5657998323440552, "learning_rate": 4.5685695958948044e-06, "loss": 0.8329, "step": 10830 }, { "epoch": 0.13204879772829756, "grad_norm": 2.082138776779175, "learning_rate": 4.568248877485568e-06, "loss": 0.9822, "step": 10835 }, { "epoch": 0.13210973395244538, "grad_norm": 2.076058864593506, "learning_rate": 4.567928159076331e-06, "loss": 0.9513, "step": 10840 }, { "epoch": 0.13217067017659317, "grad_norm": 1.82142174243927, "learning_rate": 4.567607440667094e-06, "loss": 0.9186, "step": 10845 }, { "epoch": 0.132231606400741, "grad_norm": 2.1267237663269043, "learning_rate": 4.567286722257858e-06, "loss": 0.8825, "step": 10850 }, { "epoch": 0.1322925426248888, "grad_norm": 2.2618203163146973, "learning_rate": 4.566966003848621e-06, "loss": 0.9623, "step": 10855 }, { "epoch": 0.1323534788490366, "grad_norm": 2.061253070831299, "learning_rate": 4.566645285439384e-06, "loss": 0.8687, "step": 10860 }, { "epoch": 0.1324144150731844, "grad_norm": 1.851570963859558, "learning_rate": 4.566324567030148e-06, "loss": 0.9134, "step": 10865 }, { "epoch": 0.1324753512973322, "grad_norm": 1.9350004196166992, "learning_rate": 4.566003848620911e-06, "loss": 0.9526, "step": 10870 }, { "epoch": 0.13253628752148003, "grad_norm": 2.060765027999878, "learning_rate": 4.565683130211674e-06, "loss": 0.9695, "step": 10875 }, { "epoch": 0.13259722374562782, "grad_norm": 1.9722810983657837, "learning_rate": 4.565362411802438e-06, "loss": 0.9593, "step": 10880 }, { "epoch": 0.13265815996977565, "grad_norm": 1.9817280769348145, "learning_rate": 4.565041693393201e-06, "loss": 0.8564, "step": 10885 }, { "epoch": 0.13271909619392344, "grad_norm": 1.8542414903640747, "learning_rate": 4.564720974983964e-06, "loss": 0.8741, "step": 10890 }, { "epoch": 0.13278003241807124, "grad_norm": 1.8372926712036133, "learning_rate": 4.564400256574728e-06, "loss": 0.9707, "step": 10895 }, { "epoch": 0.13284096864221906, "grad_norm": 2.0223214626312256, "learning_rate": 4.564079538165491e-06, "loss": 0.9372, "step": 10900 }, { "epoch": 0.13290190486636685, "grad_norm": 1.9737751483917236, "learning_rate": 4.563758819756254e-06, "loss": 0.8924, "step": 10905 }, { "epoch": 0.13296284109051468, "grad_norm": 2.0696489810943604, "learning_rate": 4.563438101347018e-06, "loss": 0.8793, "step": 10910 }, { "epoch": 0.13302377731466247, "grad_norm": 2.1693146228790283, "learning_rate": 4.563117382937781e-06, "loss": 0.8837, "step": 10915 }, { "epoch": 0.13308471353881027, "grad_norm": 2.298999786376953, "learning_rate": 4.562796664528545e-06, "loss": 0.9265, "step": 10920 }, { "epoch": 0.1331456497629581, "grad_norm": 1.9072318077087402, "learning_rate": 4.562475946119308e-06, "loss": 0.9148, "step": 10925 }, { "epoch": 0.13320658598710589, "grad_norm": 1.823320746421814, "learning_rate": 4.562155227710071e-06, "loss": 0.885, "step": 10930 }, { "epoch": 0.1332675222112537, "grad_norm": 2.045539140701294, "learning_rate": 4.5618345093008346e-06, "loss": 0.8668, "step": 10935 }, { "epoch": 0.1333284584354015, "grad_norm": 2.1201443672180176, "learning_rate": 4.561513790891598e-06, "loss": 0.9342, "step": 10940 }, { "epoch": 0.13338939465954933, "grad_norm": 1.893631935119629, "learning_rate": 4.5611930724823615e-06, "loss": 0.8993, "step": 10945 }, { "epoch": 0.13345033088369712, "grad_norm": 2.0366222858428955, "learning_rate": 4.5608723540731245e-06, "loss": 0.9325, "step": 10950 }, { "epoch": 0.13351126710784492, "grad_norm": 1.7413636445999146, "learning_rate": 4.5605516356638875e-06, "loss": 0.8939, "step": 10955 }, { "epoch": 0.13357220333199274, "grad_norm": 1.9706557989120483, "learning_rate": 4.560230917254651e-06, "loss": 0.8668, "step": 10960 }, { "epoch": 0.13363313955614053, "grad_norm": 2.0951404571533203, "learning_rate": 4.559910198845414e-06, "loss": 0.9652, "step": 10965 }, { "epoch": 0.13369407578028836, "grad_norm": 2.022643566131592, "learning_rate": 4.559589480436177e-06, "loss": 0.8557, "step": 10970 }, { "epoch": 0.13375501200443615, "grad_norm": 1.7717076539993286, "learning_rate": 4.55926876202694e-06, "loss": 0.8616, "step": 10975 }, { "epoch": 0.13381594822858398, "grad_norm": 1.902876377105713, "learning_rate": 4.558948043617704e-06, "loss": 0.8746, "step": 10980 }, { "epoch": 0.13387688445273177, "grad_norm": 2.0814828872680664, "learning_rate": 4.558627325208467e-06, "loss": 0.8896, "step": 10985 }, { "epoch": 0.13393782067687957, "grad_norm": 2.037076950073242, "learning_rate": 4.55830660679923e-06, "loss": 0.972, "step": 10990 }, { "epoch": 0.1339987569010274, "grad_norm": 2.159681558609009, "learning_rate": 4.557985888389994e-06, "loss": 0.8709, "step": 10995 }, { "epoch": 0.13405969312517518, "grad_norm": 2.0804357528686523, "learning_rate": 4.557665169980757e-06, "loss": 0.9834, "step": 11000 }, { "epoch": 0.134120629349323, "grad_norm": 2.0019779205322266, "learning_rate": 4.55734445157152e-06, "loss": 0.8506, "step": 11005 }, { "epoch": 0.1341815655734708, "grad_norm": 1.9185000658035278, "learning_rate": 4.557023733162284e-06, "loss": 0.9095, "step": 11010 }, { "epoch": 0.13424250179761862, "grad_norm": 2.015775442123413, "learning_rate": 4.556703014753047e-06, "loss": 0.9289, "step": 11015 }, { "epoch": 0.13430343802176642, "grad_norm": 2.089761257171631, "learning_rate": 4.55638229634381e-06, "loss": 0.901, "step": 11020 }, { "epoch": 0.13436437424591421, "grad_norm": 2.064337968826294, "learning_rate": 4.556061577934574e-06, "loss": 0.9522, "step": 11025 }, { "epoch": 0.13442531047006204, "grad_norm": 2.134469747543335, "learning_rate": 4.555740859525337e-06, "loss": 0.8774, "step": 11030 }, { "epoch": 0.13448624669420983, "grad_norm": 1.710332989692688, "learning_rate": 4.5554201411161e-06, "loss": 0.8981, "step": 11035 }, { "epoch": 0.13454718291835766, "grad_norm": 1.9090276956558228, "learning_rate": 4.555099422706864e-06, "loss": 1.0453, "step": 11040 }, { "epoch": 0.13460811914250545, "grad_norm": 1.9979102611541748, "learning_rate": 4.554778704297627e-06, "loss": 0.9734, "step": 11045 }, { "epoch": 0.13466905536665327, "grad_norm": 2.0927209854125977, "learning_rate": 4.55445798588839e-06, "loss": 0.8778, "step": 11050 }, { "epoch": 0.13472999159080107, "grad_norm": 2.3510818481445312, "learning_rate": 4.554137267479154e-06, "loss": 0.9368, "step": 11055 }, { "epoch": 0.13479092781494886, "grad_norm": 1.880910873413086, "learning_rate": 4.553816549069917e-06, "loss": 0.8984, "step": 11060 }, { "epoch": 0.13485186403909669, "grad_norm": 2.0706605911254883, "learning_rate": 4.553495830660681e-06, "loss": 0.9053, "step": 11065 }, { "epoch": 0.13491280026324448, "grad_norm": 1.895113229751587, "learning_rate": 4.553175112251444e-06, "loss": 0.8977, "step": 11070 }, { "epoch": 0.1349737364873923, "grad_norm": 2.5540530681610107, "learning_rate": 4.552854393842207e-06, "loss": 0.847, "step": 11075 }, { "epoch": 0.1350346727115401, "grad_norm": 2.503545045852661, "learning_rate": 4.5525336754329705e-06, "loss": 0.9331, "step": 11080 }, { "epoch": 0.13509560893568792, "grad_norm": 1.9108028411865234, "learning_rate": 4.5522129570237335e-06, "loss": 0.8756, "step": 11085 }, { "epoch": 0.13515654515983572, "grad_norm": 1.8041205406188965, "learning_rate": 4.551892238614497e-06, "loss": 0.8672, "step": 11090 }, { "epoch": 0.1352174813839835, "grad_norm": 2.459749221801758, "learning_rate": 4.5515715202052604e-06, "loss": 0.8152, "step": 11095 }, { "epoch": 0.13527841760813133, "grad_norm": 2.045231819152832, "learning_rate": 4.5512508017960234e-06, "loss": 0.8828, "step": 11100 }, { "epoch": 0.13533935383227913, "grad_norm": 1.8416621685028076, "learning_rate": 4.550930083386787e-06, "loss": 0.9481, "step": 11105 }, { "epoch": 0.13540029005642695, "grad_norm": 1.914109468460083, "learning_rate": 4.55060936497755e-06, "loss": 0.8282, "step": 11110 }, { "epoch": 0.13546122628057475, "grad_norm": 1.9585264921188354, "learning_rate": 4.550288646568313e-06, "loss": 0.8534, "step": 11115 }, { "epoch": 0.13552216250472257, "grad_norm": 2.066527843475342, "learning_rate": 4.549967928159077e-06, "loss": 0.9301, "step": 11120 }, { "epoch": 0.13558309872887037, "grad_norm": 1.8215771913528442, "learning_rate": 4.54964720974984e-06, "loss": 0.8575, "step": 11125 }, { "epoch": 0.13564403495301816, "grad_norm": 1.760231614112854, "learning_rate": 4.549326491340603e-06, "loss": 0.9201, "step": 11130 }, { "epoch": 0.13570497117716598, "grad_norm": 1.9671986103057861, "learning_rate": 4.549005772931366e-06, "loss": 0.9363, "step": 11135 }, { "epoch": 0.13576590740131378, "grad_norm": 1.872503399848938, "learning_rate": 4.54868505452213e-06, "loss": 0.8917, "step": 11140 }, { "epoch": 0.1358268436254616, "grad_norm": 2.01600980758667, "learning_rate": 4.548364336112893e-06, "loss": 0.8708, "step": 11145 }, { "epoch": 0.1358877798496094, "grad_norm": 2.20173716545105, "learning_rate": 4.548043617703656e-06, "loss": 0.8655, "step": 11150 }, { "epoch": 0.1359487160737572, "grad_norm": 1.942967414855957, "learning_rate": 4.54772289929442e-06, "loss": 0.966, "step": 11155 }, { "epoch": 0.13600965229790501, "grad_norm": 1.7923191785812378, "learning_rate": 4.547402180885183e-06, "loss": 0.9402, "step": 11160 }, { "epoch": 0.1360705885220528, "grad_norm": 2.2256860733032227, "learning_rate": 4.547081462475946e-06, "loss": 0.8967, "step": 11165 }, { "epoch": 0.13613152474620063, "grad_norm": 1.9957948923110962, "learning_rate": 4.54676074406671e-06, "loss": 0.9399, "step": 11170 }, { "epoch": 0.13619246097034843, "grad_norm": 2.4607994556427, "learning_rate": 4.546440025657473e-06, "loss": 0.8921, "step": 11175 }, { "epoch": 0.13625339719449625, "grad_norm": 1.8280293941497803, "learning_rate": 4.546119307248236e-06, "loss": 0.9394, "step": 11180 }, { "epoch": 0.13631433341864405, "grad_norm": 2.1024041175842285, "learning_rate": 4.545798588839e-06, "loss": 0.898, "step": 11185 }, { "epoch": 0.13637526964279184, "grad_norm": 1.9440009593963623, "learning_rate": 4.545477870429763e-06, "loss": 0.8757, "step": 11190 }, { "epoch": 0.13643620586693966, "grad_norm": 2.0209951400756836, "learning_rate": 4.545157152020526e-06, "loss": 0.9732, "step": 11195 }, { "epoch": 0.13649714209108746, "grad_norm": 2.10160493850708, "learning_rate": 4.54483643361129e-06, "loss": 0.8611, "step": 11200 }, { "epoch": 0.13655807831523528, "grad_norm": 2.077608346939087, "learning_rate": 4.544515715202053e-06, "loss": 0.9407, "step": 11205 }, { "epoch": 0.13661901453938308, "grad_norm": 2.079909324645996, "learning_rate": 4.544194996792817e-06, "loss": 0.93, "step": 11210 }, { "epoch": 0.1366799507635309, "grad_norm": 1.8923077583312988, "learning_rate": 4.54387427838358e-06, "loss": 0.8828, "step": 11215 }, { "epoch": 0.1367408869876787, "grad_norm": 1.8343544006347656, "learning_rate": 4.543553559974343e-06, "loss": 0.9411, "step": 11220 }, { "epoch": 0.1368018232118265, "grad_norm": 2.297642230987549, "learning_rate": 4.5432328415651065e-06, "loss": 0.8959, "step": 11225 }, { "epoch": 0.1368627594359743, "grad_norm": 1.8378589153289795, "learning_rate": 4.5429121231558695e-06, "loss": 0.8601, "step": 11230 }, { "epoch": 0.1369236956601221, "grad_norm": 1.8871139287948608, "learning_rate": 4.542591404746633e-06, "loss": 0.9017, "step": 11235 }, { "epoch": 0.13698463188426993, "grad_norm": 1.855952501296997, "learning_rate": 4.542270686337396e-06, "loss": 0.9308, "step": 11240 }, { "epoch": 0.13704556810841773, "grad_norm": 1.9085636138916016, "learning_rate": 4.541949967928159e-06, "loss": 0.975, "step": 11245 }, { "epoch": 0.13710650433256555, "grad_norm": 2.0218374729156494, "learning_rate": 4.541629249518923e-06, "loss": 0.8804, "step": 11250 }, { "epoch": 0.13716744055671334, "grad_norm": 2.0923972129821777, "learning_rate": 4.541308531109686e-06, "loss": 0.9283, "step": 11255 }, { "epoch": 0.13722837678086114, "grad_norm": 1.7546958923339844, "learning_rate": 4.540987812700449e-06, "loss": 0.9358, "step": 11260 }, { "epoch": 0.13728931300500896, "grad_norm": 1.792007327079773, "learning_rate": 4.540667094291213e-06, "loss": 0.887, "step": 11265 }, { "epoch": 0.13735024922915676, "grad_norm": 1.8876508474349976, "learning_rate": 4.540346375881976e-06, "loss": 0.8513, "step": 11270 }, { "epoch": 0.13741118545330458, "grad_norm": 2.0826122760772705, "learning_rate": 4.540025657472739e-06, "loss": 0.8975, "step": 11275 }, { "epoch": 0.13747212167745237, "grad_norm": 1.610658884048462, "learning_rate": 4.539704939063503e-06, "loss": 0.8501, "step": 11280 }, { "epoch": 0.1375330579016002, "grad_norm": 2.4561095237731934, "learning_rate": 4.539384220654266e-06, "loss": 0.9393, "step": 11285 }, { "epoch": 0.137593994125748, "grad_norm": 2.2532291412353516, "learning_rate": 4.539063502245029e-06, "loss": 0.9203, "step": 11290 }, { "epoch": 0.1376549303498958, "grad_norm": 2.490490198135376, "learning_rate": 4.538742783835793e-06, "loss": 0.8566, "step": 11295 }, { "epoch": 0.1377158665740436, "grad_norm": 2.149390459060669, "learning_rate": 4.538422065426556e-06, "loss": 0.9066, "step": 11300 }, { "epoch": 0.1377768027981914, "grad_norm": 1.9024229049682617, "learning_rate": 4.538101347017319e-06, "loss": 0.8438, "step": 11305 }, { "epoch": 0.13783773902233923, "grad_norm": 2.1433591842651367, "learning_rate": 4.537780628608082e-06, "loss": 0.8757, "step": 11310 }, { "epoch": 0.13789867524648702, "grad_norm": 2.7877962589263916, "learning_rate": 4.537459910198846e-06, "loss": 0.9852, "step": 11315 }, { "epoch": 0.13795961147063485, "grad_norm": 2.158759593963623, "learning_rate": 4.537139191789609e-06, "loss": 0.9119, "step": 11320 }, { "epoch": 0.13802054769478264, "grad_norm": 1.8767296075820923, "learning_rate": 4.536818473380372e-06, "loss": 0.966, "step": 11325 }, { "epoch": 0.13808148391893044, "grad_norm": 2.4517605304718018, "learning_rate": 4.536497754971136e-06, "loss": 0.9157, "step": 11330 }, { "epoch": 0.13814242014307826, "grad_norm": 2.048661708831787, "learning_rate": 4.536177036561899e-06, "loss": 0.846, "step": 11335 }, { "epoch": 0.13820335636722605, "grad_norm": 1.9558355808258057, "learning_rate": 4.535856318152662e-06, "loss": 0.9108, "step": 11340 }, { "epoch": 0.13826429259137388, "grad_norm": 2.157137632369995, "learning_rate": 4.535535599743426e-06, "loss": 0.8571, "step": 11345 }, { "epoch": 0.13832522881552167, "grad_norm": 1.827090859413147, "learning_rate": 4.535214881334189e-06, "loss": 0.8592, "step": 11350 }, { "epoch": 0.1383861650396695, "grad_norm": 1.930266261100769, "learning_rate": 4.534894162924952e-06, "loss": 0.9423, "step": 11355 }, { "epoch": 0.1384471012638173, "grad_norm": 2.1249477863311768, "learning_rate": 4.5345734445157156e-06, "loss": 0.9087, "step": 11360 }, { "epoch": 0.13850803748796509, "grad_norm": 2.1886279582977295, "learning_rate": 4.534252726106479e-06, "loss": 0.8775, "step": 11365 }, { "epoch": 0.1385689737121129, "grad_norm": 2.313760757446289, "learning_rate": 4.5339320076972424e-06, "loss": 0.8959, "step": 11370 }, { "epoch": 0.1386299099362607, "grad_norm": 2.2090981006622314, "learning_rate": 4.5336112892880055e-06, "loss": 0.8801, "step": 11375 }, { "epoch": 0.13869084616040853, "grad_norm": 2.1036107540130615, "learning_rate": 4.5332905708787685e-06, "loss": 0.8748, "step": 11380 }, { "epoch": 0.13875178238455632, "grad_norm": 2.2857401371002197, "learning_rate": 4.532969852469532e-06, "loss": 0.892, "step": 11385 }, { "epoch": 0.13881271860870412, "grad_norm": 2.0010054111480713, "learning_rate": 4.532649134060295e-06, "loss": 0.9062, "step": 11390 }, { "epoch": 0.13887365483285194, "grad_norm": 2.2825591564178467, "learning_rate": 4.532328415651059e-06, "loss": 0.9581, "step": 11395 }, { "epoch": 0.13893459105699973, "grad_norm": 2.228553056716919, "learning_rate": 4.532007697241822e-06, "loss": 0.8811, "step": 11400 }, { "epoch": 0.13899552728114756, "grad_norm": 2.1462652683258057, "learning_rate": 4.531686978832585e-06, "loss": 0.8652, "step": 11405 }, { "epoch": 0.13905646350529535, "grad_norm": 1.9893803596496582, "learning_rate": 4.531366260423349e-06, "loss": 0.8743, "step": 11410 }, { "epoch": 0.13911739972944318, "grad_norm": 1.785616159439087, "learning_rate": 4.531045542014112e-06, "loss": 0.8664, "step": 11415 }, { "epoch": 0.13917833595359097, "grad_norm": 1.8227107524871826, "learning_rate": 4.530724823604875e-06, "loss": 0.9391, "step": 11420 }, { "epoch": 0.13923927217773877, "grad_norm": 1.9868413209915161, "learning_rate": 4.530404105195639e-06, "loss": 0.9231, "step": 11425 }, { "epoch": 0.1393002084018866, "grad_norm": 2.3721041679382324, "learning_rate": 4.530083386786402e-06, "loss": 0.8727, "step": 11430 }, { "epoch": 0.13936114462603438, "grad_norm": 1.7456167936325073, "learning_rate": 4.529762668377165e-06, "loss": 0.8562, "step": 11435 }, { "epoch": 0.1394220808501822, "grad_norm": 1.7781016826629639, "learning_rate": 4.529441949967929e-06, "loss": 0.9546, "step": 11440 }, { "epoch": 0.13948301707433, "grad_norm": 2.068786144256592, "learning_rate": 4.529121231558692e-06, "loss": 0.9472, "step": 11445 }, { "epoch": 0.13954395329847782, "grad_norm": 1.7821404933929443, "learning_rate": 4.528800513149455e-06, "loss": 0.8884, "step": 11450 }, { "epoch": 0.13960488952262562, "grad_norm": 2.3958139419555664, "learning_rate": 4.528479794740219e-06, "loss": 0.9828, "step": 11455 }, { "epoch": 0.13966582574677341, "grad_norm": 1.9934587478637695, "learning_rate": 4.528159076330982e-06, "loss": 0.8332, "step": 11460 }, { "epoch": 0.13972676197092124, "grad_norm": 4.387547969818115, "learning_rate": 4.527838357921745e-06, "loss": 0.8626, "step": 11465 }, { "epoch": 0.13978769819506903, "grad_norm": 2.6124799251556396, "learning_rate": 4.527517639512508e-06, "loss": 0.9574, "step": 11470 }, { "epoch": 0.13984863441921686, "grad_norm": 1.8954194784164429, "learning_rate": 4.527196921103272e-06, "loss": 0.9045, "step": 11475 }, { "epoch": 0.13990957064336465, "grad_norm": 3.2373600006103516, "learning_rate": 4.526876202694035e-06, "loss": 0.8685, "step": 11480 }, { "epoch": 0.13997050686751247, "grad_norm": 1.7250827550888062, "learning_rate": 4.526555484284798e-06, "loss": 0.8666, "step": 11485 }, { "epoch": 0.14003144309166027, "grad_norm": 1.9607514142990112, "learning_rate": 4.526234765875562e-06, "loss": 0.9367, "step": 11490 }, { "epoch": 0.14009237931580806, "grad_norm": 2.051957130432129, "learning_rate": 4.525914047466325e-06, "loss": 0.8475, "step": 11495 }, { "epoch": 0.14015331553995589, "grad_norm": 1.8027212619781494, "learning_rate": 4.525593329057088e-06, "loss": 0.8074, "step": 11500 }, { "epoch": 0.14021425176410368, "grad_norm": 2.223456382751465, "learning_rate": 4.5252726106478515e-06, "loss": 0.8956, "step": 11505 }, { "epoch": 0.1402751879882515, "grad_norm": 1.9423025846481323, "learning_rate": 4.5249518922386145e-06, "loss": 1.0032, "step": 11510 }, { "epoch": 0.1403361242123993, "grad_norm": 2.022916316986084, "learning_rate": 4.524631173829378e-06, "loss": 0.9137, "step": 11515 }, { "epoch": 0.14039706043654712, "grad_norm": 2.101466655731201, "learning_rate": 4.524310455420141e-06, "loss": 0.8834, "step": 11520 }, { "epoch": 0.14045799666069492, "grad_norm": 2.1294448375701904, "learning_rate": 4.5239897370109044e-06, "loss": 0.8891, "step": 11525 }, { "epoch": 0.1405189328848427, "grad_norm": 2.4189491271972656, "learning_rate": 4.523669018601668e-06, "loss": 0.8764, "step": 11530 }, { "epoch": 0.14057986910899054, "grad_norm": 2.2126333713531494, "learning_rate": 4.523348300192431e-06, "loss": 0.8662, "step": 11535 }, { "epoch": 0.14064080533313833, "grad_norm": 2.2962872982025146, "learning_rate": 4.523027581783195e-06, "loss": 1.007, "step": 11540 }, { "epoch": 0.14070174155728615, "grad_norm": 1.873225212097168, "learning_rate": 4.522706863373958e-06, "loss": 0.9095, "step": 11545 }, { "epoch": 0.14076267778143395, "grad_norm": 2.2213969230651855, "learning_rate": 4.522386144964721e-06, "loss": 0.9263, "step": 11550 }, { "epoch": 0.14082361400558177, "grad_norm": 1.9922068119049072, "learning_rate": 4.522065426555485e-06, "loss": 0.8501, "step": 11555 }, { "epoch": 0.14088455022972957, "grad_norm": 1.9138869047164917, "learning_rate": 4.521744708146248e-06, "loss": 0.8962, "step": 11560 }, { "epoch": 0.14094548645387736, "grad_norm": 2.3415749073028564, "learning_rate": 4.521423989737011e-06, "loss": 0.9361, "step": 11565 }, { "epoch": 0.14100642267802518, "grad_norm": 2.258639097213745, "learning_rate": 4.521103271327775e-06, "loss": 0.8635, "step": 11570 }, { "epoch": 0.14106735890217298, "grad_norm": 2.221008539199829, "learning_rate": 4.520782552918538e-06, "loss": 0.8537, "step": 11575 }, { "epoch": 0.1411282951263208, "grad_norm": 2.4527781009674072, "learning_rate": 4.520461834509301e-06, "loss": 0.9088, "step": 11580 }, { "epoch": 0.1411892313504686, "grad_norm": 2.4124910831451416, "learning_rate": 4.520141116100065e-06, "loss": 0.8715, "step": 11585 }, { "epoch": 0.14125016757461642, "grad_norm": 2.0489165782928467, "learning_rate": 4.519820397690828e-06, "loss": 0.8969, "step": 11590 }, { "epoch": 0.14131110379876421, "grad_norm": 2.0533053874969482, "learning_rate": 4.519499679281591e-06, "loss": 0.9249, "step": 11595 }, { "epoch": 0.141372040022912, "grad_norm": 1.8418419361114502, "learning_rate": 4.519178960872355e-06, "loss": 0.87, "step": 11600 }, { "epoch": 0.14143297624705983, "grad_norm": 1.807851791381836, "learning_rate": 4.518858242463118e-06, "loss": 0.9302, "step": 11605 }, { "epoch": 0.14149391247120763, "grad_norm": 1.7578916549682617, "learning_rate": 4.518537524053881e-06, "loss": 0.8903, "step": 11610 }, { "epoch": 0.14155484869535545, "grad_norm": 2.3072426319122314, "learning_rate": 4.518216805644645e-06, "loss": 0.9579, "step": 11615 }, { "epoch": 0.14161578491950325, "grad_norm": 1.8019622564315796, "learning_rate": 4.517896087235408e-06, "loss": 0.8923, "step": 11620 }, { "epoch": 0.14167672114365104, "grad_norm": 1.6945208311080933, "learning_rate": 4.517575368826171e-06, "loss": 0.8478, "step": 11625 }, { "epoch": 0.14173765736779886, "grad_norm": 1.8469098806381226, "learning_rate": 4.5172546504169346e-06, "loss": 0.9204, "step": 11630 }, { "epoch": 0.14179859359194666, "grad_norm": 1.7624163627624512, "learning_rate": 4.516933932007698e-06, "loss": 0.8961, "step": 11635 }, { "epoch": 0.14185952981609448, "grad_norm": 1.9243414402008057, "learning_rate": 4.516613213598461e-06, "loss": 0.9027, "step": 11640 }, { "epoch": 0.14192046604024228, "grad_norm": 1.7335801124572754, "learning_rate": 4.516292495189224e-06, "loss": 0.9404, "step": 11645 }, { "epoch": 0.1419814022643901, "grad_norm": 1.7373439073562622, "learning_rate": 4.5159717767799875e-06, "loss": 0.8825, "step": 11650 }, { "epoch": 0.1420423384885379, "grad_norm": 1.8844243288040161, "learning_rate": 4.5156510583707505e-06, "loss": 0.9203, "step": 11655 }, { "epoch": 0.1421032747126857, "grad_norm": 2.1609556674957275, "learning_rate": 4.5153303399615135e-06, "loss": 0.9516, "step": 11660 }, { "epoch": 0.1421642109368335, "grad_norm": 2.1747241020202637, "learning_rate": 4.515009621552277e-06, "loss": 0.9617, "step": 11665 }, { "epoch": 0.1422251471609813, "grad_norm": 2.081458806991577, "learning_rate": 4.51468890314304e-06, "loss": 0.9216, "step": 11670 }, { "epoch": 0.14228608338512913, "grad_norm": 1.6698552370071411, "learning_rate": 4.514368184733804e-06, "loss": 0.8864, "step": 11675 }, { "epoch": 0.14234701960927693, "grad_norm": 1.6984364986419678, "learning_rate": 4.514047466324567e-06, "loss": 0.8976, "step": 11680 }, { "epoch": 0.14240795583342475, "grad_norm": 1.9442074298858643, "learning_rate": 4.513726747915331e-06, "loss": 0.9018, "step": 11685 }, { "epoch": 0.14246889205757254, "grad_norm": 1.8218365907669067, "learning_rate": 4.513406029506094e-06, "loss": 0.8531, "step": 11690 }, { "epoch": 0.14252982828172034, "grad_norm": 2.017460584640503, "learning_rate": 4.513085311096857e-06, "loss": 0.9867, "step": 11695 }, { "epoch": 0.14259076450586816, "grad_norm": 1.9991867542266846, "learning_rate": 4.512764592687621e-06, "loss": 0.9074, "step": 11700 }, { "epoch": 0.14265170073001596, "grad_norm": 2.067631721496582, "learning_rate": 4.512443874278384e-06, "loss": 0.9221, "step": 11705 }, { "epoch": 0.14271263695416378, "grad_norm": 2.2611403465270996, "learning_rate": 4.512123155869148e-06, "loss": 0.9224, "step": 11710 }, { "epoch": 0.14277357317831157, "grad_norm": 1.8376729488372803, "learning_rate": 4.511802437459911e-06, "loss": 0.9523, "step": 11715 }, { "epoch": 0.1428345094024594, "grad_norm": 2.030999183654785, "learning_rate": 4.511481719050674e-06, "loss": 0.8909, "step": 11720 }, { "epoch": 0.1428954456266072, "grad_norm": 2.0163583755493164, "learning_rate": 4.511161000641437e-06, "loss": 0.8628, "step": 11725 }, { "epoch": 0.142956381850755, "grad_norm": 1.9840691089630127, "learning_rate": 4.510840282232201e-06, "loss": 0.885, "step": 11730 }, { "epoch": 0.1430173180749028, "grad_norm": 2.0018177032470703, "learning_rate": 4.510519563822964e-06, "loss": 0.9718, "step": 11735 }, { "epoch": 0.1430782542990506, "grad_norm": 2.106200695037842, "learning_rate": 4.510198845413727e-06, "loss": 0.8294, "step": 11740 }, { "epoch": 0.14313919052319843, "grad_norm": 2.028585910797119, "learning_rate": 4.509878127004491e-06, "loss": 0.8769, "step": 11745 }, { "epoch": 0.14320012674734622, "grad_norm": 2.136833906173706, "learning_rate": 4.509557408595254e-06, "loss": 0.8664, "step": 11750 }, { "epoch": 0.14326106297149405, "grad_norm": 2.034344434738159, "learning_rate": 4.509236690186017e-06, "loss": 0.8757, "step": 11755 }, { "epoch": 0.14332199919564184, "grad_norm": 2.0221736431121826, "learning_rate": 4.508915971776781e-06, "loss": 0.9137, "step": 11760 }, { "epoch": 0.14338293541978964, "grad_norm": 2.187429666519165, "learning_rate": 4.508595253367544e-06, "loss": 0.9016, "step": 11765 }, { "epoch": 0.14344387164393746, "grad_norm": 2.1760001182556152, "learning_rate": 4.508274534958307e-06, "loss": 0.8722, "step": 11770 }, { "epoch": 0.14350480786808525, "grad_norm": 1.8828823566436768, "learning_rate": 4.5079538165490705e-06, "loss": 0.8765, "step": 11775 }, { "epoch": 0.14356574409223308, "grad_norm": 2.0758676528930664, "learning_rate": 4.5076330981398335e-06, "loss": 0.9082, "step": 11780 }, { "epoch": 0.14362668031638087, "grad_norm": 1.7921494245529175, "learning_rate": 4.5073123797305966e-06, "loss": 0.9331, "step": 11785 }, { "epoch": 0.1436876165405287, "grad_norm": 1.8475691080093384, "learning_rate": 4.50699166132136e-06, "loss": 0.9634, "step": 11790 }, { "epoch": 0.1437485527646765, "grad_norm": 1.8243975639343262, "learning_rate": 4.5066709429121234e-06, "loss": 0.9029, "step": 11795 }, { "epoch": 0.14380948898882429, "grad_norm": 2.068110942840576, "learning_rate": 4.5063502245028865e-06, "loss": 0.9375, "step": 11800 }, { "epoch": 0.1438704252129721, "grad_norm": 2.1258742809295654, "learning_rate": 4.50602950609365e-06, "loss": 0.8658, "step": 11805 }, { "epoch": 0.1439313614371199, "grad_norm": 2.3252310752868652, "learning_rate": 4.505708787684413e-06, "loss": 0.9149, "step": 11810 }, { "epoch": 0.14399229766126773, "grad_norm": 2.074617862701416, "learning_rate": 4.505388069275176e-06, "loss": 0.9535, "step": 11815 }, { "epoch": 0.14405323388541552, "grad_norm": 1.9651223421096802, "learning_rate": 4.50506735086594e-06, "loss": 0.9161, "step": 11820 }, { "epoch": 0.14411417010956334, "grad_norm": 1.853670358657837, "learning_rate": 4.504746632456703e-06, "loss": 0.8646, "step": 11825 }, { "epoch": 0.14417510633371114, "grad_norm": 2.133535146713257, "learning_rate": 4.504425914047466e-06, "loss": 0.9326, "step": 11830 }, { "epoch": 0.14423604255785893, "grad_norm": 2.3890318870544434, "learning_rate": 4.50410519563823e-06, "loss": 0.9162, "step": 11835 }, { "epoch": 0.14429697878200676, "grad_norm": 2.101842164993286, "learning_rate": 4.503784477228993e-06, "loss": 0.8841, "step": 11840 }, { "epoch": 0.14435791500615455, "grad_norm": 1.9698723554611206, "learning_rate": 4.503463758819757e-06, "loss": 0.9474, "step": 11845 }, { "epoch": 0.14441885123030238, "grad_norm": 1.7082607746124268, "learning_rate": 4.50314304041052e-06, "loss": 0.9055, "step": 11850 }, { "epoch": 0.14447978745445017, "grad_norm": 1.943181037902832, "learning_rate": 4.502822322001283e-06, "loss": 0.8992, "step": 11855 }, { "epoch": 0.144540723678598, "grad_norm": 2.0118138790130615, "learning_rate": 4.502501603592047e-06, "loss": 0.9565, "step": 11860 }, { "epoch": 0.1446016599027458, "grad_norm": 1.8016670942306519, "learning_rate": 4.50218088518281e-06, "loss": 0.9817, "step": 11865 }, { "epoch": 0.14466259612689358, "grad_norm": 2.424530506134033, "learning_rate": 4.501860166773574e-06, "loss": 0.8271, "step": 11870 }, { "epoch": 0.1447235323510414, "grad_norm": 1.9361528158187866, "learning_rate": 4.501539448364337e-06, "loss": 0.9286, "step": 11875 }, { "epoch": 0.1447844685751892, "grad_norm": 2.1488070487976074, "learning_rate": 4.5012187299551e-06, "loss": 0.9799, "step": 11880 }, { "epoch": 0.14484540479933702, "grad_norm": 2.1138381958007812, "learning_rate": 4.500898011545864e-06, "loss": 0.8863, "step": 11885 }, { "epoch": 0.14490634102348482, "grad_norm": 2.0736801624298096, "learning_rate": 4.500577293136627e-06, "loss": 0.833, "step": 11890 }, { "epoch": 0.14496727724763261, "grad_norm": 2.762144088745117, "learning_rate": 4.50025657472739e-06, "loss": 0.9101, "step": 11895 }, { "epoch": 0.14502821347178044, "grad_norm": 2.461534023284912, "learning_rate": 4.499935856318153e-06, "loss": 0.9612, "step": 11900 }, { "epoch": 0.14508914969592823, "grad_norm": 2.268815040588379, "learning_rate": 4.499615137908917e-06, "loss": 0.8925, "step": 11905 }, { "epoch": 0.14515008592007606, "grad_norm": 2.003140687942505, "learning_rate": 4.49929441949968e-06, "loss": 0.8913, "step": 11910 }, { "epoch": 0.14521102214422385, "grad_norm": 2.2459640502929688, "learning_rate": 4.498973701090443e-06, "loss": 0.9533, "step": 11915 }, { "epoch": 0.14527195836837167, "grad_norm": 2.1300148963928223, "learning_rate": 4.4986529826812065e-06, "loss": 0.8842, "step": 11920 }, { "epoch": 0.14533289459251947, "grad_norm": 2.017286539077759, "learning_rate": 4.4983322642719695e-06, "loss": 0.8924, "step": 11925 }, { "epoch": 0.14539383081666726, "grad_norm": 1.7263686656951904, "learning_rate": 4.4980115458627325e-06, "loss": 0.8877, "step": 11930 }, { "epoch": 0.14545476704081509, "grad_norm": 2.003056287765503, "learning_rate": 4.497690827453496e-06, "loss": 0.9509, "step": 11935 }, { "epoch": 0.14551570326496288, "grad_norm": 2.1295902729034424, "learning_rate": 4.497370109044259e-06, "loss": 0.9059, "step": 11940 }, { "epoch": 0.1455766394891107, "grad_norm": 2.2921247482299805, "learning_rate": 4.497049390635022e-06, "loss": 0.859, "step": 11945 }, { "epoch": 0.1456375757132585, "grad_norm": 2.0509984493255615, "learning_rate": 4.496728672225786e-06, "loss": 0.8579, "step": 11950 }, { "epoch": 0.14569851193740632, "grad_norm": 1.9396469593048096, "learning_rate": 4.496407953816549e-06, "loss": 0.9053, "step": 11955 }, { "epoch": 0.14575944816155412, "grad_norm": 1.8891383409500122, "learning_rate": 4.496087235407312e-06, "loss": 0.9026, "step": 11960 }, { "epoch": 0.1458203843857019, "grad_norm": 1.8050267696380615, "learning_rate": 4.495766516998076e-06, "loss": 0.9226, "step": 11965 }, { "epoch": 0.14588132060984974, "grad_norm": 1.7008187770843506, "learning_rate": 4.495445798588839e-06, "loss": 0.8021, "step": 11970 }, { "epoch": 0.14594225683399753, "grad_norm": 2.100059986114502, "learning_rate": 4.495125080179602e-06, "loss": 0.9059, "step": 11975 }, { "epoch": 0.14600319305814535, "grad_norm": 1.979010820388794, "learning_rate": 4.494804361770366e-06, "loss": 0.8941, "step": 11980 }, { "epoch": 0.14606412928229315, "grad_norm": 1.9073933362960815, "learning_rate": 4.494483643361129e-06, "loss": 0.8757, "step": 11985 }, { "epoch": 0.14612506550644097, "grad_norm": 2.1527605056762695, "learning_rate": 4.494162924951893e-06, "loss": 0.9464, "step": 11990 }, { "epoch": 0.14618600173058877, "grad_norm": 1.8236204385757446, "learning_rate": 4.493842206542656e-06, "loss": 0.855, "step": 11995 }, { "epoch": 0.14624693795473656, "grad_norm": 1.9507861137390137, "learning_rate": 4.493521488133419e-06, "loss": 0.9219, "step": 12000 }, { "epoch": 0.14630787417888438, "grad_norm": 1.8762195110321045, "learning_rate": 4.493200769724183e-06, "loss": 0.9133, "step": 12005 }, { "epoch": 0.14636881040303218, "grad_norm": 1.8616288900375366, "learning_rate": 4.492880051314946e-06, "loss": 0.9291, "step": 12010 }, { "epoch": 0.14642974662718, "grad_norm": 1.9393646717071533, "learning_rate": 4.49255933290571e-06, "loss": 0.9143, "step": 12015 }, { "epoch": 0.1464906828513278, "grad_norm": 2.0224449634552, "learning_rate": 4.492238614496473e-06, "loss": 1.0007, "step": 12020 }, { "epoch": 0.14655161907547562, "grad_norm": 2.4222195148468018, "learning_rate": 4.491917896087236e-06, "loss": 0.9173, "step": 12025 }, { "epoch": 0.14661255529962341, "grad_norm": 1.7083704471588135, "learning_rate": 4.491597177678e-06, "loss": 0.9033, "step": 12030 }, { "epoch": 0.1466734915237712, "grad_norm": 2.4970853328704834, "learning_rate": 4.491276459268763e-06, "loss": 0.9203, "step": 12035 }, { "epoch": 0.14673442774791903, "grad_norm": 1.7506924867630005, "learning_rate": 4.490955740859526e-06, "loss": 0.8784, "step": 12040 }, { "epoch": 0.14679536397206683, "grad_norm": 1.9779196977615356, "learning_rate": 4.4906350224502895e-06, "loss": 0.89, "step": 12045 }, { "epoch": 0.14685630019621465, "grad_norm": 2.3152754306793213, "learning_rate": 4.4903143040410525e-06, "loss": 0.8852, "step": 12050 }, { "epoch": 0.14691723642036245, "grad_norm": 1.640816569328308, "learning_rate": 4.4899935856318156e-06, "loss": 0.9259, "step": 12055 }, { "epoch": 0.14697817264451027, "grad_norm": 2.32369065284729, "learning_rate": 4.4896728672225786e-06, "loss": 0.9366, "step": 12060 }, { "epoch": 0.14703910886865806, "grad_norm": 2.050210952758789, "learning_rate": 4.4893521488133424e-06, "loss": 0.9335, "step": 12065 }, { "epoch": 0.14710004509280586, "grad_norm": 2.136122941970825, "learning_rate": 4.4890314304041055e-06, "loss": 0.9699, "step": 12070 }, { "epoch": 0.14716098131695368, "grad_norm": 2.037126064300537, "learning_rate": 4.4887107119948685e-06, "loss": 0.8453, "step": 12075 }, { "epoch": 0.14722191754110148, "grad_norm": 1.8807004690170288, "learning_rate": 4.488389993585632e-06, "loss": 0.94, "step": 12080 }, { "epoch": 0.1472828537652493, "grad_norm": 2.5611422061920166, "learning_rate": 4.488069275176395e-06, "loss": 0.9125, "step": 12085 }, { "epoch": 0.1473437899893971, "grad_norm": 1.8185346126556396, "learning_rate": 4.487748556767158e-06, "loss": 0.8724, "step": 12090 }, { "epoch": 0.14740472621354492, "grad_norm": 1.8695449829101562, "learning_rate": 4.487427838357922e-06, "loss": 0.9102, "step": 12095 }, { "epoch": 0.1474656624376927, "grad_norm": 2.01065731048584, "learning_rate": 4.487107119948685e-06, "loss": 0.7873, "step": 12100 }, { "epoch": 0.1475265986618405, "grad_norm": 1.7602137327194214, "learning_rate": 4.486786401539448e-06, "loss": 0.899, "step": 12105 }, { "epoch": 0.14758753488598833, "grad_norm": 2.21616792678833, "learning_rate": 4.486465683130212e-06, "loss": 0.9099, "step": 12110 }, { "epoch": 0.14764847111013613, "grad_norm": 2.2493176460266113, "learning_rate": 4.486144964720975e-06, "loss": 0.9057, "step": 12115 }, { "epoch": 0.14770940733428395, "grad_norm": 2.551973581314087, "learning_rate": 4.485824246311738e-06, "loss": 0.9651, "step": 12120 }, { "epoch": 0.14777034355843174, "grad_norm": 2.1503779888153076, "learning_rate": 4.485503527902502e-06, "loss": 0.9308, "step": 12125 }, { "epoch": 0.14783127978257954, "grad_norm": 1.960574746131897, "learning_rate": 4.485182809493265e-06, "loss": 0.8661, "step": 12130 }, { "epoch": 0.14789221600672736, "grad_norm": 2.180424928665161, "learning_rate": 4.484862091084028e-06, "loss": 0.9174, "step": 12135 }, { "epoch": 0.14795315223087516, "grad_norm": 2.1777257919311523, "learning_rate": 4.484541372674792e-06, "loss": 0.8947, "step": 12140 }, { "epoch": 0.14801408845502298, "grad_norm": 2.000802516937256, "learning_rate": 4.484220654265555e-06, "loss": 0.979, "step": 12145 }, { "epoch": 0.14807502467917077, "grad_norm": 1.8199243545532227, "learning_rate": 4.483899935856319e-06, "loss": 0.8508, "step": 12150 }, { "epoch": 0.1481359609033186, "grad_norm": 1.7631088495254517, "learning_rate": 4.483579217447082e-06, "loss": 0.8717, "step": 12155 }, { "epoch": 0.1481968971274664, "grad_norm": 2.1156256198883057, "learning_rate": 4.483258499037846e-06, "loss": 0.8522, "step": 12160 }, { "epoch": 0.1482578333516142, "grad_norm": 1.7773492336273193, "learning_rate": 4.482937780628609e-06, "loss": 0.8433, "step": 12165 }, { "epoch": 0.148318769575762, "grad_norm": 2.022413492202759, "learning_rate": 4.482617062219372e-06, "loss": 0.9254, "step": 12170 }, { "epoch": 0.1483797057999098, "grad_norm": 1.8046315908432007, "learning_rate": 4.482296343810136e-06, "loss": 0.8981, "step": 12175 }, { "epoch": 0.14844064202405763, "grad_norm": 1.9282829761505127, "learning_rate": 4.481975625400899e-06, "loss": 0.9216, "step": 12180 }, { "epoch": 0.14850157824820542, "grad_norm": 2.5018296241760254, "learning_rate": 4.481654906991662e-06, "loss": 0.8898, "step": 12185 }, { "epoch": 0.14856251447235325, "grad_norm": 2.1709258556365967, "learning_rate": 4.4813341885824255e-06, "loss": 0.9242, "step": 12190 }, { "epoch": 0.14862345069650104, "grad_norm": 1.961349368095398, "learning_rate": 4.4810134701731885e-06, "loss": 0.9313, "step": 12195 }, { "epoch": 0.14868438692064884, "grad_norm": 1.880629539489746, "learning_rate": 4.4806927517639515e-06, "loss": 0.9482, "step": 12200 }, { "epoch": 0.14874532314479666, "grad_norm": 1.9058594703674316, "learning_rate": 4.480372033354715e-06, "loss": 0.8732, "step": 12205 }, { "epoch": 0.14880625936894445, "grad_norm": 2.393663167953491, "learning_rate": 4.480051314945478e-06, "loss": 1.0119, "step": 12210 }, { "epoch": 0.14886719559309228, "grad_norm": 2.6637020111083984, "learning_rate": 4.479730596536241e-06, "loss": 0.8358, "step": 12215 }, { "epoch": 0.14892813181724007, "grad_norm": 1.7984604835510254, "learning_rate": 4.479409878127005e-06, "loss": 0.9216, "step": 12220 }, { "epoch": 0.1489890680413879, "grad_norm": 2.392307758331299, "learning_rate": 4.479089159717768e-06, "loss": 0.8956, "step": 12225 }, { "epoch": 0.1490500042655357, "grad_norm": 2.1435294151306152, "learning_rate": 4.478768441308531e-06, "loss": 0.9509, "step": 12230 }, { "epoch": 0.14911094048968349, "grad_norm": 2.068690538406372, "learning_rate": 4.478447722899294e-06, "loss": 0.8678, "step": 12235 }, { "epoch": 0.1491718767138313, "grad_norm": 1.6876744031906128, "learning_rate": 4.478127004490058e-06, "loss": 0.9335, "step": 12240 }, { "epoch": 0.1492328129379791, "grad_norm": 2.1443448066711426, "learning_rate": 4.477806286080821e-06, "loss": 0.9274, "step": 12245 }, { "epoch": 0.14929374916212693, "grad_norm": 2.2647106647491455, "learning_rate": 4.477485567671584e-06, "loss": 0.9505, "step": 12250 }, { "epoch": 0.14935468538627472, "grad_norm": 2.248072385787964, "learning_rate": 4.477164849262348e-06, "loss": 0.9341, "step": 12255 }, { "epoch": 0.14941562161042254, "grad_norm": 1.9491335153579712, "learning_rate": 4.476844130853111e-06, "loss": 0.896, "step": 12260 }, { "epoch": 0.14947655783457034, "grad_norm": 2.0580875873565674, "learning_rate": 4.476523412443874e-06, "loss": 0.9432, "step": 12265 }, { "epoch": 0.14953749405871813, "grad_norm": 2.154552936553955, "learning_rate": 4.476202694034638e-06, "loss": 0.9449, "step": 12270 }, { "epoch": 0.14959843028286596, "grad_norm": 2.172971487045288, "learning_rate": 4.475881975625401e-06, "loss": 0.8903, "step": 12275 }, { "epoch": 0.14965936650701375, "grad_norm": 2.18989634513855, "learning_rate": 4.475561257216164e-06, "loss": 0.8811, "step": 12280 }, { "epoch": 0.14972030273116158, "grad_norm": 1.6883372068405151, "learning_rate": 4.475240538806928e-06, "loss": 0.9377, "step": 12285 }, { "epoch": 0.14978123895530937, "grad_norm": 2.154898166656494, "learning_rate": 4.474919820397691e-06, "loss": 1.0003, "step": 12290 }, { "epoch": 0.1498421751794572, "grad_norm": 1.9167015552520752, "learning_rate": 4.474599101988455e-06, "loss": 0.9387, "step": 12295 }, { "epoch": 0.149903111403605, "grad_norm": 2.7232425212860107, "learning_rate": 4.474278383579218e-06, "loss": 0.8961, "step": 12300 }, { "epoch": 0.14996404762775278, "grad_norm": 1.9399585723876953, "learning_rate": 4.473957665169981e-06, "loss": 0.8547, "step": 12305 }, { "epoch": 0.1500249838519006, "grad_norm": 1.802251935005188, "learning_rate": 4.473636946760745e-06, "loss": 0.9143, "step": 12310 }, { "epoch": 0.1500859200760484, "grad_norm": 1.941295862197876, "learning_rate": 4.473316228351508e-06, "loss": 0.8707, "step": 12315 }, { "epoch": 0.15014685630019622, "grad_norm": 2.443153142929077, "learning_rate": 4.4729955099422715e-06, "loss": 0.9224, "step": 12320 }, { "epoch": 0.15020779252434402, "grad_norm": 2.024514675140381, "learning_rate": 4.4726747915330346e-06, "loss": 0.9625, "step": 12325 }, { "epoch": 0.15026872874849184, "grad_norm": 2.072205066680908, "learning_rate": 4.4723540731237976e-06, "loss": 0.9132, "step": 12330 }, { "epoch": 0.15032966497263964, "grad_norm": 2.3688700199127197, "learning_rate": 4.4720333547145614e-06, "loss": 0.9875, "step": 12335 }, { "epoch": 0.15039060119678743, "grad_norm": 1.9264559745788574, "learning_rate": 4.4717126363053245e-06, "loss": 0.897, "step": 12340 }, { "epoch": 0.15045153742093526, "grad_norm": 1.9229706525802612, "learning_rate": 4.4713919178960875e-06, "loss": 0.9933, "step": 12345 }, { "epoch": 0.15051247364508305, "grad_norm": 2.357637882232666, "learning_rate": 4.471071199486851e-06, "loss": 0.9698, "step": 12350 }, { "epoch": 0.15057340986923087, "grad_norm": 2.0464227199554443, "learning_rate": 4.470750481077614e-06, "loss": 0.9704, "step": 12355 }, { "epoch": 0.15063434609337867, "grad_norm": 2.0374608039855957, "learning_rate": 4.470429762668377e-06, "loss": 0.9308, "step": 12360 }, { "epoch": 0.15069528231752646, "grad_norm": 1.9458587169647217, "learning_rate": 4.470109044259141e-06, "loss": 0.9306, "step": 12365 }, { "epoch": 0.1507562185416743, "grad_norm": 1.7650985717773438, "learning_rate": 4.469788325849904e-06, "loss": 0.9088, "step": 12370 }, { "epoch": 0.15081715476582208, "grad_norm": 2.0300145149230957, "learning_rate": 4.469467607440667e-06, "loss": 0.9658, "step": 12375 }, { "epoch": 0.1508780909899699, "grad_norm": 2.0113697052001953, "learning_rate": 4.469146889031431e-06, "loss": 0.8718, "step": 12380 }, { "epoch": 0.1509390272141177, "grad_norm": 2.038106679916382, "learning_rate": 4.468826170622194e-06, "loss": 0.9335, "step": 12385 }, { "epoch": 0.15099996343826552, "grad_norm": 2.475511312484741, "learning_rate": 4.468505452212957e-06, "loss": 0.9542, "step": 12390 }, { "epoch": 0.15106089966241332, "grad_norm": 2.2349541187286377, "learning_rate": 4.468184733803721e-06, "loss": 0.9523, "step": 12395 }, { "epoch": 0.1511218358865611, "grad_norm": 2.013237237930298, "learning_rate": 4.467864015394484e-06, "loss": 0.8802, "step": 12400 }, { "epoch": 0.15118277211070894, "grad_norm": 2.1995720863342285, "learning_rate": 4.467543296985247e-06, "loss": 0.8587, "step": 12405 }, { "epoch": 0.15124370833485673, "grad_norm": 2.2396745681762695, "learning_rate": 4.46722257857601e-06, "loss": 0.9294, "step": 12410 }, { "epoch": 0.15130464455900455, "grad_norm": 1.7435734272003174, "learning_rate": 4.466901860166774e-06, "loss": 0.9487, "step": 12415 }, { "epoch": 0.15136558078315235, "grad_norm": 1.7629557847976685, "learning_rate": 4.466581141757537e-06, "loss": 0.8944, "step": 12420 }, { "epoch": 0.15142651700730017, "grad_norm": 2.1919784545898438, "learning_rate": 4.4662604233483e-06, "loss": 0.9026, "step": 12425 }, { "epoch": 0.15148745323144797, "grad_norm": 1.893325924873352, "learning_rate": 4.465939704939064e-06, "loss": 0.9362, "step": 12430 }, { "epoch": 0.15154838945559576, "grad_norm": 2.167599678039551, "learning_rate": 4.465618986529827e-06, "loss": 0.9109, "step": 12435 }, { "epoch": 0.15160932567974358, "grad_norm": 2.314523935317993, "learning_rate": 4.465298268120591e-06, "loss": 0.8991, "step": 12440 }, { "epoch": 0.15167026190389138, "grad_norm": 2.3430655002593994, "learning_rate": 4.464977549711354e-06, "loss": 0.8946, "step": 12445 }, { "epoch": 0.1517311981280392, "grad_norm": 1.8353437185287476, "learning_rate": 4.464656831302117e-06, "loss": 0.8895, "step": 12450 }, { "epoch": 0.151792134352187, "grad_norm": 2.0335958003997803, "learning_rate": 4.464336112892881e-06, "loss": 0.8622, "step": 12455 }, { "epoch": 0.15185307057633482, "grad_norm": 2.0159318447113037, "learning_rate": 4.464015394483644e-06, "loss": 0.8224, "step": 12460 }, { "epoch": 0.15191400680048261, "grad_norm": 1.9939388036727905, "learning_rate": 4.4636946760744075e-06, "loss": 0.9505, "step": 12465 }, { "epoch": 0.1519749430246304, "grad_norm": 2.0186820030212402, "learning_rate": 4.4633739576651705e-06, "loss": 0.856, "step": 12470 }, { "epoch": 0.15203587924877823, "grad_norm": 1.8309909105300903, "learning_rate": 4.4630532392559335e-06, "loss": 0.8442, "step": 12475 }, { "epoch": 0.15209681547292603, "grad_norm": 1.9378199577331543, "learning_rate": 4.462732520846697e-06, "loss": 0.8777, "step": 12480 }, { "epoch": 0.15215775169707385, "grad_norm": 2.0855636596679688, "learning_rate": 4.46241180243746e-06, "loss": 0.8935, "step": 12485 }, { "epoch": 0.15221868792122165, "grad_norm": 1.9208049774169922, "learning_rate": 4.4620910840282234e-06, "loss": 0.8845, "step": 12490 }, { "epoch": 0.15227962414536947, "grad_norm": 2.335836410522461, "learning_rate": 4.461770365618987e-06, "loss": 0.8845, "step": 12495 }, { "epoch": 0.15234056036951726, "grad_norm": 1.9535752534866333, "learning_rate": 4.46144964720975e-06, "loss": 0.9075, "step": 12500 }, { "epoch": 0.15240149659366506, "grad_norm": 1.795290231704712, "learning_rate": 4.461128928800513e-06, "loss": 0.8796, "step": 12505 }, { "epoch": 0.15246243281781288, "grad_norm": 2.1547250747680664, "learning_rate": 4.460808210391277e-06, "loss": 0.9219, "step": 12510 }, { "epoch": 0.15252336904196068, "grad_norm": 2.0618226528167725, "learning_rate": 4.46048749198204e-06, "loss": 0.8436, "step": 12515 }, { "epoch": 0.1525843052661085, "grad_norm": 2.1548054218292236, "learning_rate": 4.460166773572803e-06, "loss": 0.8712, "step": 12520 }, { "epoch": 0.1526452414902563, "grad_norm": 1.7413454055786133, "learning_rate": 4.459846055163567e-06, "loss": 0.8721, "step": 12525 }, { "epoch": 0.15270617771440412, "grad_norm": 1.7952531576156616, "learning_rate": 4.45952533675433e-06, "loss": 0.9168, "step": 12530 }, { "epoch": 0.1527671139385519, "grad_norm": 2.120164394378662, "learning_rate": 4.459204618345093e-06, "loss": 0.8708, "step": 12535 }, { "epoch": 0.1528280501626997, "grad_norm": 1.8343178033828735, "learning_rate": 4.458883899935857e-06, "loss": 0.9284, "step": 12540 }, { "epoch": 0.15288898638684753, "grad_norm": 1.8672147989273071, "learning_rate": 4.45856318152662e-06, "loss": 0.9301, "step": 12545 }, { "epoch": 0.15294992261099533, "grad_norm": 2.7653098106384277, "learning_rate": 4.458242463117383e-06, "loss": 0.9621, "step": 12550 }, { "epoch": 0.15301085883514315, "grad_norm": 1.9020518064498901, "learning_rate": 4.457921744708147e-06, "loss": 0.9216, "step": 12555 }, { "epoch": 0.15307179505929094, "grad_norm": 2.5016398429870605, "learning_rate": 4.45760102629891e-06, "loss": 0.9731, "step": 12560 }, { "epoch": 0.15313273128343877, "grad_norm": 2.106794834136963, "learning_rate": 4.457280307889673e-06, "loss": 0.9112, "step": 12565 }, { "epoch": 0.15319366750758656, "grad_norm": 1.5890967845916748, "learning_rate": 4.456959589480436e-06, "loss": 0.8506, "step": 12570 }, { "epoch": 0.15325460373173436, "grad_norm": 1.9062610864639282, "learning_rate": 4.4566388710712e-06, "loss": 0.9311, "step": 12575 }, { "epoch": 0.15331553995588218, "grad_norm": 1.9599055051803589, "learning_rate": 4.456318152661963e-06, "loss": 0.8104, "step": 12580 }, { "epoch": 0.15337647618002997, "grad_norm": 1.9014354944229126, "learning_rate": 4.455997434252726e-06, "loss": 0.8657, "step": 12585 }, { "epoch": 0.1534374124041778, "grad_norm": 2.284214735031128, "learning_rate": 4.45567671584349e-06, "loss": 0.9112, "step": 12590 }, { "epoch": 0.1534983486283256, "grad_norm": 2.0175838470458984, "learning_rate": 4.455355997434253e-06, "loss": 0.9321, "step": 12595 }, { "epoch": 0.1535592848524734, "grad_norm": 1.8507925271987915, "learning_rate": 4.4550352790250166e-06, "loss": 0.8813, "step": 12600 }, { "epoch": 0.1536202210766212, "grad_norm": 2.0666072368621826, "learning_rate": 4.45471456061578e-06, "loss": 0.8782, "step": 12605 }, { "epoch": 0.153681157300769, "grad_norm": 2.658525228500366, "learning_rate": 4.4543938422065435e-06, "loss": 0.9504, "step": 12610 }, { "epoch": 0.15374209352491683, "grad_norm": 1.7269692420959473, "learning_rate": 4.4540731237973065e-06, "loss": 0.9278, "step": 12615 }, { "epoch": 0.15380302974906462, "grad_norm": 1.9772682189941406, "learning_rate": 4.4537524053880695e-06, "loss": 0.8112, "step": 12620 }, { "epoch": 0.15386396597321245, "grad_norm": 2.009208917617798, "learning_rate": 4.453431686978833e-06, "loss": 0.8783, "step": 12625 }, { "epoch": 0.15392490219736024, "grad_norm": 2.4968104362487793, "learning_rate": 4.453110968569596e-06, "loss": 0.9348, "step": 12630 }, { "epoch": 0.15398583842150804, "grad_norm": 1.80270516872406, "learning_rate": 4.45279025016036e-06, "loss": 0.8566, "step": 12635 }, { "epoch": 0.15404677464565586, "grad_norm": 2.5490870475769043, "learning_rate": 4.452469531751123e-06, "loss": 0.93, "step": 12640 }, { "epoch": 0.15410771086980365, "grad_norm": 2.2872235774993896, "learning_rate": 4.452148813341886e-06, "loss": 0.8967, "step": 12645 }, { "epoch": 0.15416864709395148, "grad_norm": 2.008255958557129, "learning_rate": 4.451828094932649e-06, "loss": 0.8488, "step": 12650 }, { "epoch": 0.15422958331809927, "grad_norm": 2.126443862915039, "learning_rate": 4.451507376523413e-06, "loss": 0.912, "step": 12655 }, { "epoch": 0.1542905195422471, "grad_norm": 1.958229422569275, "learning_rate": 4.451186658114176e-06, "loss": 0.9185, "step": 12660 }, { "epoch": 0.1543514557663949, "grad_norm": 1.7701892852783203, "learning_rate": 4.450865939704939e-06, "loss": 0.932, "step": 12665 }, { "epoch": 0.15441239199054269, "grad_norm": 1.9511048793792725, "learning_rate": 4.450545221295703e-06, "loss": 0.985, "step": 12670 }, { "epoch": 0.1544733282146905, "grad_norm": 1.986346960067749, "learning_rate": 4.450224502886466e-06, "loss": 0.9143, "step": 12675 }, { "epoch": 0.1545342644388383, "grad_norm": 2.5432755947113037, "learning_rate": 4.449903784477229e-06, "loss": 0.9057, "step": 12680 }, { "epoch": 0.15459520066298613, "grad_norm": 2.0213611125946045, "learning_rate": 4.449583066067993e-06, "loss": 0.8557, "step": 12685 }, { "epoch": 0.15465613688713392, "grad_norm": 1.7099997997283936, "learning_rate": 4.449262347658756e-06, "loss": 0.8108, "step": 12690 }, { "epoch": 0.15471707311128174, "grad_norm": 1.8059453964233398, "learning_rate": 4.448941629249519e-06, "loss": 0.9192, "step": 12695 }, { "epoch": 0.15477800933542954, "grad_norm": 2.1526594161987305, "learning_rate": 4.448620910840283e-06, "loss": 0.9671, "step": 12700 }, { "epoch": 0.15483894555957733, "grad_norm": 1.6910039186477661, "learning_rate": 4.448300192431046e-06, "loss": 0.8724, "step": 12705 }, { "epoch": 0.15489988178372516, "grad_norm": 2.96203875541687, "learning_rate": 4.447979474021809e-06, "loss": 0.9452, "step": 12710 }, { "epoch": 0.15496081800787295, "grad_norm": 3.36933970451355, "learning_rate": 4.447658755612573e-06, "loss": 0.9471, "step": 12715 }, { "epoch": 0.15502175423202078, "grad_norm": 1.8891552686691284, "learning_rate": 4.447338037203336e-06, "loss": 0.8738, "step": 12720 }, { "epoch": 0.15508269045616857, "grad_norm": 2.0337140560150146, "learning_rate": 4.447017318794099e-06, "loss": 0.8756, "step": 12725 }, { "epoch": 0.1551436266803164, "grad_norm": 1.747525930404663, "learning_rate": 4.446696600384863e-06, "loss": 0.8126, "step": 12730 }, { "epoch": 0.1552045629044642, "grad_norm": 1.8465255498886108, "learning_rate": 4.446375881975626e-06, "loss": 0.8907, "step": 12735 }, { "epoch": 0.15526549912861198, "grad_norm": 2.098773241043091, "learning_rate": 4.446055163566389e-06, "loss": 0.9159, "step": 12740 }, { "epoch": 0.1553264353527598, "grad_norm": 2.0312016010284424, "learning_rate": 4.4457344451571525e-06, "loss": 0.9098, "step": 12745 }, { "epoch": 0.1553873715769076, "grad_norm": 2.237990140914917, "learning_rate": 4.4454137267479155e-06, "loss": 0.8804, "step": 12750 }, { "epoch": 0.15544830780105542, "grad_norm": 1.6829248666763306, "learning_rate": 4.4450930083386786e-06, "loss": 0.8573, "step": 12755 }, { "epoch": 0.15550924402520322, "grad_norm": 2.180382490158081, "learning_rate": 4.4447722899294424e-06, "loss": 0.8217, "step": 12760 }, { "epoch": 0.15557018024935104, "grad_norm": 2.0748305320739746, "learning_rate": 4.4444515715202054e-06, "loss": 0.9868, "step": 12765 }, { "epoch": 0.15563111647349884, "grad_norm": 2.163673162460327, "learning_rate": 4.444130853110969e-06, "loss": 0.9121, "step": 12770 }, { "epoch": 0.15569205269764663, "grad_norm": 2.0130701065063477, "learning_rate": 4.443810134701732e-06, "loss": 0.7885, "step": 12775 }, { "epoch": 0.15575298892179446, "grad_norm": 2.0213372707366943, "learning_rate": 4.443489416292495e-06, "loss": 0.9375, "step": 12780 }, { "epoch": 0.15581392514594225, "grad_norm": 2.1339821815490723, "learning_rate": 4.443168697883259e-06, "loss": 0.9304, "step": 12785 }, { "epoch": 0.15587486137009007, "grad_norm": 1.6372473239898682, "learning_rate": 4.442847979474022e-06, "loss": 0.8694, "step": 12790 }, { "epoch": 0.15593579759423787, "grad_norm": 2.099600076675415, "learning_rate": 4.442527261064786e-06, "loss": 0.8454, "step": 12795 }, { "epoch": 0.1559967338183857, "grad_norm": 2.034630060195923, "learning_rate": 4.442206542655549e-06, "loss": 0.8961, "step": 12800 }, { "epoch": 0.1560576700425335, "grad_norm": 2.1831488609313965, "learning_rate": 4.441885824246312e-06, "loss": 0.9079, "step": 12805 }, { "epoch": 0.15611860626668128, "grad_norm": 1.9416941404342651, "learning_rate": 4.441565105837076e-06, "loss": 0.9127, "step": 12810 }, { "epoch": 0.1561795424908291, "grad_norm": 2.256800413131714, "learning_rate": 4.441244387427839e-06, "loss": 0.8993, "step": 12815 }, { "epoch": 0.1562404787149769, "grad_norm": 1.9084542989730835, "learning_rate": 4.440923669018602e-06, "loss": 0.8847, "step": 12820 }, { "epoch": 0.15630141493912472, "grad_norm": 2.137319803237915, "learning_rate": 4.440602950609365e-06, "loss": 0.8917, "step": 12825 }, { "epoch": 0.15636235116327252, "grad_norm": 2.240356922149658, "learning_rate": 4.440282232200129e-06, "loss": 0.9176, "step": 12830 }, { "epoch": 0.1564232873874203, "grad_norm": 1.9685420989990234, "learning_rate": 4.439961513790892e-06, "loss": 0.8421, "step": 12835 }, { "epoch": 0.15648422361156814, "grad_norm": 2.0753729343414307, "learning_rate": 4.439640795381655e-06, "loss": 0.809, "step": 12840 }, { "epoch": 0.15654515983571593, "grad_norm": 2.2564857006073, "learning_rate": 4.439320076972419e-06, "loss": 0.8139, "step": 12845 }, { "epoch": 0.15660609605986375, "grad_norm": 1.8933922052383423, "learning_rate": 4.438999358563182e-06, "loss": 0.9449, "step": 12850 }, { "epoch": 0.15666703228401155, "grad_norm": 1.700669527053833, "learning_rate": 4.438678640153945e-06, "loss": 0.9079, "step": 12855 }, { "epoch": 0.15672796850815937, "grad_norm": 2.1038668155670166, "learning_rate": 4.438357921744709e-06, "loss": 0.8687, "step": 12860 }, { "epoch": 0.15678890473230717, "grad_norm": 2.1910035610198975, "learning_rate": 4.438037203335472e-06, "loss": 0.8804, "step": 12865 }, { "epoch": 0.15684984095645496, "grad_norm": 2.307429075241089, "learning_rate": 4.437716484926235e-06, "loss": 0.9089, "step": 12870 }, { "epoch": 0.15691077718060278, "grad_norm": 2.1534366607666016, "learning_rate": 4.437395766516999e-06, "loss": 0.926, "step": 12875 }, { "epoch": 0.15697171340475058, "grad_norm": 1.8163716793060303, "learning_rate": 4.437075048107762e-06, "loss": 0.8731, "step": 12880 }, { "epoch": 0.1570326496288984, "grad_norm": 1.7530730962753296, "learning_rate": 4.436754329698525e-06, "loss": 0.8327, "step": 12885 }, { "epoch": 0.1570935858530462, "grad_norm": 1.9226082563400269, "learning_rate": 4.4364336112892885e-06, "loss": 0.9091, "step": 12890 }, { "epoch": 0.15715452207719402, "grad_norm": 2.1885018348693848, "learning_rate": 4.4361128928800515e-06, "loss": 0.9149, "step": 12895 }, { "epoch": 0.15721545830134181, "grad_norm": 2.0147688388824463, "learning_rate": 4.4357921744708145e-06, "loss": 0.9345, "step": 12900 }, { "epoch": 0.1572763945254896, "grad_norm": 2.0803070068359375, "learning_rate": 4.435471456061578e-06, "loss": 0.9187, "step": 12905 }, { "epoch": 0.15733733074963743, "grad_norm": 1.8037314414978027, "learning_rate": 4.435150737652341e-06, "loss": 0.8992, "step": 12910 }, { "epoch": 0.15739826697378523, "grad_norm": 1.9594197273254395, "learning_rate": 4.434830019243105e-06, "loss": 0.9164, "step": 12915 }, { "epoch": 0.15745920319793305, "grad_norm": 1.7699472904205322, "learning_rate": 4.434509300833868e-06, "loss": 0.8302, "step": 12920 }, { "epoch": 0.15752013942208085, "grad_norm": 1.7174904346466064, "learning_rate": 4.434188582424631e-06, "loss": 0.9199, "step": 12925 }, { "epoch": 0.15758107564622867, "grad_norm": 1.864172101020813, "learning_rate": 4.433867864015395e-06, "loss": 0.9094, "step": 12930 }, { "epoch": 0.15764201187037646, "grad_norm": 1.9510594606399536, "learning_rate": 4.433547145606158e-06, "loss": 0.9578, "step": 12935 }, { "epoch": 0.15770294809452426, "grad_norm": 1.856583833694458, "learning_rate": 4.433226427196922e-06, "loss": 0.8868, "step": 12940 }, { "epoch": 0.15776388431867208, "grad_norm": 2.035757303237915, "learning_rate": 4.432905708787685e-06, "loss": 0.8259, "step": 12945 }, { "epoch": 0.15782482054281988, "grad_norm": 2.241283893585205, "learning_rate": 4.432584990378448e-06, "loss": 0.8199, "step": 12950 }, { "epoch": 0.1578857567669677, "grad_norm": 1.8876789808273315, "learning_rate": 4.432264271969212e-06, "loss": 0.8462, "step": 12955 }, { "epoch": 0.1579466929911155, "grad_norm": 2.059086322784424, "learning_rate": 4.431943553559975e-06, "loss": 0.9386, "step": 12960 }, { "epoch": 0.15800762921526332, "grad_norm": 2.307614803314209, "learning_rate": 4.431622835150738e-06, "loss": 0.9468, "step": 12965 }, { "epoch": 0.1580685654394111, "grad_norm": 2.1827259063720703, "learning_rate": 4.431302116741502e-06, "loss": 0.9072, "step": 12970 }, { "epoch": 0.1581295016635589, "grad_norm": 2.1083850860595703, "learning_rate": 4.430981398332265e-06, "loss": 0.9423, "step": 12975 }, { "epoch": 0.15819043788770673, "grad_norm": 2.4282426834106445, "learning_rate": 4.430660679923028e-06, "loss": 0.9212, "step": 12980 }, { "epoch": 0.15825137411185453, "grad_norm": 2.0343265533447266, "learning_rate": 4.430339961513791e-06, "loss": 0.8213, "step": 12985 }, { "epoch": 0.15831231033600235, "grad_norm": 1.8809149265289307, "learning_rate": 4.430019243104555e-06, "loss": 0.9218, "step": 12990 }, { "epoch": 0.15837324656015014, "grad_norm": 1.9270726442337036, "learning_rate": 4.429698524695318e-06, "loss": 0.9128, "step": 12995 }, { "epoch": 0.15843418278429797, "grad_norm": 2.0082292556762695, "learning_rate": 4.429377806286081e-06, "loss": 0.8902, "step": 13000 }, { "epoch": 0.15849511900844576, "grad_norm": 1.966151237487793, "learning_rate": 4.429057087876845e-06, "loss": 0.8953, "step": 13005 }, { "epoch": 0.15855605523259356, "grad_norm": 1.7058742046356201, "learning_rate": 4.428736369467608e-06, "loss": 0.8591, "step": 13010 }, { "epoch": 0.15861699145674138, "grad_norm": 1.9127817153930664, "learning_rate": 4.428415651058371e-06, "loss": 0.9862, "step": 13015 }, { "epoch": 0.15867792768088917, "grad_norm": 2.022988796234131, "learning_rate": 4.4280949326491345e-06, "loss": 0.8704, "step": 13020 }, { "epoch": 0.158738863905037, "grad_norm": 1.7228671312332153, "learning_rate": 4.4277742142398976e-06, "loss": 0.8579, "step": 13025 }, { "epoch": 0.1587998001291848, "grad_norm": 2.658874034881592, "learning_rate": 4.427453495830661e-06, "loss": 0.9112, "step": 13030 }, { "epoch": 0.15886073635333262, "grad_norm": 2.0783002376556396, "learning_rate": 4.4271327774214244e-06, "loss": 0.8671, "step": 13035 }, { "epoch": 0.1589216725774804, "grad_norm": 1.767765760421753, "learning_rate": 4.4268120590121875e-06, "loss": 0.9539, "step": 13040 }, { "epoch": 0.1589826088016282, "grad_norm": 2.000962018966675, "learning_rate": 4.4264913406029505e-06, "loss": 0.9115, "step": 13045 }, { "epoch": 0.15904354502577603, "grad_norm": 1.7512550354003906, "learning_rate": 4.426170622193714e-06, "loss": 0.9131, "step": 13050 }, { "epoch": 0.15910448124992382, "grad_norm": 2.102979898452759, "learning_rate": 4.425849903784477e-06, "loss": 0.9407, "step": 13055 }, { "epoch": 0.15916541747407165, "grad_norm": 2.304144859313965, "learning_rate": 4.42552918537524e-06, "loss": 0.9032, "step": 13060 }, { "epoch": 0.15922635369821944, "grad_norm": 2.3202016353607178, "learning_rate": 4.425208466966004e-06, "loss": 0.872, "step": 13065 }, { "epoch": 0.15928728992236724, "grad_norm": 2.5906875133514404, "learning_rate": 4.424887748556767e-06, "loss": 0.8966, "step": 13070 }, { "epoch": 0.15934822614651506, "grad_norm": 2.031069040298462, "learning_rate": 4.424567030147531e-06, "loss": 0.9647, "step": 13075 }, { "epoch": 0.15940916237066285, "grad_norm": 2.240983486175537, "learning_rate": 4.424246311738294e-06, "loss": 0.8751, "step": 13080 }, { "epoch": 0.15947009859481068, "grad_norm": 1.9414457082748413, "learning_rate": 4.423925593329058e-06, "loss": 0.9612, "step": 13085 }, { "epoch": 0.15953103481895847, "grad_norm": 1.790801763534546, "learning_rate": 4.423604874919821e-06, "loss": 0.906, "step": 13090 }, { "epoch": 0.1595919710431063, "grad_norm": 2.417318344116211, "learning_rate": 4.423284156510584e-06, "loss": 0.9279, "step": 13095 }, { "epoch": 0.1596529072672541, "grad_norm": 2.0838308334350586, "learning_rate": 4.422963438101348e-06, "loss": 0.8348, "step": 13100 }, { "epoch": 0.15971384349140189, "grad_norm": 1.998503565788269, "learning_rate": 4.422642719692111e-06, "loss": 0.9002, "step": 13105 }, { "epoch": 0.1597747797155497, "grad_norm": 1.8682724237442017, "learning_rate": 4.422322001282874e-06, "loss": 0.8665, "step": 13110 }, { "epoch": 0.1598357159396975, "grad_norm": 1.8437068462371826, "learning_rate": 4.422001282873638e-06, "loss": 0.9612, "step": 13115 }, { "epoch": 0.15989665216384533, "grad_norm": 1.7834833860397339, "learning_rate": 4.421680564464401e-06, "loss": 0.8665, "step": 13120 }, { "epoch": 0.15995758838799312, "grad_norm": 1.8584041595458984, "learning_rate": 4.421359846055164e-06, "loss": 0.8896, "step": 13125 }, { "epoch": 0.16001852461214094, "grad_norm": 2.0444424152374268, "learning_rate": 4.421039127645928e-06, "loss": 0.9819, "step": 13130 }, { "epoch": 0.16007946083628874, "grad_norm": 1.9276565313339233, "learning_rate": 4.420718409236691e-06, "loss": 0.9212, "step": 13135 }, { "epoch": 0.16014039706043653, "grad_norm": 1.682436227798462, "learning_rate": 4.420397690827454e-06, "loss": 0.9155, "step": 13140 }, { "epoch": 0.16020133328458436, "grad_norm": 1.8971284627914429, "learning_rate": 4.420076972418218e-06, "loss": 0.8991, "step": 13145 }, { "epoch": 0.16026226950873215, "grad_norm": 2.0235540866851807, "learning_rate": 4.419756254008981e-06, "loss": 0.9132, "step": 13150 }, { "epoch": 0.16032320573287998, "grad_norm": 1.9855772256851196, "learning_rate": 4.419435535599744e-06, "loss": 0.9157, "step": 13155 }, { "epoch": 0.16038414195702777, "grad_norm": 1.5418795347213745, "learning_rate": 4.419114817190507e-06, "loss": 0.8684, "step": 13160 }, { "epoch": 0.1604450781811756, "grad_norm": 1.799275279045105, "learning_rate": 4.4187940987812705e-06, "loss": 0.9176, "step": 13165 }, { "epoch": 0.1605060144053234, "grad_norm": 2.0682311058044434, "learning_rate": 4.4184733803720335e-06, "loss": 0.8511, "step": 13170 }, { "epoch": 0.16056695062947118, "grad_norm": 2.03334641456604, "learning_rate": 4.4181526619627965e-06, "loss": 0.8587, "step": 13175 }, { "epoch": 0.160627886853619, "grad_norm": 1.8812817335128784, "learning_rate": 4.41783194355356e-06, "loss": 0.8691, "step": 13180 }, { "epoch": 0.1606888230777668, "grad_norm": 2.159510374069214, "learning_rate": 4.417511225144323e-06, "loss": 0.9459, "step": 13185 }, { "epoch": 0.16074975930191462, "grad_norm": 2.3079757690429688, "learning_rate": 4.4171905067350864e-06, "loss": 0.8906, "step": 13190 }, { "epoch": 0.16081069552606242, "grad_norm": 1.9930741786956787, "learning_rate": 4.41686978832585e-06, "loss": 0.929, "step": 13195 }, { "epoch": 0.16087163175021024, "grad_norm": 1.9285069704055786, "learning_rate": 4.416549069916613e-06, "loss": 0.8934, "step": 13200 }, { "epoch": 0.16093256797435804, "grad_norm": 1.7189713716506958, "learning_rate": 4.416228351507376e-06, "loss": 0.866, "step": 13205 }, { "epoch": 0.16099350419850583, "grad_norm": 2.021683931350708, "learning_rate": 4.41590763309814e-06, "loss": 0.8686, "step": 13210 }, { "epoch": 0.16105444042265366, "grad_norm": 2.02420973777771, "learning_rate": 4.415586914688903e-06, "loss": 0.9091, "step": 13215 }, { "epoch": 0.16111537664680145, "grad_norm": 2.0736563205718994, "learning_rate": 4.415266196279667e-06, "loss": 0.8902, "step": 13220 }, { "epoch": 0.16117631287094927, "grad_norm": 1.928788423538208, "learning_rate": 4.41494547787043e-06, "loss": 0.9625, "step": 13225 }, { "epoch": 0.16123724909509707, "grad_norm": 1.7242825031280518, "learning_rate": 4.414624759461193e-06, "loss": 0.887, "step": 13230 }, { "epoch": 0.1612981853192449, "grad_norm": 1.8515598773956299, "learning_rate": 4.414304041051957e-06, "loss": 0.8944, "step": 13235 }, { "epoch": 0.1613591215433927, "grad_norm": 1.9511915445327759, "learning_rate": 4.41398332264272e-06, "loss": 0.9021, "step": 13240 }, { "epoch": 0.16142005776754048, "grad_norm": 1.7898468971252441, "learning_rate": 4.413662604233484e-06, "loss": 0.9541, "step": 13245 }, { "epoch": 0.1614809939916883, "grad_norm": 1.976599097251892, "learning_rate": 4.413341885824247e-06, "loss": 0.8519, "step": 13250 }, { "epoch": 0.1615419302158361, "grad_norm": 2.026022434234619, "learning_rate": 4.41302116741501e-06, "loss": 0.8583, "step": 13255 }, { "epoch": 0.16160286643998392, "grad_norm": 2.0960822105407715, "learning_rate": 4.412700449005774e-06, "loss": 0.9521, "step": 13260 }, { "epoch": 0.16166380266413172, "grad_norm": 1.904219388961792, "learning_rate": 4.412379730596537e-06, "loss": 0.8779, "step": 13265 }, { "epoch": 0.16172473888827954, "grad_norm": 2.241535186767578, "learning_rate": 4.4120590121873e-06, "loss": 0.9349, "step": 13270 }, { "epoch": 0.16178567511242734, "grad_norm": 2.0373713970184326, "learning_rate": 4.411738293778064e-06, "loss": 0.9545, "step": 13275 }, { "epoch": 0.16184661133657513, "grad_norm": 1.714829921722412, "learning_rate": 4.411417575368827e-06, "loss": 0.929, "step": 13280 }, { "epoch": 0.16190754756072295, "grad_norm": 2.0770649909973145, "learning_rate": 4.41109685695959e-06, "loss": 0.8893, "step": 13285 }, { "epoch": 0.16196848378487075, "grad_norm": 2.0145251750946045, "learning_rate": 4.4107761385503535e-06, "loss": 0.8876, "step": 13290 }, { "epoch": 0.16202942000901857, "grad_norm": 2.144216775894165, "learning_rate": 4.4104554201411166e-06, "loss": 0.9372, "step": 13295 }, { "epoch": 0.16209035623316637, "grad_norm": 1.8881940841674805, "learning_rate": 4.41013470173188e-06, "loss": 0.906, "step": 13300 }, { "epoch": 0.16215129245731416, "grad_norm": 1.9303996562957764, "learning_rate": 4.4098139833226434e-06, "loss": 0.9093, "step": 13305 }, { "epoch": 0.16221222868146198, "grad_norm": 2.0661404132843018, "learning_rate": 4.4094932649134065e-06, "loss": 0.895, "step": 13310 }, { "epoch": 0.16227316490560978, "grad_norm": 2.022517442703247, "learning_rate": 4.4091725465041695e-06, "loss": 0.9807, "step": 13315 }, { "epoch": 0.1623341011297576, "grad_norm": 1.9141279458999634, "learning_rate": 4.408851828094933e-06, "loss": 0.9399, "step": 13320 }, { "epoch": 0.1623950373539054, "grad_norm": 1.7919920682907104, "learning_rate": 4.408531109685696e-06, "loss": 0.8789, "step": 13325 }, { "epoch": 0.16245597357805322, "grad_norm": 3.429175615310669, "learning_rate": 4.408210391276459e-06, "loss": 0.9083, "step": 13330 }, { "epoch": 0.16251690980220102, "grad_norm": 1.7834835052490234, "learning_rate": 4.407889672867222e-06, "loss": 0.8456, "step": 13335 }, { "epoch": 0.1625778460263488, "grad_norm": 1.9035332202911377, "learning_rate": 4.407568954457986e-06, "loss": 0.9702, "step": 13340 }, { "epoch": 0.16263878225049663, "grad_norm": 1.9015053510665894, "learning_rate": 4.407248236048749e-06, "loss": 0.914, "step": 13345 }, { "epoch": 0.16269971847464443, "grad_norm": 1.785742163658142, "learning_rate": 4.406927517639512e-06, "loss": 0.8338, "step": 13350 }, { "epoch": 0.16276065469879225, "grad_norm": 1.9091507196426392, "learning_rate": 4.406606799230276e-06, "loss": 0.8696, "step": 13355 }, { "epoch": 0.16282159092294005, "grad_norm": 1.9423577785491943, "learning_rate": 4.406286080821039e-06, "loss": 0.9424, "step": 13360 }, { "epoch": 0.16288252714708787, "grad_norm": 2.6864209175109863, "learning_rate": 4.405965362411803e-06, "loss": 0.9372, "step": 13365 }, { "epoch": 0.16294346337123566, "grad_norm": 2.184478521347046, "learning_rate": 4.405644644002566e-06, "loss": 0.9167, "step": 13370 }, { "epoch": 0.16300439959538346, "grad_norm": 1.8898921012878418, "learning_rate": 4.405323925593329e-06, "loss": 0.9647, "step": 13375 }, { "epoch": 0.16306533581953128, "grad_norm": 2.123593330383301, "learning_rate": 4.405003207184093e-06, "loss": 0.9074, "step": 13380 }, { "epoch": 0.16312627204367908, "grad_norm": 2.206869125366211, "learning_rate": 4.404682488774856e-06, "loss": 0.927, "step": 13385 }, { "epoch": 0.1631872082678269, "grad_norm": 2.0075881481170654, "learning_rate": 4.40436177036562e-06, "loss": 0.8294, "step": 13390 }, { "epoch": 0.1632481444919747, "grad_norm": 2.3291444778442383, "learning_rate": 4.404041051956383e-06, "loss": 0.8801, "step": 13395 }, { "epoch": 0.16330908071612252, "grad_norm": 2.010768413543701, "learning_rate": 4.403720333547146e-06, "loss": 0.8701, "step": 13400 }, { "epoch": 0.1633700169402703, "grad_norm": 1.7338312864303589, "learning_rate": 4.40339961513791e-06, "loss": 0.8744, "step": 13405 }, { "epoch": 0.1634309531644181, "grad_norm": 1.6052207946777344, "learning_rate": 4.403078896728673e-06, "loss": 0.8507, "step": 13410 }, { "epoch": 0.16349188938856593, "grad_norm": 2.2538256645202637, "learning_rate": 4.402758178319436e-06, "loss": 0.8926, "step": 13415 }, { "epoch": 0.16355282561271373, "grad_norm": 2.5240511894226074, "learning_rate": 4.4024374599102e-06, "loss": 0.8695, "step": 13420 }, { "epoch": 0.16361376183686155, "grad_norm": 2.2908034324645996, "learning_rate": 4.402116741500963e-06, "loss": 0.8889, "step": 13425 }, { "epoch": 0.16367469806100934, "grad_norm": 1.9314961433410645, "learning_rate": 4.401796023091726e-06, "loss": 0.9229, "step": 13430 }, { "epoch": 0.16373563428515717, "grad_norm": 1.7060168981552124, "learning_rate": 4.4014753046824895e-06, "loss": 0.8657, "step": 13435 }, { "epoch": 0.16379657050930496, "grad_norm": 2.1055657863616943, "learning_rate": 4.4011545862732525e-06, "loss": 0.9005, "step": 13440 }, { "epoch": 0.16385750673345276, "grad_norm": 2.1354596614837646, "learning_rate": 4.4008338678640155e-06, "loss": 0.9211, "step": 13445 }, { "epoch": 0.16391844295760058, "grad_norm": 1.812900185585022, "learning_rate": 4.400513149454779e-06, "loss": 0.8939, "step": 13450 }, { "epoch": 0.16397937918174837, "grad_norm": 1.9457343816757202, "learning_rate": 4.400192431045542e-06, "loss": 0.9108, "step": 13455 }, { "epoch": 0.1640403154058962, "grad_norm": 1.9246920347213745, "learning_rate": 4.3998717126363054e-06, "loss": 0.844, "step": 13460 }, { "epoch": 0.164101251630044, "grad_norm": 2.5951154232025146, "learning_rate": 4.399550994227069e-06, "loss": 0.866, "step": 13465 }, { "epoch": 0.16416218785419182, "grad_norm": 2.125617742538452, "learning_rate": 4.399230275817832e-06, "loss": 0.904, "step": 13470 }, { "epoch": 0.1642231240783396, "grad_norm": 1.8440529108047485, "learning_rate": 4.398909557408595e-06, "loss": 0.9962, "step": 13475 }, { "epoch": 0.1642840603024874, "grad_norm": 1.8983289003372192, "learning_rate": 4.398588838999359e-06, "loss": 0.8489, "step": 13480 }, { "epoch": 0.16434499652663523, "grad_norm": 2.0166399478912354, "learning_rate": 4.398268120590122e-06, "loss": 0.9154, "step": 13485 }, { "epoch": 0.16440593275078302, "grad_norm": 2.059122323989868, "learning_rate": 4.397947402180885e-06, "loss": 0.7873, "step": 13490 }, { "epoch": 0.16446686897493085, "grad_norm": 2.1002180576324463, "learning_rate": 4.397626683771648e-06, "loss": 0.8945, "step": 13495 }, { "epoch": 0.16452780519907864, "grad_norm": 2.655927896499634, "learning_rate": 4.397305965362412e-06, "loss": 0.8278, "step": 13500 }, { "epoch": 0.16458874142322646, "grad_norm": 1.8800172805786133, "learning_rate": 4.396985246953175e-06, "loss": 0.8239, "step": 13505 }, { "epoch": 0.16464967764737426, "grad_norm": 1.8701846599578857, "learning_rate": 4.396664528543938e-06, "loss": 0.8027, "step": 13510 }, { "epoch": 0.16471061387152205, "grad_norm": 2.0931129455566406, "learning_rate": 4.396343810134702e-06, "loss": 0.883, "step": 13515 }, { "epoch": 0.16477155009566988, "grad_norm": 2.0903844833374023, "learning_rate": 4.396023091725465e-06, "loss": 0.92, "step": 13520 }, { "epoch": 0.16483248631981767, "grad_norm": 1.8035588264465332, "learning_rate": 4.395702373316229e-06, "loss": 0.8912, "step": 13525 }, { "epoch": 0.1648934225439655, "grad_norm": 1.8905624151229858, "learning_rate": 4.395381654906992e-06, "loss": 0.9155, "step": 13530 }, { "epoch": 0.1649543587681133, "grad_norm": 1.7421131134033203, "learning_rate": 4.395060936497755e-06, "loss": 0.8521, "step": 13535 }, { "epoch": 0.16501529499226109, "grad_norm": 1.6764497756958008, "learning_rate": 4.394740218088519e-06, "loss": 0.8744, "step": 13540 }, { "epoch": 0.1650762312164089, "grad_norm": 1.8493760824203491, "learning_rate": 4.394419499679282e-06, "loss": 0.8949, "step": 13545 }, { "epoch": 0.1651371674405567, "grad_norm": 1.7046122550964355, "learning_rate": 4.394098781270046e-06, "loss": 0.9557, "step": 13550 }, { "epoch": 0.16519810366470453, "grad_norm": 2.1050803661346436, "learning_rate": 4.393778062860809e-06, "loss": 0.9015, "step": 13555 }, { "epoch": 0.16525903988885232, "grad_norm": 1.7475613355636597, "learning_rate": 4.3934573444515725e-06, "loss": 0.8963, "step": 13560 }, { "epoch": 0.16531997611300014, "grad_norm": 1.8222054243087769, "learning_rate": 4.3931366260423356e-06, "loss": 0.9029, "step": 13565 }, { "epoch": 0.16538091233714794, "grad_norm": 2.1560332775115967, "learning_rate": 4.3928159076330986e-06, "loss": 0.8868, "step": 13570 }, { "epoch": 0.16544184856129573, "grad_norm": 2.1763339042663574, "learning_rate": 4.392495189223862e-06, "loss": 0.9275, "step": 13575 }, { "epoch": 0.16550278478544356, "grad_norm": 1.834903597831726, "learning_rate": 4.3921744708146255e-06, "loss": 0.9182, "step": 13580 }, { "epoch": 0.16556372100959135, "grad_norm": 2.7620849609375, "learning_rate": 4.3918537524053885e-06, "loss": 0.9412, "step": 13585 }, { "epoch": 0.16562465723373918, "grad_norm": 1.9698103666305542, "learning_rate": 4.3915330339961515e-06, "loss": 0.9469, "step": 13590 }, { "epoch": 0.16568559345788697, "grad_norm": 2.0422115325927734, "learning_rate": 4.391212315586915e-06, "loss": 0.8836, "step": 13595 }, { "epoch": 0.1657465296820348, "grad_norm": 2.09302020072937, "learning_rate": 4.390891597177678e-06, "loss": 0.9391, "step": 13600 }, { "epoch": 0.1658074659061826, "grad_norm": 2.0693461894989014, "learning_rate": 4.390570878768441e-06, "loss": 0.9171, "step": 13605 }, { "epoch": 0.16586840213033038, "grad_norm": 2.1247446537017822, "learning_rate": 4.390250160359205e-06, "loss": 0.9843, "step": 13610 }, { "epoch": 0.1659293383544782, "grad_norm": 2.2569751739501953, "learning_rate": 4.389929441949968e-06, "loss": 0.858, "step": 13615 }, { "epoch": 0.165990274578626, "grad_norm": 1.7330979108810425, "learning_rate": 4.389608723540731e-06, "loss": 0.8732, "step": 13620 }, { "epoch": 0.16605121080277382, "grad_norm": 2.0868961811065674, "learning_rate": 4.389288005131495e-06, "loss": 0.9003, "step": 13625 }, { "epoch": 0.16611214702692162, "grad_norm": 2.0589914321899414, "learning_rate": 4.388967286722258e-06, "loss": 0.9102, "step": 13630 }, { "epoch": 0.16617308325106944, "grad_norm": 2.0518689155578613, "learning_rate": 4.388646568313021e-06, "loss": 0.8964, "step": 13635 }, { "epoch": 0.16623401947521724, "grad_norm": 1.6561452150344849, "learning_rate": 4.388325849903785e-06, "loss": 0.8522, "step": 13640 }, { "epoch": 0.16629495569936503, "grad_norm": 2.247579574584961, "learning_rate": 4.388005131494548e-06, "loss": 0.8742, "step": 13645 }, { "epoch": 0.16635589192351286, "grad_norm": 2.085531711578369, "learning_rate": 4.387684413085311e-06, "loss": 0.9245, "step": 13650 }, { "epoch": 0.16641682814766065, "grad_norm": 2.140556812286377, "learning_rate": 4.387363694676075e-06, "loss": 0.8756, "step": 13655 }, { "epoch": 0.16647776437180847, "grad_norm": 1.908469796180725, "learning_rate": 4.387042976266838e-06, "loss": 0.7874, "step": 13660 }, { "epoch": 0.16653870059595627, "grad_norm": 2.4014763832092285, "learning_rate": 4.386722257857601e-06, "loss": 0.9007, "step": 13665 }, { "epoch": 0.1665996368201041, "grad_norm": 2.014223098754883, "learning_rate": 4.386401539448365e-06, "loss": 0.8932, "step": 13670 }, { "epoch": 0.1666605730442519, "grad_norm": 1.8537009954452515, "learning_rate": 4.386080821039128e-06, "loss": 0.891, "step": 13675 }, { "epoch": 0.16672150926839968, "grad_norm": 1.8815046548843384, "learning_rate": 4.385760102629891e-06, "loss": 0.8383, "step": 13680 }, { "epoch": 0.1667824454925475, "grad_norm": 1.674475908279419, "learning_rate": 4.385439384220655e-06, "loss": 0.8224, "step": 13685 }, { "epoch": 0.1668433817166953, "grad_norm": 2.45102596282959, "learning_rate": 4.385118665811418e-06, "loss": 0.9267, "step": 13690 }, { "epoch": 0.16690431794084312, "grad_norm": 1.7536816596984863, "learning_rate": 4.384797947402182e-06, "loss": 0.8304, "step": 13695 }, { "epoch": 0.16696525416499092, "grad_norm": 1.7852697372436523, "learning_rate": 4.384477228992945e-06, "loss": 0.9104, "step": 13700 }, { "epoch": 0.16702619038913874, "grad_norm": 1.758768081665039, "learning_rate": 4.384156510583708e-06, "loss": 0.8946, "step": 13705 }, { "epoch": 0.16708712661328654, "grad_norm": 1.961850881576538, "learning_rate": 4.3838357921744715e-06, "loss": 0.8537, "step": 13710 }, { "epoch": 0.16714806283743433, "grad_norm": 1.7959235906600952, "learning_rate": 4.3835150737652345e-06, "loss": 0.8851, "step": 13715 }, { "epoch": 0.16720899906158215, "grad_norm": 1.8779242038726807, "learning_rate": 4.383194355355998e-06, "loss": 0.9085, "step": 13720 }, { "epoch": 0.16726993528572995, "grad_norm": 1.9131838083267212, "learning_rate": 4.382873636946761e-06, "loss": 0.9132, "step": 13725 }, { "epoch": 0.16733087150987777, "grad_norm": 2.288836717605591, "learning_rate": 4.3825529185375244e-06, "loss": 0.8807, "step": 13730 }, { "epoch": 0.16739180773402557, "grad_norm": 2.0115890502929688, "learning_rate": 4.382232200128288e-06, "loss": 0.8674, "step": 13735 }, { "epoch": 0.1674527439581734, "grad_norm": 1.8511797189712524, "learning_rate": 4.381911481719051e-06, "loss": 0.9026, "step": 13740 }, { "epoch": 0.16751368018232118, "grad_norm": 1.7606121301651, "learning_rate": 4.381590763309814e-06, "loss": 0.9116, "step": 13745 }, { "epoch": 0.16757461640646898, "grad_norm": 1.7887173891067505, "learning_rate": 4.381270044900577e-06, "loss": 0.9337, "step": 13750 }, { "epoch": 0.1676355526306168, "grad_norm": 1.9300994873046875, "learning_rate": 4.380949326491341e-06, "loss": 0.8938, "step": 13755 }, { "epoch": 0.1676964888547646, "grad_norm": 2.3521246910095215, "learning_rate": 4.380628608082104e-06, "loss": 0.9374, "step": 13760 }, { "epoch": 0.16775742507891242, "grad_norm": 1.659225344657898, "learning_rate": 4.380307889672867e-06, "loss": 0.92, "step": 13765 }, { "epoch": 0.16781836130306022, "grad_norm": 1.7703808546066284, "learning_rate": 4.379987171263631e-06, "loss": 0.97, "step": 13770 }, { "epoch": 0.16787929752720804, "grad_norm": 1.8793652057647705, "learning_rate": 4.379666452854394e-06, "loss": 0.8975, "step": 13775 }, { "epoch": 0.16794023375135583, "grad_norm": 2.0467541217803955, "learning_rate": 4.379345734445157e-06, "loss": 0.9276, "step": 13780 }, { "epoch": 0.16800116997550363, "grad_norm": 2.239529609680176, "learning_rate": 4.379025016035921e-06, "loss": 0.8591, "step": 13785 }, { "epoch": 0.16806210619965145, "grad_norm": 1.8623530864715576, "learning_rate": 4.378704297626684e-06, "loss": 0.8538, "step": 13790 }, { "epoch": 0.16812304242379925, "grad_norm": 2.3836100101470947, "learning_rate": 4.378383579217447e-06, "loss": 0.8818, "step": 13795 }, { "epoch": 0.16818397864794707, "grad_norm": 2.1194393634796143, "learning_rate": 4.378062860808211e-06, "loss": 0.8516, "step": 13800 }, { "epoch": 0.16824491487209486, "grad_norm": 2.2612662315368652, "learning_rate": 4.377742142398974e-06, "loss": 0.9307, "step": 13805 }, { "epoch": 0.16830585109624266, "grad_norm": 2.199190378189087, "learning_rate": 4.377421423989737e-06, "loss": 0.8792, "step": 13810 }, { "epoch": 0.16836678732039048, "grad_norm": 2.1864311695098877, "learning_rate": 4.377100705580501e-06, "loss": 0.8534, "step": 13815 }, { "epoch": 0.16842772354453828, "grad_norm": 1.7841498851776123, "learning_rate": 4.376779987171264e-06, "loss": 0.943, "step": 13820 }, { "epoch": 0.1684886597686861, "grad_norm": 2.1226789951324463, "learning_rate": 4.376459268762027e-06, "loss": 0.9076, "step": 13825 }, { "epoch": 0.1685495959928339, "grad_norm": 2.030447006225586, "learning_rate": 4.376138550352791e-06, "loss": 0.9164, "step": 13830 }, { "epoch": 0.16861053221698172, "grad_norm": 1.9720549583435059, "learning_rate": 4.375817831943554e-06, "loss": 0.9365, "step": 13835 }, { "epoch": 0.1686714684411295, "grad_norm": 1.8179594278335571, "learning_rate": 4.3754971135343176e-06, "loss": 0.874, "step": 13840 }, { "epoch": 0.1687324046652773, "grad_norm": 1.8853389024734497, "learning_rate": 4.375176395125081e-06, "loss": 0.9007, "step": 13845 }, { "epoch": 0.16879334088942513, "grad_norm": 1.7739604711532593, "learning_rate": 4.374855676715844e-06, "loss": 0.8614, "step": 13850 }, { "epoch": 0.16885427711357293, "grad_norm": 2.0115585327148438, "learning_rate": 4.3745349583066075e-06, "loss": 0.9302, "step": 13855 }, { "epoch": 0.16891521333772075, "grad_norm": 2.201986789703369, "learning_rate": 4.3742142398973705e-06, "loss": 0.8188, "step": 13860 }, { "epoch": 0.16897614956186854, "grad_norm": 1.8673653602600098, "learning_rate": 4.373893521488134e-06, "loss": 0.8892, "step": 13865 }, { "epoch": 0.16903708578601637, "grad_norm": 1.7682924270629883, "learning_rate": 4.373572803078897e-06, "loss": 0.9162, "step": 13870 }, { "epoch": 0.16909802201016416, "grad_norm": 1.7587586641311646, "learning_rate": 4.37325208466966e-06, "loss": 0.8976, "step": 13875 }, { "epoch": 0.16915895823431196, "grad_norm": 1.8880263566970825, "learning_rate": 4.372931366260424e-06, "loss": 0.8686, "step": 13880 }, { "epoch": 0.16921989445845978, "grad_norm": 1.5794035196304321, "learning_rate": 4.372610647851187e-06, "loss": 0.8784, "step": 13885 }, { "epoch": 0.16928083068260757, "grad_norm": 2.1393511295318604, "learning_rate": 4.37228992944195e-06, "loss": 0.9122, "step": 13890 }, { "epoch": 0.1693417669067554, "grad_norm": 2.059857130050659, "learning_rate": 4.371969211032714e-06, "loss": 0.9176, "step": 13895 }, { "epoch": 0.1694027031309032, "grad_norm": 1.8739595413208008, "learning_rate": 4.371648492623477e-06, "loss": 0.8748, "step": 13900 }, { "epoch": 0.16946363935505102, "grad_norm": 1.883555293083191, "learning_rate": 4.37132777421424e-06, "loss": 0.8778, "step": 13905 }, { "epoch": 0.1695245755791988, "grad_norm": 1.893067479133606, "learning_rate": 4.371007055805004e-06, "loss": 0.8955, "step": 13910 }, { "epoch": 0.1695855118033466, "grad_norm": 1.7649539709091187, "learning_rate": 4.370686337395767e-06, "loss": 0.8876, "step": 13915 }, { "epoch": 0.16964644802749443, "grad_norm": 1.7991148233413696, "learning_rate": 4.37036561898653e-06, "loss": 0.9253, "step": 13920 }, { "epoch": 0.16970738425164222, "grad_norm": 1.8871393203735352, "learning_rate": 4.370044900577293e-06, "loss": 0.7905, "step": 13925 }, { "epoch": 0.16976832047579005, "grad_norm": 2.1651787757873535, "learning_rate": 4.369724182168057e-06, "loss": 0.8597, "step": 13930 }, { "epoch": 0.16982925669993784, "grad_norm": 2.213690996170044, "learning_rate": 4.36940346375882e-06, "loss": 0.8853, "step": 13935 }, { "epoch": 0.16989019292408566, "grad_norm": 1.8434640169143677, "learning_rate": 4.369082745349583e-06, "loss": 0.8643, "step": 13940 }, { "epoch": 0.16995112914823346, "grad_norm": 2.285747528076172, "learning_rate": 4.368762026940347e-06, "loss": 0.9186, "step": 13945 }, { "epoch": 0.17001206537238125, "grad_norm": 2.1009161472320557, "learning_rate": 4.36844130853111e-06, "loss": 0.9227, "step": 13950 }, { "epoch": 0.17007300159652908, "grad_norm": 1.9775731563568115, "learning_rate": 4.368120590121873e-06, "loss": 0.8645, "step": 13955 }, { "epoch": 0.17013393782067687, "grad_norm": 1.8783774375915527, "learning_rate": 4.367799871712637e-06, "loss": 0.9349, "step": 13960 }, { "epoch": 0.1701948740448247, "grad_norm": 2.015343427658081, "learning_rate": 4.3674791533034e-06, "loss": 0.8666, "step": 13965 }, { "epoch": 0.1702558102689725, "grad_norm": 1.9866209030151367, "learning_rate": 4.367158434894163e-06, "loss": 0.9409, "step": 13970 }, { "epoch": 0.1703167464931203, "grad_norm": 1.8967941999435425, "learning_rate": 4.366837716484927e-06, "loss": 0.9391, "step": 13975 }, { "epoch": 0.1703776827172681, "grad_norm": 1.9651259183883667, "learning_rate": 4.36651699807569e-06, "loss": 0.8018, "step": 13980 }, { "epoch": 0.1704386189414159, "grad_norm": 1.723271369934082, "learning_rate": 4.366196279666453e-06, "loss": 0.8631, "step": 13985 }, { "epoch": 0.17049955516556373, "grad_norm": 1.7615388631820679, "learning_rate": 4.3658755612572165e-06, "loss": 0.8862, "step": 13990 }, { "epoch": 0.17056049138971152, "grad_norm": 2.178530693054199, "learning_rate": 4.3655548428479796e-06, "loss": 0.8944, "step": 13995 }, { "epoch": 0.17062142761385934, "grad_norm": 2.0655019283294678, "learning_rate": 4.3652341244387434e-06, "loss": 0.9058, "step": 14000 }, { "epoch": 0.17068236383800714, "grad_norm": 2.493760108947754, "learning_rate": 4.3649134060295064e-06, "loss": 0.8391, "step": 14005 }, { "epoch": 0.17074330006215496, "grad_norm": 1.942140817642212, "learning_rate": 4.3645926876202695e-06, "loss": 0.8128, "step": 14010 }, { "epoch": 0.17080423628630276, "grad_norm": 1.9243431091308594, "learning_rate": 4.364271969211033e-06, "loss": 0.966, "step": 14015 }, { "epoch": 0.17086517251045055, "grad_norm": 1.8527876138687134, "learning_rate": 4.363951250801796e-06, "loss": 0.8287, "step": 14020 }, { "epoch": 0.17092610873459838, "grad_norm": 2.125988483428955, "learning_rate": 4.36363053239256e-06, "loss": 0.8831, "step": 14025 }, { "epoch": 0.17098704495874617, "grad_norm": 1.799952507019043, "learning_rate": 4.363309813983323e-06, "loss": 0.8373, "step": 14030 }, { "epoch": 0.171047981182894, "grad_norm": 2.2345855236053467, "learning_rate": 4.362989095574086e-06, "loss": 0.9045, "step": 14035 }, { "epoch": 0.1711089174070418, "grad_norm": 1.9788055419921875, "learning_rate": 4.36266837716485e-06, "loss": 0.9385, "step": 14040 }, { "epoch": 0.17116985363118958, "grad_norm": 2.1131370067596436, "learning_rate": 4.362347658755613e-06, "loss": 0.8941, "step": 14045 }, { "epoch": 0.1712307898553374, "grad_norm": 2.0717856884002686, "learning_rate": 4.362026940346376e-06, "loss": 0.8675, "step": 14050 }, { "epoch": 0.1712917260794852, "grad_norm": 1.8948454856872559, "learning_rate": 4.36170622193714e-06, "loss": 0.8944, "step": 14055 }, { "epoch": 0.17135266230363302, "grad_norm": 2.0683794021606445, "learning_rate": 4.361385503527903e-06, "loss": 0.8439, "step": 14060 }, { "epoch": 0.17141359852778082, "grad_norm": 1.9333446025848389, "learning_rate": 4.361064785118666e-06, "loss": 0.8473, "step": 14065 }, { "epoch": 0.17147453475192864, "grad_norm": 1.890438199043274, "learning_rate": 4.36074406670943e-06, "loss": 0.9519, "step": 14070 }, { "epoch": 0.17153547097607644, "grad_norm": 2.0093655586242676, "learning_rate": 4.360423348300193e-06, "loss": 0.9227, "step": 14075 }, { "epoch": 0.17159640720022423, "grad_norm": 1.9343843460083008, "learning_rate": 4.360102629890956e-06, "loss": 0.9396, "step": 14080 }, { "epoch": 0.17165734342437206, "grad_norm": 1.8805954456329346, "learning_rate": 4.359781911481719e-06, "loss": 0.8951, "step": 14085 }, { "epoch": 0.17171827964851985, "grad_norm": 2.2290689945220947, "learning_rate": 4.359461193072483e-06, "loss": 0.8989, "step": 14090 }, { "epoch": 0.17177921587266767, "grad_norm": 1.868364691734314, "learning_rate": 4.359140474663246e-06, "loss": 0.9282, "step": 14095 }, { "epoch": 0.17184015209681547, "grad_norm": 2.0041255950927734, "learning_rate": 4.358819756254009e-06, "loss": 0.9234, "step": 14100 }, { "epoch": 0.1719010883209633, "grad_norm": 1.850518822669983, "learning_rate": 4.358499037844773e-06, "loss": 0.8948, "step": 14105 }, { "epoch": 0.1719620245451111, "grad_norm": 2.127552032470703, "learning_rate": 4.358178319435536e-06, "loss": 0.863, "step": 14110 }, { "epoch": 0.17202296076925888, "grad_norm": 2.0336639881134033, "learning_rate": 4.357857601026299e-06, "loss": 0.8712, "step": 14115 }, { "epoch": 0.1720838969934067, "grad_norm": 2.144634962081909, "learning_rate": 4.357536882617063e-06, "loss": 0.8761, "step": 14120 }, { "epoch": 0.1721448332175545, "grad_norm": 1.7072809934616089, "learning_rate": 4.357216164207826e-06, "loss": 0.9125, "step": 14125 }, { "epoch": 0.17220576944170232, "grad_norm": 1.7997312545776367, "learning_rate": 4.356895445798589e-06, "loss": 0.93, "step": 14130 }, { "epoch": 0.17226670566585012, "grad_norm": 2.2235047817230225, "learning_rate": 4.3565747273893525e-06, "loss": 0.9259, "step": 14135 }, { "epoch": 0.17232764188999794, "grad_norm": 2.0478546619415283, "learning_rate": 4.3562540089801155e-06, "loss": 0.8004, "step": 14140 }, { "epoch": 0.17238857811414574, "grad_norm": 1.9411187171936035, "learning_rate": 4.355933290570879e-06, "loss": 0.9393, "step": 14145 }, { "epoch": 0.17244951433829353, "grad_norm": 1.975809097290039, "learning_rate": 4.355612572161642e-06, "loss": 0.9362, "step": 14150 }, { "epoch": 0.17251045056244135, "grad_norm": 1.635909914970398, "learning_rate": 4.355291853752405e-06, "loss": 0.903, "step": 14155 }, { "epoch": 0.17257138678658915, "grad_norm": 1.8488861322402954, "learning_rate": 4.354971135343169e-06, "loss": 0.8873, "step": 14160 }, { "epoch": 0.17263232301073697, "grad_norm": 1.7252609729766846, "learning_rate": 4.354650416933932e-06, "loss": 0.951, "step": 14165 }, { "epoch": 0.17269325923488477, "grad_norm": 1.909493088722229, "learning_rate": 4.354329698524696e-06, "loss": 0.8784, "step": 14170 }, { "epoch": 0.1727541954590326, "grad_norm": 2.154372453689575, "learning_rate": 4.354008980115459e-06, "loss": 0.8823, "step": 14175 }, { "epoch": 0.17281513168318038, "grad_norm": 1.8983345031738281, "learning_rate": 4.353688261706222e-06, "loss": 0.8659, "step": 14180 }, { "epoch": 0.17287606790732818, "grad_norm": 1.7797960042953491, "learning_rate": 4.353367543296986e-06, "loss": 0.8872, "step": 14185 }, { "epoch": 0.172937004131476, "grad_norm": 1.7280627489089966, "learning_rate": 4.353046824887749e-06, "loss": 0.9166, "step": 14190 }, { "epoch": 0.1729979403556238, "grad_norm": 2.25760817527771, "learning_rate": 4.352726106478512e-06, "loss": 0.8736, "step": 14195 }, { "epoch": 0.17305887657977162, "grad_norm": 1.8200498819351196, "learning_rate": 4.352405388069276e-06, "loss": 0.8804, "step": 14200 }, { "epoch": 0.17311981280391942, "grad_norm": 2.0533158779144287, "learning_rate": 4.352084669660039e-06, "loss": 0.9552, "step": 14205 }, { "epoch": 0.17318074902806724, "grad_norm": 2.308551073074341, "learning_rate": 4.351763951250802e-06, "loss": 0.9, "step": 14210 }, { "epoch": 0.17324168525221503, "grad_norm": 1.7692643404006958, "learning_rate": 4.351443232841566e-06, "loss": 0.821, "step": 14215 }, { "epoch": 0.17330262147636283, "grad_norm": 1.975965142250061, "learning_rate": 4.351122514432329e-06, "loss": 0.8654, "step": 14220 }, { "epoch": 0.17336355770051065, "grad_norm": 1.9734578132629395, "learning_rate": 4.350801796023092e-06, "loss": 0.9623, "step": 14225 }, { "epoch": 0.17342449392465845, "grad_norm": 1.8339433670043945, "learning_rate": 4.350481077613856e-06, "loss": 0.8457, "step": 14230 }, { "epoch": 0.17348543014880627, "grad_norm": 1.9543204307556152, "learning_rate": 4.350160359204619e-06, "loss": 0.9509, "step": 14235 }, { "epoch": 0.17354636637295406, "grad_norm": 1.9018243551254272, "learning_rate": 4.349839640795382e-06, "loss": 0.9039, "step": 14240 }, { "epoch": 0.1736073025971019, "grad_norm": 2.6268515586853027, "learning_rate": 4.349518922386146e-06, "loss": 0.9059, "step": 14245 }, { "epoch": 0.17366823882124968, "grad_norm": 2.3161492347717285, "learning_rate": 4.349198203976909e-06, "loss": 0.8742, "step": 14250 }, { "epoch": 0.17372917504539748, "grad_norm": 2.0271952152252197, "learning_rate": 4.348877485567672e-06, "loss": 0.8812, "step": 14255 }, { "epoch": 0.1737901112695453, "grad_norm": 1.9213435649871826, "learning_rate": 4.348556767158435e-06, "loss": 0.8965, "step": 14260 }, { "epoch": 0.1738510474936931, "grad_norm": 1.9408481121063232, "learning_rate": 4.3482360487491986e-06, "loss": 0.8414, "step": 14265 }, { "epoch": 0.17391198371784092, "grad_norm": 2.134479284286499, "learning_rate": 4.347915330339962e-06, "loss": 0.8893, "step": 14270 }, { "epoch": 0.1739729199419887, "grad_norm": 2.007962226867676, "learning_rate": 4.347594611930725e-06, "loss": 0.9494, "step": 14275 }, { "epoch": 0.1740338561661365, "grad_norm": 1.8568110466003418, "learning_rate": 4.3472738935214885e-06, "loss": 0.913, "step": 14280 }, { "epoch": 0.17409479239028433, "grad_norm": 2.327261447906494, "learning_rate": 4.3469531751122515e-06, "loss": 0.8814, "step": 14285 }, { "epoch": 0.17415572861443213, "grad_norm": 2.152235269546509, "learning_rate": 4.346632456703015e-06, "loss": 0.8537, "step": 14290 }, { "epoch": 0.17421666483857995, "grad_norm": 2.2203314304351807, "learning_rate": 4.346311738293778e-06, "loss": 0.9028, "step": 14295 }, { "epoch": 0.17427760106272774, "grad_norm": 2.063164710998535, "learning_rate": 4.345991019884541e-06, "loss": 0.882, "step": 14300 }, { "epoch": 0.17433853728687557, "grad_norm": 1.914847493171692, "learning_rate": 4.345670301475305e-06, "loss": 0.8862, "step": 14305 }, { "epoch": 0.17439947351102336, "grad_norm": 2.187490701675415, "learning_rate": 4.345349583066068e-06, "loss": 0.8491, "step": 14310 }, { "epoch": 0.17446040973517116, "grad_norm": 2.2658133506774902, "learning_rate": 4.345028864656832e-06, "loss": 0.9772, "step": 14315 }, { "epoch": 0.17452134595931898, "grad_norm": 2.0625388622283936, "learning_rate": 4.344708146247595e-06, "loss": 0.9174, "step": 14320 }, { "epoch": 0.17458228218346677, "grad_norm": 1.8255995512008667, "learning_rate": 4.344387427838358e-06, "loss": 0.9979, "step": 14325 }, { "epoch": 0.1746432184076146, "grad_norm": 2.2061145305633545, "learning_rate": 4.344066709429122e-06, "loss": 0.8573, "step": 14330 }, { "epoch": 0.1747041546317624, "grad_norm": 1.8742938041687012, "learning_rate": 4.343745991019885e-06, "loss": 0.8915, "step": 14335 }, { "epoch": 0.17476509085591022, "grad_norm": 1.9968091249465942, "learning_rate": 4.343425272610648e-06, "loss": 0.9339, "step": 14340 }, { "epoch": 0.174826027080058, "grad_norm": 2.3348326683044434, "learning_rate": 4.343104554201412e-06, "loss": 0.8654, "step": 14345 }, { "epoch": 0.1748869633042058, "grad_norm": 2.22135329246521, "learning_rate": 4.342783835792175e-06, "loss": 0.966, "step": 14350 }, { "epoch": 0.17494789952835363, "grad_norm": 1.8884752988815308, "learning_rate": 4.342463117382938e-06, "loss": 0.8758, "step": 14355 }, { "epoch": 0.17500883575250142, "grad_norm": 1.8760032653808594, "learning_rate": 4.342142398973702e-06, "loss": 0.8816, "step": 14360 }, { "epoch": 0.17506977197664925, "grad_norm": 1.7548681497573853, "learning_rate": 4.341821680564465e-06, "loss": 0.8666, "step": 14365 }, { "epoch": 0.17513070820079704, "grad_norm": 2.132857322692871, "learning_rate": 4.341500962155228e-06, "loss": 0.881, "step": 14370 }, { "epoch": 0.17519164442494486, "grad_norm": 1.841926097869873, "learning_rate": 4.341180243745992e-06, "loss": 0.8839, "step": 14375 }, { "epoch": 0.17525258064909266, "grad_norm": 2.251952648162842, "learning_rate": 4.340859525336755e-06, "loss": 0.9354, "step": 14380 }, { "epoch": 0.17531351687324045, "grad_norm": 2.0097591876983643, "learning_rate": 4.340538806927518e-06, "loss": 0.8335, "step": 14385 }, { "epoch": 0.17537445309738828, "grad_norm": 2.0085177421569824, "learning_rate": 4.340218088518282e-06, "loss": 0.9891, "step": 14390 }, { "epoch": 0.17543538932153607, "grad_norm": 2.5060276985168457, "learning_rate": 4.339897370109045e-06, "loss": 0.8263, "step": 14395 }, { "epoch": 0.1754963255456839, "grad_norm": 2.0636894702911377, "learning_rate": 4.339576651699808e-06, "loss": 0.8778, "step": 14400 }, { "epoch": 0.1755572617698317, "grad_norm": 2.3147308826446533, "learning_rate": 4.3392559332905715e-06, "loss": 0.8834, "step": 14405 }, { "epoch": 0.1756181979939795, "grad_norm": 1.9547107219696045, "learning_rate": 4.3389352148813345e-06, "loss": 0.9009, "step": 14410 }, { "epoch": 0.1756791342181273, "grad_norm": 1.802796483039856, "learning_rate": 4.3386144964720975e-06, "loss": 0.851, "step": 14415 }, { "epoch": 0.1757400704422751, "grad_norm": 2.32014799118042, "learning_rate": 4.3382937780628606e-06, "loss": 0.9148, "step": 14420 }, { "epoch": 0.17580100666642293, "grad_norm": 1.9397797584533691, "learning_rate": 4.337973059653624e-06, "loss": 0.8719, "step": 14425 }, { "epoch": 0.17586194289057072, "grad_norm": 1.8056533336639404, "learning_rate": 4.3376523412443874e-06, "loss": 0.9064, "step": 14430 }, { "epoch": 0.17592287911471854, "grad_norm": 2.2825374603271484, "learning_rate": 4.3373316228351505e-06, "loss": 0.8652, "step": 14435 }, { "epoch": 0.17598381533886634, "grad_norm": 2.2755658626556396, "learning_rate": 4.337010904425914e-06, "loss": 0.8593, "step": 14440 }, { "epoch": 0.17604475156301416, "grad_norm": 2.182252883911133, "learning_rate": 4.336690186016677e-06, "loss": 0.9002, "step": 14445 }, { "epoch": 0.17610568778716196, "grad_norm": 1.927617073059082, "learning_rate": 4.336369467607441e-06, "loss": 0.9408, "step": 14450 }, { "epoch": 0.17616662401130975, "grad_norm": 1.8509175777435303, "learning_rate": 4.336048749198204e-06, "loss": 0.9684, "step": 14455 }, { "epoch": 0.17622756023545758, "grad_norm": 2.584231376647949, "learning_rate": 4.335728030788967e-06, "loss": 0.9194, "step": 14460 }, { "epoch": 0.17628849645960537, "grad_norm": 2.0054612159729004, "learning_rate": 4.335407312379731e-06, "loss": 0.8529, "step": 14465 }, { "epoch": 0.1763494326837532, "grad_norm": 1.792898178100586, "learning_rate": 4.335086593970494e-06, "loss": 0.8081, "step": 14470 }, { "epoch": 0.176410368907901, "grad_norm": 2.4582691192626953, "learning_rate": 4.334765875561258e-06, "loss": 0.8937, "step": 14475 }, { "epoch": 0.1764713051320488, "grad_norm": 1.9124269485473633, "learning_rate": 4.334445157152021e-06, "loss": 0.8935, "step": 14480 }, { "epoch": 0.1765322413561966, "grad_norm": 2.2202959060668945, "learning_rate": 4.334124438742785e-06, "loss": 0.8692, "step": 14485 }, { "epoch": 0.1765931775803444, "grad_norm": 2.585061550140381, "learning_rate": 4.333803720333548e-06, "loss": 0.8936, "step": 14490 }, { "epoch": 0.17665411380449222, "grad_norm": 1.884887456893921, "learning_rate": 4.333483001924311e-06, "loss": 0.8334, "step": 14495 }, { "epoch": 0.17671505002864002, "grad_norm": 2.0060694217681885, "learning_rate": 4.333162283515075e-06, "loss": 0.8858, "step": 14500 }, { "epoch": 0.17677598625278784, "grad_norm": 1.8164206743240356, "learning_rate": 4.332841565105838e-06, "loss": 0.9126, "step": 14505 }, { "epoch": 0.17683692247693564, "grad_norm": 2.1946730613708496, "learning_rate": 4.332520846696601e-06, "loss": 0.8507, "step": 14510 }, { "epoch": 0.17689785870108343, "grad_norm": 1.8560060262680054, "learning_rate": 4.332200128287364e-06, "loss": 0.8865, "step": 14515 }, { "epoch": 0.17695879492523126, "grad_norm": 2.0464377403259277, "learning_rate": 4.331879409878128e-06, "loss": 0.8606, "step": 14520 }, { "epoch": 0.17701973114937905, "grad_norm": 2.02463960647583, "learning_rate": 4.331558691468891e-06, "loss": 0.9116, "step": 14525 }, { "epoch": 0.17708066737352687, "grad_norm": 2.03060245513916, "learning_rate": 4.331237973059654e-06, "loss": 0.8625, "step": 14530 }, { "epoch": 0.17714160359767467, "grad_norm": 2.1675376892089844, "learning_rate": 4.3309172546504176e-06, "loss": 0.9198, "step": 14535 }, { "epoch": 0.1772025398218225, "grad_norm": 2.0400257110595703, "learning_rate": 4.330596536241181e-06, "loss": 0.8697, "step": 14540 }, { "epoch": 0.1772634760459703, "grad_norm": 1.773086428642273, "learning_rate": 4.330275817831944e-06, "loss": 0.9195, "step": 14545 }, { "epoch": 0.17732441227011808, "grad_norm": 1.7997006177902222, "learning_rate": 4.3299550994227075e-06, "loss": 0.9417, "step": 14550 }, { "epoch": 0.1773853484942659, "grad_norm": 2.11601185798645, "learning_rate": 4.3296343810134705e-06, "loss": 0.8981, "step": 14555 }, { "epoch": 0.1774462847184137, "grad_norm": 2.038440227508545, "learning_rate": 4.3293136626042335e-06, "loss": 0.7945, "step": 14560 }, { "epoch": 0.17750722094256152, "grad_norm": 1.9584219455718994, "learning_rate": 4.328992944194997e-06, "loss": 0.9359, "step": 14565 }, { "epoch": 0.17756815716670932, "grad_norm": 2.129758596420288, "learning_rate": 4.32867222578576e-06, "loss": 0.814, "step": 14570 }, { "epoch": 0.17762909339085714, "grad_norm": 2.0560152530670166, "learning_rate": 4.328351507376523e-06, "loss": 0.8834, "step": 14575 }, { "epoch": 0.17769002961500494, "grad_norm": 2.144578456878662, "learning_rate": 4.328030788967287e-06, "loss": 0.8789, "step": 14580 }, { "epoch": 0.17775096583915273, "grad_norm": 1.8709112405776978, "learning_rate": 4.32771007055805e-06, "loss": 0.8479, "step": 14585 }, { "epoch": 0.17781190206330055, "grad_norm": 1.8592497110366821, "learning_rate": 4.327389352148813e-06, "loss": 0.9148, "step": 14590 }, { "epoch": 0.17787283828744835, "grad_norm": 1.9299126863479614, "learning_rate": 4.327068633739577e-06, "loss": 0.9368, "step": 14595 }, { "epoch": 0.17793377451159617, "grad_norm": 1.723405361175537, "learning_rate": 4.32674791533034e-06, "loss": 0.8391, "step": 14600 }, { "epoch": 0.17799471073574397, "grad_norm": 2.145259141921997, "learning_rate": 4.326427196921103e-06, "loss": 0.945, "step": 14605 }, { "epoch": 0.1780556469598918, "grad_norm": 1.9690388441085815, "learning_rate": 4.326106478511867e-06, "loss": 0.8514, "step": 14610 }, { "epoch": 0.17811658318403958, "grad_norm": 2.381549119949341, "learning_rate": 4.32578576010263e-06, "loss": 0.8605, "step": 14615 }, { "epoch": 0.17817751940818738, "grad_norm": 1.8186310529708862, "learning_rate": 4.325465041693394e-06, "loss": 0.8381, "step": 14620 }, { "epoch": 0.1782384556323352, "grad_norm": 2.150336980819702, "learning_rate": 4.325144323284157e-06, "loss": 0.8784, "step": 14625 }, { "epoch": 0.178299391856483, "grad_norm": 2.3821356296539307, "learning_rate": 4.32482360487492e-06, "loss": 0.9208, "step": 14630 }, { "epoch": 0.17836032808063082, "grad_norm": 2.15343976020813, "learning_rate": 4.324502886465684e-06, "loss": 0.92, "step": 14635 }, { "epoch": 0.17842126430477862, "grad_norm": 1.8390144109725952, "learning_rate": 4.324182168056447e-06, "loss": 0.9309, "step": 14640 }, { "epoch": 0.17848220052892644, "grad_norm": 2.143336057662964, "learning_rate": 4.323861449647211e-06, "loss": 0.8473, "step": 14645 }, { "epoch": 0.17854313675307423, "grad_norm": 2.024366617202759, "learning_rate": 4.323540731237974e-06, "loss": 0.9096, "step": 14650 }, { "epoch": 0.17860407297722203, "grad_norm": 2.0580592155456543, "learning_rate": 4.323220012828737e-06, "loss": 0.8804, "step": 14655 }, { "epoch": 0.17866500920136985, "grad_norm": 2.066340684890747, "learning_rate": 4.322899294419501e-06, "loss": 0.9697, "step": 14660 }, { "epoch": 0.17872594542551765, "grad_norm": 2.0904669761657715, "learning_rate": 4.322578576010264e-06, "loss": 0.8895, "step": 14665 }, { "epoch": 0.17878688164966547, "grad_norm": 2.3791627883911133, "learning_rate": 4.322257857601027e-06, "loss": 0.8921, "step": 14670 }, { "epoch": 0.17884781787381326, "grad_norm": 2.1602981090545654, "learning_rate": 4.32193713919179e-06, "loss": 0.9022, "step": 14675 }, { "epoch": 0.1789087540979611, "grad_norm": 2.0232198238372803, "learning_rate": 4.3216164207825535e-06, "loss": 0.9715, "step": 14680 }, { "epoch": 0.17896969032210888, "grad_norm": 2.629528760910034, "learning_rate": 4.3212957023733165e-06, "loss": 0.9085, "step": 14685 }, { "epoch": 0.17903062654625668, "grad_norm": 1.8588603734970093, "learning_rate": 4.3209749839640796e-06, "loss": 0.9215, "step": 14690 }, { "epoch": 0.1790915627704045, "grad_norm": 2.0867230892181396, "learning_rate": 4.320654265554843e-06, "loss": 0.899, "step": 14695 }, { "epoch": 0.1791524989945523, "grad_norm": 1.9643036127090454, "learning_rate": 4.3203335471456064e-06, "loss": 0.8878, "step": 14700 }, { "epoch": 0.17921343521870012, "grad_norm": 1.6042526960372925, "learning_rate": 4.3200128287363695e-06, "loss": 0.9084, "step": 14705 }, { "epoch": 0.1792743714428479, "grad_norm": 2.3442070484161377, "learning_rate": 4.319692110327133e-06, "loss": 0.9121, "step": 14710 }, { "epoch": 0.17933530766699574, "grad_norm": 1.625722885131836, "learning_rate": 4.319371391917896e-06, "loss": 0.8866, "step": 14715 }, { "epoch": 0.17939624389114353, "grad_norm": 2.1365251541137695, "learning_rate": 4.319050673508659e-06, "loss": 0.9074, "step": 14720 }, { "epoch": 0.17945718011529133, "grad_norm": 1.7583972215652466, "learning_rate": 4.318729955099423e-06, "loss": 0.892, "step": 14725 }, { "epoch": 0.17951811633943915, "grad_norm": 1.5897694826126099, "learning_rate": 4.318409236690186e-06, "loss": 0.8614, "step": 14730 }, { "epoch": 0.17957905256358694, "grad_norm": 2.4007301330566406, "learning_rate": 4.318088518280949e-06, "loss": 0.9368, "step": 14735 }, { "epoch": 0.17963998878773477, "grad_norm": 2.31900954246521, "learning_rate": 4.317767799871713e-06, "loss": 0.9282, "step": 14740 }, { "epoch": 0.17970092501188256, "grad_norm": 2.4146196842193604, "learning_rate": 4.317447081462476e-06, "loss": 0.8773, "step": 14745 }, { "epoch": 0.17976186123603036, "grad_norm": 1.7349413633346558, "learning_rate": 4.317126363053239e-06, "loss": 0.9398, "step": 14750 }, { "epoch": 0.17982279746017818, "grad_norm": 2.3292906284332275, "learning_rate": 4.316805644644003e-06, "loss": 0.8686, "step": 14755 }, { "epoch": 0.17988373368432597, "grad_norm": 2.312612533569336, "learning_rate": 4.316484926234766e-06, "loss": 0.9716, "step": 14760 }, { "epoch": 0.1799446699084738, "grad_norm": 1.8516346216201782, "learning_rate": 4.31616420782553e-06, "loss": 0.8842, "step": 14765 }, { "epoch": 0.1800056061326216, "grad_norm": 1.8058520555496216, "learning_rate": 4.315843489416293e-06, "loss": 0.8745, "step": 14770 }, { "epoch": 0.18006654235676942, "grad_norm": 2.445539951324463, "learning_rate": 4.315522771007056e-06, "loss": 0.8847, "step": 14775 }, { "epoch": 0.1801274785809172, "grad_norm": 1.846800446510315, "learning_rate": 4.31520205259782e-06, "loss": 0.8599, "step": 14780 }, { "epoch": 0.180188414805065, "grad_norm": 1.559008240699768, "learning_rate": 4.314881334188583e-06, "loss": 0.8017, "step": 14785 }, { "epoch": 0.18024935102921283, "grad_norm": 2.3259880542755127, "learning_rate": 4.314560615779347e-06, "loss": 0.9079, "step": 14790 }, { "epoch": 0.18031028725336062, "grad_norm": 1.5310698747634888, "learning_rate": 4.31423989737011e-06, "loss": 0.8859, "step": 14795 }, { "epoch": 0.18037122347750845, "grad_norm": 1.7217535972595215, "learning_rate": 4.313919178960873e-06, "loss": 0.8754, "step": 14800 }, { "epoch": 0.18043215970165624, "grad_norm": 2.0772457122802734, "learning_rate": 4.3135984605516366e-06, "loss": 0.901, "step": 14805 }, { "epoch": 0.18049309592580406, "grad_norm": 1.5602943897247314, "learning_rate": 4.3132777421424e-06, "loss": 0.8847, "step": 14810 }, { "epoch": 0.18055403214995186, "grad_norm": 1.9800652265548706, "learning_rate": 4.312957023733163e-06, "loss": 0.9427, "step": 14815 }, { "epoch": 0.18061496837409965, "grad_norm": 2.3529345989227295, "learning_rate": 4.3126363053239265e-06, "loss": 0.9332, "step": 14820 }, { "epoch": 0.18067590459824748, "grad_norm": 2.214144229888916, "learning_rate": 4.3123155869146895e-06, "loss": 0.8936, "step": 14825 }, { "epoch": 0.18073684082239527, "grad_norm": 2.022190570831299, "learning_rate": 4.3119948685054525e-06, "loss": 0.9083, "step": 14830 }, { "epoch": 0.1807977770465431, "grad_norm": 1.7606393098831177, "learning_rate": 4.311674150096216e-06, "loss": 0.8643, "step": 14835 }, { "epoch": 0.1808587132706909, "grad_norm": 3.375805616378784, "learning_rate": 4.311353431686979e-06, "loss": 0.8574, "step": 14840 }, { "epoch": 0.1809196494948387, "grad_norm": 2.339682102203369, "learning_rate": 4.311032713277742e-06, "loss": 0.9045, "step": 14845 }, { "epoch": 0.1809805857189865, "grad_norm": 1.8170944452285767, "learning_rate": 4.310711994868505e-06, "loss": 0.9091, "step": 14850 }, { "epoch": 0.1810415219431343, "grad_norm": 2.5953454971313477, "learning_rate": 4.310391276459269e-06, "loss": 0.8692, "step": 14855 }, { "epoch": 0.18110245816728213, "grad_norm": 2.3666446208953857, "learning_rate": 4.310070558050032e-06, "loss": 0.8519, "step": 14860 }, { "epoch": 0.18116339439142992, "grad_norm": 1.8951166868209839, "learning_rate": 4.309749839640795e-06, "loss": 0.9041, "step": 14865 }, { "epoch": 0.18122433061557774, "grad_norm": 2.06611704826355, "learning_rate": 4.309429121231559e-06, "loss": 0.8519, "step": 14870 }, { "epoch": 0.18128526683972554, "grad_norm": 2.1088950634002686, "learning_rate": 4.309108402822322e-06, "loss": 0.8378, "step": 14875 }, { "epoch": 0.18134620306387336, "grad_norm": 1.7604480981826782, "learning_rate": 4.308787684413085e-06, "loss": 0.95, "step": 14880 }, { "epoch": 0.18140713928802116, "grad_norm": 1.7283772230148315, "learning_rate": 4.308466966003849e-06, "loss": 0.8278, "step": 14885 }, { "epoch": 0.18146807551216895, "grad_norm": 1.892307162284851, "learning_rate": 4.308146247594612e-06, "loss": 0.9273, "step": 14890 }, { "epoch": 0.18152901173631678, "grad_norm": 1.737561583518982, "learning_rate": 4.307825529185375e-06, "loss": 0.8329, "step": 14895 }, { "epoch": 0.18158994796046457, "grad_norm": 1.7296150922775269, "learning_rate": 4.307504810776139e-06, "loss": 0.8448, "step": 14900 }, { "epoch": 0.1816508841846124, "grad_norm": 2.055602550506592, "learning_rate": 4.307184092366902e-06, "loss": 0.9325, "step": 14905 }, { "epoch": 0.1817118204087602, "grad_norm": 1.9360934495925903, "learning_rate": 4.306863373957665e-06, "loss": 0.9288, "step": 14910 }, { "epoch": 0.181772756632908, "grad_norm": 2.194597005844116, "learning_rate": 4.306542655548429e-06, "loss": 0.9165, "step": 14915 }, { "epoch": 0.1818336928570558, "grad_norm": 1.7143386602401733, "learning_rate": 4.306221937139192e-06, "loss": 0.8676, "step": 14920 }, { "epoch": 0.1818946290812036, "grad_norm": 1.9239730834960938, "learning_rate": 4.305901218729956e-06, "loss": 0.9296, "step": 14925 }, { "epoch": 0.18195556530535142, "grad_norm": 2.2659354209899902, "learning_rate": 4.305580500320719e-06, "loss": 0.8867, "step": 14930 }, { "epoch": 0.18201650152949922, "grad_norm": 2.3807265758514404, "learning_rate": 4.305259781911482e-06, "loss": 0.8486, "step": 14935 }, { "epoch": 0.18207743775364704, "grad_norm": 2.616919994354248, "learning_rate": 4.304939063502246e-06, "loss": 0.9542, "step": 14940 }, { "epoch": 0.18213837397779484, "grad_norm": 1.9366356134414673, "learning_rate": 4.304618345093009e-06, "loss": 0.9338, "step": 14945 }, { "epoch": 0.18219931020194266, "grad_norm": 2.1864829063415527, "learning_rate": 4.3042976266837725e-06, "loss": 0.8707, "step": 14950 }, { "epoch": 0.18226024642609046, "grad_norm": 1.6316499710083008, "learning_rate": 4.3039769082745355e-06, "loss": 0.8879, "step": 14955 }, { "epoch": 0.18232118265023825, "grad_norm": 2.232537269592285, "learning_rate": 4.3036561898652986e-06, "loss": 0.8394, "step": 14960 }, { "epoch": 0.18238211887438607, "grad_norm": 1.8379876613616943, "learning_rate": 4.303335471456062e-06, "loss": 0.864, "step": 14965 }, { "epoch": 0.18244305509853387, "grad_norm": 2.1686136722564697, "learning_rate": 4.3030147530468254e-06, "loss": 0.9648, "step": 14970 }, { "epoch": 0.1825039913226817, "grad_norm": 2.1917240619659424, "learning_rate": 4.3026940346375885e-06, "loss": 0.9274, "step": 14975 }, { "epoch": 0.1825649275468295, "grad_norm": 2.3189971446990967, "learning_rate": 4.302373316228352e-06, "loss": 0.8934, "step": 14980 }, { "epoch": 0.18262586377097728, "grad_norm": 1.7317358255386353, "learning_rate": 4.302052597819115e-06, "loss": 0.8701, "step": 14985 }, { "epoch": 0.1826867999951251, "grad_norm": 1.8364577293395996, "learning_rate": 4.301731879409878e-06, "loss": 0.8742, "step": 14990 }, { "epoch": 0.1827477362192729, "grad_norm": 1.927793264389038, "learning_rate": 4.301411161000642e-06, "loss": 0.8507, "step": 14995 }, { "epoch": 0.18280867244342072, "grad_norm": 1.864075779914856, "learning_rate": 4.301090442591405e-06, "loss": 0.9084, "step": 15000 }, { "epoch": 0.18286960866756852, "grad_norm": 2.2053539752960205, "learning_rate": 4.300769724182168e-06, "loss": 0.9072, "step": 15005 }, { "epoch": 0.18293054489171634, "grad_norm": 2.053009510040283, "learning_rate": 4.300449005772931e-06, "loss": 0.8586, "step": 15010 }, { "epoch": 0.18299148111586414, "grad_norm": 2.3054463863372803, "learning_rate": 4.300128287363695e-06, "loss": 0.8236, "step": 15015 }, { "epoch": 0.18305241734001193, "grad_norm": 1.9585775136947632, "learning_rate": 4.299807568954458e-06, "loss": 0.8939, "step": 15020 }, { "epoch": 0.18311335356415975, "grad_norm": 2.366779088973999, "learning_rate": 4.299486850545221e-06, "loss": 0.9032, "step": 15025 }, { "epoch": 0.18317428978830755, "grad_norm": 2.0515942573547363, "learning_rate": 4.299166132135985e-06, "loss": 0.9123, "step": 15030 }, { "epoch": 0.18323522601245537, "grad_norm": 2.0917248725891113, "learning_rate": 4.298845413726748e-06, "loss": 0.9298, "step": 15035 }, { "epoch": 0.18329616223660317, "grad_norm": 2.249925374984741, "learning_rate": 4.298524695317511e-06, "loss": 0.8564, "step": 15040 }, { "epoch": 0.183357098460751, "grad_norm": 1.8328356742858887, "learning_rate": 4.298203976908275e-06, "loss": 0.9346, "step": 15045 }, { "epoch": 0.18341803468489878, "grad_norm": 2.0368902683258057, "learning_rate": 4.297883258499038e-06, "loss": 0.8923, "step": 15050 }, { "epoch": 0.18347897090904658, "grad_norm": 2.0286898612976074, "learning_rate": 4.297562540089801e-06, "loss": 0.9755, "step": 15055 }, { "epoch": 0.1835399071331944, "grad_norm": 1.9964485168457031, "learning_rate": 4.297241821680565e-06, "loss": 0.9112, "step": 15060 }, { "epoch": 0.1836008433573422, "grad_norm": 2.14259672164917, "learning_rate": 4.296921103271328e-06, "loss": 0.9943, "step": 15065 }, { "epoch": 0.18366177958149002, "grad_norm": 2.3340113162994385, "learning_rate": 4.296600384862092e-06, "loss": 0.8756, "step": 15070 }, { "epoch": 0.18372271580563782, "grad_norm": 2.1549482345581055, "learning_rate": 4.296279666452855e-06, "loss": 0.9008, "step": 15075 }, { "epoch": 0.18378365202978564, "grad_norm": 2.5734262466430664, "learning_rate": 4.295958948043618e-06, "loss": 0.9653, "step": 15080 }, { "epoch": 0.18384458825393343, "grad_norm": 2.099513530731201, "learning_rate": 4.295638229634382e-06, "loss": 0.8586, "step": 15085 }, { "epoch": 0.18390552447808123, "grad_norm": 2.0986881256103516, "learning_rate": 4.295317511225145e-06, "loss": 0.9721, "step": 15090 }, { "epoch": 0.18396646070222905, "grad_norm": 2.169792413711548, "learning_rate": 4.2949967928159085e-06, "loss": 0.8188, "step": 15095 }, { "epoch": 0.18402739692637685, "grad_norm": 2.622735023498535, "learning_rate": 4.2946760744066715e-06, "loss": 0.9683, "step": 15100 }, { "epoch": 0.18408833315052467, "grad_norm": 1.8466449975967407, "learning_rate": 4.2943553559974345e-06, "loss": 0.9061, "step": 15105 }, { "epoch": 0.18414926937467246, "grad_norm": 2.022063970565796, "learning_rate": 4.294034637588198e-06, "loss": 0.8756, "step": 15110 }, { "epoch": 0.1842102055988203, "grad_norm": 2.2025375366210938, "learning_rate": 4.293713919178961e-06, "loss": 0.9335, "step": 15115 }, { "epoch": 0.18427114182296808, "grad_norm": 1.7024807929992676, "learning_rate": 4.293393200769724e-06, "loss": 0.8685, "step": 15120 }, { "epoch": 0.18433207804711588, "grad_norm": 2.1562633514404297, "learning_rate": 4.293072482360488e-06, "loss": 0.8876, "step": 15125 }, { "epoch": 0.1843930142712637, "grad_norm": 2.064517021179199, "learning_rate": 4.292751763951251e-06, "loss": 0.8702, "step": 15130 }, { "epoch": 0.1844539504954115, "grad_norm": 1.8224716186523438, "learning_rate": 4.292431045542014e-06, "loss": 0.8977, "step": 15135 }, { "epoch": 0.18451488671955932, "grad_norm": 1.9436618089675903, "learning_rate": 4.292110327132778e-06, "loss": 0.992, "step": 15140 }, { "epoch": 0.1845758229437071, "grad_norm": 1.8826676607131958, "learning_rate": 4.291789608723541e-06, "loss": 0.8902, "step": 15145 }, { "epoch": 0.18463675916785494, "grad_norm": 1.8217296600341797, "learning_rate": 4.291468890314304e-06, "loss": 0.8869, "step": 15150 }, { "epoch": 0.18469769539200273, "grad_norm": 2.352626323699951, "learning_rate": 4.291148171905068e-06, "loss": 0.9515, "step": 15155 }, { "epoch": 0.18475863161615053, "grad_norm": 1.6543402671813965, "learning_rate": 4.290827453495831e-06, "loss": 0.9133, "step": 15160 }, { "epoch": 0.18481956784029835, "grad_norm": 2.3184125423431396, "learning_rate": 4.290506735086594e-06, "loss": 0.9068, "step": 15165 }, { "epoch": 0.18488050406444614, "grad_norm": 1.7454605102539062, "learning_rate": 4.290186016677358e-06, "loss": 0.873, "step": 15170 }, { "epoch": 0.18494144028859397, "grad_norm": 1.7972025871276855, "learning_rate": 4.289865298268121e-06, "loss": 0.941, "step": 15175 }, { "epoch": 0.18500237651274176, "grad_norm": 2.1089775562286377, "learning_rate": 4.289544579858884e-06, "loss": 0.9489, "step": 15180 }, { "epoch": 0.18506331273688958, "grad_norm": 1.8956316709518433, "learning_rate": 4.289223861449647e-06, "loss": 0.8715, "step": 15185 }, { "epoch": 0.18512424896103738, "grad_norm": 1.82005774974823, "learning_rate": 4.288903143040411e-06, "loss": 0.8698, "step": 15190 }, { "epoch": 0.18518518518518517, "grad_norm": 1.970062494277954, "learning_rate": 4.288582424631174e-06, "loss": 0.7764, "step": 15195 }, { "epoch": 0.185246121409333, "grad_norm": 1.8597021102905273, "learning_rate": 4.288261706221937e-06, "loss": 0.8827, "step": 15200 }, { "epoch": 0.1853070576334808, "grad_norm": 2.102004289627075, "learning_rate": 4.287940987812701e-06, "loss": 0.9115, "step": 15205 }, { "epoch": 0.18536799385762862, "grad_norm": 1.891527533531189, "learning_rate": 4.287620269403464e-06, "loss": 1.0309, "step": 15210 }, { "epoch": 0.1854289300817764, "grad_norm": 1.6807647943496704, "learning_rate": 4.287299550994227e-06, "loss": 0.8876, "step": 15215 }, { "epoch": 0.1854898663059242, "grad_norm": 1.952064871788025, "learning_rate": 4.286978832584991e-06, "loss": 0.9064, "step": 15220 }, { "epoch": 0.18555080253007203, "grad_norm": 1.91242516040802, "learning_rate": 4.286658114175754e-06, "loss": 0.9028, "step": 15225 }, { "epoch": 0.18561173875421982, "grad_norm": 2.4718830585479736, "learning_rate": 4.2863373957665176e-06, "loss": 0.9157, "step": 15230 }, { "epoch": 0.18567267497836765, "grad_norm": 1.9825495481491089, "learning_rate": 4.2860166773572806e-06, "loss": 0.8866, "step": 15235 }, { "epoch": 0.18573361120251544, "grad_norm": 1.6652802228927612, "learning_rate": 4.2856959589480444e-06, "loss": 0.8875, "step": 15240 }, { "epoch": 0.18579454742666326, "grad_norm": 2.036953926086426, "learning_rate": 4.2853752405388075e-06, "loss": 1.0223, "step": 15245 }, { "epoch": 0.18585548365081106, "grad_norm": 2.111367702484131, "learning_rate": 4.2850545221295705e-06, "loss": 0.8465, "step": 15250 }, { "epoch": 0.18591641987495885, "grad_norm": 2.1152396202087402, "learning_rate": 4.284733803720334e-06, "loss": 0.959, "step": 15255 }, { "epoch": 0.18597735609910668, "grad_norm": 2.0158681869506836, "learning_rate": 4.284413085311097e-06, "loss": 0.8742, "step": 15260 }, { "epoch": 0.18603829232325447, "grad_norm": 2.2374277114868164, "learning_rate": 4.28409236690186e-06, "loss": 0.8645, "step": 15265 }, { "epoch": 0.1860992285474023, "grad_norm": 2.302626371383667, "learning_rate": 4.283771648492624e-06, "loss": 0.9417, "step": 15270 }, { "epoch": 0.1861601647715501, "grad_norm": 1.909382700920105, "learning_rate": 4.283450930083387e-06, "loss": 0.8372, "step": 15275 }, { "epoch": 0.1862211009956979, "grad_norm": 1.9893128871917725, "learning_rate": 4.28313021167415e-06, "loss": 0.8495, "step": 15280 }, { "epoch": 0.1862820372198457, "grad_norm": 2.014310359954834, "learning_rate": 4.282809493264914e-06, "loss": 0.9387, "step": 15285 }, { "epoch": 0.1863429734439935, "grad_norm": 2.241217613220215, "learning_rate": 4.282488774855677e-06, "loss": 0.9142, "step": 15290 }, { "epoch": 0.18640390966814133, "grad_norm": 2.1546919345855713, "learning_rate": 4.28216805644644e-06, "loss": 0.8572, "step": 15295 }, { "epoch": 0.18646484589228912, "grad_norm": 1.9575326442718506, "learning_rate": 4.281847338037204e-06, "loss": 0.8689, "step": 15300 }, { "epoch": 0.18652578211643694, "grad_norm": 2.3091883659362793, "learning_rate": 4.281526619627967e-06, "loss": 0.9784, "step": 15305 }, { "epoch": 0.18658671834058474, "grad_norm": 1.8430900573730469, "learning_rate": 4.28120590121873e-06, "loss": 0.9348, "step": 15310 }, { "epoch": 0.18664765456473256, "grad_norm": 2.1036434173583984, "learning_rate": 4.280885182809494e-06, "loss": 0.902, "step": 15315 }, { "epoch": 0.18670859078888036, "grad_norm": 1.9686697721481323, "learning_rate": 4.280564464400257e-06, "loss": 0.9542, "step": 15320 }, { "epoch": 0.18676952701302815, "grad_norm": 1.6976087093353271, "learning_rate": 4.28024374599102e-06, "loss": 0.8743, "step": 15325 }, { "epoch": 0.18683046323717598, "grad_norm": 1.967179775238037, "learning_rate": 4.279923027581784e-06, "loss": 0.8476, "step": 15330 }, { "epoch": 0.18689139946132377, "grad_norm": 1.8239496946334839, "learning_rate": 4.279602309172547e-06, "loss": 0.9038, "step": 15335 }, { "epoch": 0.1869523356854716, "grad_norm": 2.1718719005584717, "learning_rate": 4.27928159076331e-06, "loss": 0.8837, "step": 15340 }, { "epoch": 0.1870132719096194, "grad_norm": 2.1419339179992676, "learning_rate": 4.278960872354073e-06, "loss": 0.8739, "step": 15345 }, { "epoch": 0.1870742081337672, "grad_norm": 1.7350003719329834, "learning_rate": 4.278640153944837e-06, "loss": 0.8317, "step": 15350 }, { "epoch": 0.187135144357915, "grad_norm": 1.9157408475875854, "learning_rate": 4.2783194355356e-06, "loss": 0.9467, "step": 15355 }, { "epoch": 0.1871960805820628, "grad_norm": 1.7607884407043457, "learning_rate": 4.277998717126363e-06, "loss": 0.9063, "step": 15360 }, { "epoch": 0.18725701680621062, "grad_norm": 2.266162872314453, "learning_rate": 4.277677998717127e-06, "loss": 0.8994, "step": 15365 }, { "epoch": 0.18731795303035842, "grad_norm": 1.5818936824798584, "learning_rate": 4.27735728030789e-06, "loss": 0.9274, "step": 15370 }, { "epoch": 0.18737888925450624, "grad_norm": 1.8792991638183594, "learning_rate": 4.2770365618986535e-06, "loss": 0.8695, "step": 15375 }, { "epoch": 0.18743982547865404, "grad_norm": 1.8697912693023682, "learning_rate": 4.2767158434894165e-06, "loss": 0.9019, "step": 15380 }, { "epoch": 0.18750076170280186, "grad_norm": 2.044232130050659, "learning_rate": 4.2763951250801795e-06, "loss": 0.8779, "step": 15385 }, { "epoch": 0.18756169792694966, "grad_norm": 2.1673507690429688, "learning_rate": 4.276074406670943e-06, "loss": 0.8971, "step": 15390 }, { "epoch": 0.18762263415109745, "grad_norm": 2.0413222312927246, "learning_rate": 4.2757536882617064e-06, "loss": 0.8659, "step": 15395 }, { "epoch": 0.18768357037524527, "grad_norm": 2.3504371643066406, "learning_rate": 4.27543296985247e-06, "loss": 0.8907, "step": 15400 }, { "epoch": 0.18774450659939307, "grad_norm": 1.9891448020935059, "learning_rate": 4.275112251443233e-06, "loss": 0.8718, "step": 15405 }, { "epoch": 0.1878054428235409, "grad_norm": 2.6629369258880615, "learning_rate": 4.274791533033996e-06, "loss": 0.8595, "step": 15410 }, { "epoch": 0.1878663790476887, "grad_norm": 2.237931966781616, "learning_rate": 4.27447081462476e-06, "loss": 0.8953, "step": 15415 }, { "epoch": 0.1879273152718365, "grad_norm": 1.886281967163086, "learning_rate": 4.274150096215523e-06, "loss": 0.9056, "step": 15420 }, { "epoch": 0.1879882514959843, "grad_norm": 1.8495216369628906, "learning_rate": 4.273829377806287e-06, "loss": 0.8594, "step": 15425 }, { "epoch": 0.1880491877201321, "grad_norm": 1.8338332176208496, "learning_rate": 4.27350865939705e-06, "loss": 0.9073, "step": 15430 }, { "epoch": 0.18811012394427992, "grad_norm": 1.757216215133667, "learning_rate": 4.273187940987813e-06, "loss": 0.8376, "step": 15435 }, { "epoch": 0.18817106016842772, "grad_norm": 2.687241792678833, "learning_rate": 4.272867222578576e-06, "loss": 0.9513, "step": 15440 }, { "epoch": 0.18823199639257554, "grad_norm": 2.047394275665283, "learning_rate": 4.27254650416934e-06, "loss": 0.8803, "step": 15445 }, { "epoch": 0.18829293261672334, "grad_norm": 1.7588123083114624, "learning_rate": 4.272225785760103e-06, "loss": 0.8381, "step": 15450 }, { "epoch": 0.18835386884087116, "grad_norm": 2.1456477642059326, "learning_rate": 4.271905067350866e-06, "loss": 0.9751, "step": 15455 }, { "epoch": 0.18841480506501895, "grad_norm": 1.8698151111602783, "learning_rate": 4.27158434894163e-06, "loss": 0.8989, "step": 15460 }, { "epoch": 0.18847574128916675, "grad_norm": 2.164519786834717, "learning_rate": 4.271263630532393e-06, "loss": 0.8974, "step": 15465 }, { "epoch": 0.18853667751331457, "grad_norm": 1.6340184211730957, "learning_rate": 4.270942912123156e-06, "loss": 0.8575, "step": 15470 }, { "epoch": 0.18859761373746237, "grad_norm": 2.0767664909362793, "learning_rate": 4.27062219371392e-06, "loss": 0.8977, "step": 15475 }, { "epoch": 0.1886585499616102, "grad_norm": 1.8390259742736816, "learning_rate": 4.270301475304683e-06, "loss": 0.8747, "step": 15480 }, { "epoch": 0.18871948618575798, "grad_norm": 1.8260940313339233, "learning_rate": 4.269980756895446e-06, "loss": 0.8218, "step": 15485 }, { "epoch": 0.18878042240990578, "grad_norm": 1.798811674118042, "learning_rate": 4.26966003848621e-06, "loss": 0.8565, "step": 15490 }, { "epoch": 0.1888413586340536, "grad_norm": 2.084519624710083, "learning_rate": 4.269339320076973e-06, "loss": 0.9142, "step": 15495 }, { "epoch": 0.1889022948582014, "grad_norm": 2.3586654663085938, "learning_rate": 4.269018601667736e-06, "loss": 0.9164, "step": 15500 }, { "epoch": 0.18896323108234922, "grad_norm": 2.20534610748291, "learning_rate": 4.2686978832584996e-06, "loss": 0.8978, "step": 15505 }, { "epoch": 0.18902416730649702, "grad_norm": 2.1466610431671143, "learning_rate": 4.268377164849263e-06, "loss": 0.8914, "step": 15510 }, { "epoch": 0.18908510353064484, "grad_norm": 1.9006872177124023, "learning_rate": 4.268056446440026e-06, "loss": 0.9048, "step": 15515 }, { "epoch": 0.18914603975479263, "grad_norm": 2.5886833667755127, "learning_rate": 4.2677357280307895e-06, "loss": 0.8914, "step": 15520 }, { "epoch": 0.18920697597894043, "grad_norm": 1.8400570154190063, "learning_rate": 4.2674150096215525e-06, "loss": 0.8818, "step": 15525 }, { "epoch": 0.18926791220308825, "grad_norm": 1.9824669361114502, "learning_rate": 4.2670942912123155e-06, "loss": 0.9712, "step": 15530 }, { "epoch": 0.18932884842723605, "grad_norm": 1.8746665716171265, "learning_rate": 4.266773572803079e-06, "loss": 0.95, "step": 15535 }, { "epoch": 0.18938978465138387, "grad_norm": 1.8793350458145142, "learning_rate": 4.266452854393842e-06, "loss": 0.9612, "step": 15540 }, { "epoch": 0.18945072087553166, "grad_norm": 1.8544799089431763, "learning_rate": 4.266132135984606e-06, "loss": 0.932, "step": 15545 }, { "epoch": 0.1895116570996795, "grad_norm": 1.669069528579712, "learning_rate": 4.265811417575369e-06, "loss": 0.9591, "step": 15550 }, { "epoch": 0.18957259332382728, "grad_norm": 2.043281078338623, "learning_rate": 4.265490699166132e-06, "loss": 0.813, "step": 15555 }, { "epoch": 0.18963352954797508, "grad_norm": 1.917678952217102, "learning_rate": 4.265169980756896e-06, "loss": 0.8894, "step": 15560 }, { "epoch": 0.1896944657721229, "grad_norm": 1.8265796899795532, "learning_rate": 4.264849262347659e-06, "loss": 0.8925, "step": 15565 }, { "epoch": 0.1897554019962707, "grad_norm": 1.9663792848587036, "learning_rate": 4.264528543938423e-06, "loss": 0.9754, "step": 15570 }, { "epoch": 0.18981633822041852, "grad_norm": 2.220665454864502, "learning_rate": 4.264207825529186e-06, "loss": 0.8558, "step": 15575 }, { "epoch": 0.1898772744445663, "grad_norm": 2.4863064289093018, "learning_rate": 4.263887107119949e-06, "loss": 0.8647, "step": 15580 }, { "epoch": 0.18993821066871414, "grad_norm": 1.940902590751648, "learning_rate": 4.263566388710713e-06, "loss": 0.9598, "step": 15585 }, { "epoch": 0.18999914689286193, "grad_norm": 1.9889507293701172, "learning_rate": 4.263245670301476e-06, "loss": 0.9026, "step": 15590 }, { "epoch": 0.19006008311700973, "grad_norm": 2.538588047027588, "learning_rate": 4.262924951892239e-06, "loss": 0.9463, "step": 15595 }, { "epoch": 0.19012101934115755, "grad_norm": 1.839800477027893, "learning_rate": 4.262604233483002e-06, "loss": 0.8462, "step": 15600 }, { "epoch": 0.19018195556530534, "grad_norm": 1.8092095851898193, "learning_rate": 4.262283515073766e-06, "loss": 0.8962, "step": 15605 }, { "epoch": 0.19024289178945317, "grad_norm": 1.9320369958877563, "learning_rate": 4.261962796664529e-06, "loss": 0.877, "step": 15610 }, { "epoch": 0.19030382801360096, "grad_norm": 2.271108865737915, "learning_rate": 4.261642078255292e-06, "loss": 0.9146, "step": 15615 }, { "epoch": 0.19036476423774878, "grad_norm": 1.7348421812057495, "learning_rate": 4.261321359846056e-06, "loss": 0.8939, "step": 15620 }, { "epoch": 0.19042570046189658, "grad_norm": 2.0682876110076904, "learning_rate": 4.261000641436819e-06, "loss": 0.9232, "step": 15625 }, { "epoch": 0.19048663668604437, "grad_norm": 2.1741137504577637, "learning_rate": 4.260679923027582e-06, "loss": 0.8408, "step": 15630 }, { "epoch": 0.1905475729101922, "grad_norm": 2.1521682739257812, "learning_rate": 4.260359204618346e-06, "loss": 0.8694, "step": 15635 }, { "epoch": 0.19060850913434, "grad_norm": 1.9242669343948364, "learning_rate": 4.260038486209109e-06, "loss": 0.8576, "step": 15640 }, { "epoch": 0.19066944535848782, "grad_norm": 2.2052524089813232, "learning_rate": 4.259717767799872e-06, "loss": 0.8803, "step": 15645 }, { "epoch": 0.1907303815826356, "grad_norm": 2.0415940284729004, "learning_rate": 4.2593970493906355e-06, "loss": 0.8648, "step": 15650 }, { "epoch": 0.19079131780678343, "grad_norm": 2.2098517417907715, "learning_rate": 4.2590763309813985e-06, "loss": 0.8879, "step": 15655 }, { "epoch": 0.19085225403093123, "grad_norm": 1.921482801437378, "learning_rate": 4.2587556125721616e-06, "loss": 0.9037, "step": 15660 }, { "epoch": 0.19091319025507902, "grad_norm": 1.8529951572418213, "learning_rate": 4.2584348941629254e-06, "loss": 0.9438, "step": 15665 }, { "epoch": 0.19097412647922685, "grad_norm": 2.118016481399536, "learning_rate": 4.2581141757536884e-06, "loss": 0.8885, "step": 15670 }, { "epoch": 0.19103506270337464, "grad_norm": 1.7452259063720703, "learning_rate": 4.2577934573444515e-06, "loss": 0.8318, "step": 15675 }, { "epoch": 0.19109599892752246, "grad_norm": 1.89751136302948, "learning_rate": 4.257472738935215e-06, "loss": 0.8599, "step": 15680 }, { "epoch": 0.19115693515167026, "grad_norm": 2.283938407897949, "learning_rate": 4.257152020525978e-06, "loss": 0.8893, "step": 15685 }, { "epoch": 0.19121787137581808, "grad_norm": 2.349443197250366, "learning_rate": 4.256831302116742e-06, "loss": 0.9005, "step": 15690 }, { "epoch": 0.19127880759996588, "grad_norm": 1.8099467754364014, "learning_rate": 4.256510583707505e-06, "loss": 0.939, "step": 15695 }, { "epoch": 0.19133974382411367, "grad_norm": 1.9236794710159302, "learning_rate": 4.256189865298268e-06, "loss": 0.9121, "step": 15700 }, { "epoch": 0.1914006800482615, "grad_norm": 2.0088083744049072, "learning_rate": 4.255869146889032e-06, "loss": 0.8359, "step": 15705 }, { "epoch": 0.1914616162724093, "grad_norm": 2.6953139305114746, "learning_rate": 4.255548428479795e-06, "loss": 0.9097, "step": 15710 }, { "epoch": 0.1915225524965571, "grad_norm": 1.714236855506897, "learning_rate": 4.255227710070559e-06, "loss": 0.8945, "step": 15715 }, { "epoch": 0.1915834887207049, "grad_norm": 2.0343189239501953, "learning_rate": 4.254906991661322e-06, "loss": 0.9004, "step": 15720 }, { "epoch": 0.1916444249448527, "grad_norm": 1.750218152999878, "learning_rate": 4.254586273252085e-06, "loss": 0.866, "step": 15725 }, { "epoch": 0.19170536116900053, "grad_norm": 1.7957957983016968, "learning_rate": 4.254265554842849e-06, "loss": 0.9146, "step": 15730 }, { "epoch": 0.19176629739314832, "grad_norm": 2.1464927196502686, "learning_rate": 4.253944836433612e-06, "loss": 0.8476, "step": 15735 }, { "epoch": 0.19182723361729614, "grad_norm": 1.7902649641036987, "learning_rate": 4.253624118024375e-06, "loss": 0.922, "step": 15740 }, { "epoch": 0.19188816984144394, "grad_norm": 1.9727951288223267, "learning_rate": 4.253303399615139e-06, "loss": 0.9188, "step": 15745 }, { "epoch": 0.19194910606559176, "grad_norm": 2.121424436569214, "learning_rate": 4.252982681205902e-06, "loss": 0.8739, "step": 15750 }, { "epoch": 0.19201004228973956, "grad_norm": 1.9611268043518066, "learning_rate": 4.252661962796665e-06, "loss": 0.9113, "step": 15755 }, { "epoch": 0.19207097851388735, "grad_norm": 2.122347354888916, "learning_rate": 4.252341244387429e-06, "loss": 0.9382, "step": 15760 }, { "epoch": 0.19213191473803518, "grad_norm": 1.9715858697891235, "learning_rate": 4.252020525978192e-06, "loss": 0.8947, "step": 15765 }, { "epoch": 0.19219285096218297, "grad_norm": 2.109145164489746, "learning_rate": 4.251699807568955e-06, "loss": 0.9274, "step": 15770 }, { "epoch": 0.1922537871863308, "grad_norm": 1.6394861936569214, "learning_rate": 4.251379089159718e-06, "loss": 0.8437, "step": 15775 }, { "epoch": 0.1923147234104786, "grad_norm": 1.8198778629302979, "learning_rate": 4.251058370750482e-06, "loss": 0.9276, "step": 15780 }, { "epoch": 0.1923756596346264, "grad_norm": 1.8150478601455688, "learning_rate": 4.250737652341245e-06, "loss": 0.9701, "step": 15785 }, { "epoch": 0.1924365958587742, "grad_norm": 2.1723833084106445, "learning_rate": 4.250416933932008e-06, "loss": 0.9165, "step": 15790 }, { "epoch": 0.192497532082922, "grad_norm": 1.796249270439148, "learning_rate": 4.2500962155227715e-06, "loss": 0.8702, "step": 15795 }, { "epoch": 0.19255846830706982, "grad_norm": 2.2195777893066406, "learning_rate": 4.2497754971135345e-06, "loss": 0.8778, "step": 15800 }, { "epoch": 0.19261940453121762, "grad_norm": 2.032247543334961, "learning_rate": 4.2494547787042975e-06, "loss": 0.8487, "step": 15805 }, { "epoch": 0.19268034075536544, "grad_norm": 1.9267922639846802, "learning_rate": 4.249134060295061e-06, "loss": 0.7997, "step": 15810 }, { "epoch": 0.19274127697951324, "grad_norm": 1.8080332279205322, "learning_rate": 4.248813341885824e-06, "loss": 0.8847, "step": 15815 }, { "epoch": 0.19280221320366106, "grad_norm": 1.7576175928115845, "learning_rate": 4.248492623476587e-06, "loss": 0.8532, "step": 15820 }, { "epoch": 0.19286314942780886, "grad_norm": 1.9840017557144165, "learning_rate": 4.248171905067351e-06, "loss": 0.8994, "step": 15825 }, { "epoch": 0.19292408565195665, "grad_norm": 1.773672103881836, "learning_rate": 4.247851186658114e-06, "loss": 0.9214, "step": 15830 }, { "epoch": 0.19298502187610447, "grad_norm": 2.4731950759887695, "learning_rate": 4.247530468248877e-06, "loss": 0.9187, "step": 15835 }, { "epoch": 0.19304595810025227, "grad_norm": 1.840160608291626, "learning_rate": 4.247209749839641e-06, "loss": 0.9327, "step": 15840 }, { "epoch": 0.1931068943244001, "grad_norm": 1.913757562637329, "learning_rate": 4.246889031430404e-06, "loss": 0.8976, "step": 15845 }, { "epoch": 0.1931678305485479, "grad_norm": 1.7048569917678833, "learning_rate": 4.246568313021168e-06, "loss": 0.8323, "step": 15850 }, { "epoch": 0.1932287667726957, "grad_norm": 1.9964609146118164, "learning_rate": 4.246247594611931e-06, "loss": 0.9019, "step": 15855 }, { "epoch": 0.1932897029968435, "grad_norm": 1.859647274017334, "learning_rate": 4.245926876202694e-06, "loss": 0.9412, "step": 15860 }, { "epoch": 0.1933506392209913, "grad_norm": 1.8377388715744019, "learning_rate": 4.245606157793458e-06, "loss": 0.9372, "step": 15865 }, { "epoch": 0.19341157544513912, "grad_norm": 1.6945219039916992, "learning_rate": 4.245285439384221e-06, "loss": 0.9184, "step": 15870 }, { "epoch": 0.19347251166928692, "grad_norm": 2.026324987411499, "learning_rate": 4.244964720974985e-06, "loss": 0.9139, "step": 15875 }, { "epoch": 0.19353344789343474, "grad_norm": 2.097872495651245, "learning_rate": 4.244644002565748e-06, "loss": 0.9435, "step": 15880 }, { "epoch": 0.19359438411758254, "grad_norm": 2.1487324237823486, "learning_rate": 4.244323284156511e-06, "loss": 0.8956, "step": 15885 }, { "epoch": 0.19365532034173036, "grad_norm": 1.9486607313156128, "learning_rate": 4.244002565747275e-06, "loss": 0.9441, "step": 15890 }, { "epoch": 0.19371625656587815, "grad_norm": 2.630201816558838, "learning_rate": 4.243681847338038e-06, "loss": 0.9089, "step": 15895 }, { "epoch": 0.19377719279002595, "grad_norm": 2.0458900928497314, "learning_rate": 4.243361128928801e-06, "loss": 0.869, "step": 15900 }, { "epoch": 0.19383812901417377, "grad_norm": 1.961525797843933, "learning_rate": 4.243040410519565e-06, "loss": 0.9156, "step": 15905 }, { "epoch": 0.19389906523832157, "grad_norm": 1.7439707517623901, "learning_rate": 4.242719692110328e-06, "loss": 0.8988, "step": 15910 }, { "epoch": 0.1939600014624694, "grad_norm": 1.9398201704025269, "learning_rate": 4.242398973701091e-06, "loss": 0.8856, "step": 15915 }, { "epoch": 0.19402093768661718, "grad_norm": 2.0684406757354736, "learning_rate": 4.2420782552918545e-06, "loss": 0.9018, "step": 15920 }, { "epoch": 0.194081873910765, "grad_norm": 1.8532155752182007, "learning_rate": 4.2417575368826175e-06, "loss": 0.9669, "step": 15925 }, { "epoch": 0.1941428101349128, "grad_norm": 3.154637336730957, "learning_rate": 4.2414368184733806e-06, "loss": 0.8447, "step": 15930 }, { "epoch": 0.1942037463590606, "grad_norm": 2.112748146057129, "learning_rate": 4.241116100064144e-06, "loss": 0.9183, "step": 15935 }, { "epoch": 0.19426468258320842, "grad_norm": 1.933961272239685, "learning_rate": 4.2407953816549074e-06, "loss": 0.9304, "step": 15940 }, { "epoch": 0.19432561880735622, "grad_norm": 1.661116361618042, "learning_rate": 4.2404746632456705e-06, "loss": 0.9393, "step": 15945 }, { "epoch": 0.19438655503150404, "grad_norm": 2.452359437942505, "learning_rate": 4.2401539448364335e-06, "loss": 0.8053, "step": 15950 }, { "epoch": 0.19444749125565183, "grad_norm": 1.971739411354065, "learning_rate": 4.239833226427197e-06, "loss": 0.8418, "step": 15955 }, { "epoch": 0.19450842747979963, "grad_norm": 2.296663522720337, "learning_rate": 4.23951250801796e-06, "loss": 0.9056, "step": 15960 }, { "epoch": 0.19456936370394745, "grad_norm": 1.6502470970153809, "learning_rate": 4.239191789608723e-06, "loss": 0.8446, "step": 15965 }, { "epoch": 0.19463029992809525, "grad_norm": 1.9570517539978027, "learning_rate": 4.238871071199487e-06, "loss": 0.867, "step": 15970 }, { "epoch": 0.19469123615224307, "grad_norm": 2.1164517402648926, "learning_rate": 4.23855035279025e-06, "loss": 0.9164, "step": 15975 }, { "epoch": 0.19475217237639086, "grad_norm": 1.8546620607376099, "learning_rate": 4.238229634381013e-06, "loss": 0.904, "step": 15980 }, { "epoch": 0.1948131086005387, "grad_norm": 1.699211835861206, "learning_rate": 4.237908915971777e-06, "loss": 0.9374, "step": 15985 }, { "epoch": 0.19487404482468648, "grad_norm": 1.9582163095474243, "learning_rate": 4.23758819756254e-06, "loss": 0.9035, "step": 15990 }, { "epoch": 0.19493498104883428, "grad_norm": 1.8137638568878174, "learning_rate": 4.237267479153304e-06, "loss": 0.8567, "step": 15995 }, { "epoch": 0.1949959172729821, "grad_norm": 2.1097378730773926, "learning_rate": 4.236946760744067e-06, "loss": 0.8473, "step": 16000 }, { "epoch": 0.1950568534971299, "grad_norm": 2.442216634750366, "learning_rate": 4.23662604233483e-06, "loss": 0.8621, "step": 16005 }, { "epoch": 0.19511778972127772, "grad_norm": 2.202782154083252, "learning_rate": 4.236305323925594e-06, "loss": 0.8684, "step": 16010 }, { "epoch": 0.1951787259454255, "grad_norm": 1.898075819015503, "learning_rate": 4.235984605516357e-06, "loss": 0.8592, "step": 16015 }, { "epoch": 0.19523966216957334, "grad_norm": 1.9603185653686523, "learning_rate": 4.235663887107121e-06, "loss": 0.8468, "step": 16020 }, { "epoch": 0.19530059839372113, "grad_norm": 2.001278877258301, "learning_rate": 4.235343168697884e-06, "loss": 0.8892, "step": 16025 }, { "epoch": 0.19536153461786893, "grad_norm": 1.9915565252304077, "learning_rate": 4.235022450288647e-06, "loss": 0.8854, "step": 16030 }, { "epoch": 0.19542247084201675, "grad_norm": 2.2001397609710693, "learning_rate": 4.234701731879411e-06, "loss": 0.9155, "step": 16035 }, { "epoch": 0.19548340706616454, "grad_norm": 1.8820099830627441, "learning_rate": 4.234381013470174e-06, "loss": 0.832, "step": 16040 }, { "epoch": 0.19554434329031237, "grad_norm": 2.3584959506988525, "learning_rate": 4.234060295060937e-06, "loss": 0.8877, "step": 16045 }, { "epoch": 0.19560527951446016, "grad_norm": 1.8701802492141724, "learning_rate": 4.233739576651701e-06, "loss": 0.8777, "step": 16050 }, { "epoch": 0.19566621573860798, "grad_norm": 1.924904465675354, "learning_rate": 4.233418858242464e-06, "loss": 0.9106, "step": 16055 }, { "epoch": 0.19572715196275578, "grad_norm": 1.9091490507125854, "learning_rate": 4.233098139833227e-06, "loss": 0.918, "step": 16060 }, { "epoch": 0.19578808818690357, "grad_norm": 2.1754252910614014, "learning_rate": 4.2327774214239905e-06, "loss": 0.9105, "step": 16065 }, { "epoch": 0.1958490244110514, "grad_norm": 2.2501380443573, "learning_rate": 4.2324567030147535e-06, "loss": 0.8706, "step": 16070 }, { "epoch": 0.1959099606351992, "grad_norm": 1.8302257061004639, "learning_rate": 4.2321359846055165e-06, "loss": 0.8372, "step": 16075 }, { "epoch": 0.19597089685934702, "grad_norm": 2.2471561431884766, "learning_rate": 4.23181526619628e-06, "loss": 0.9918, "step": 16080 }, { "epoch": 0.1960318330834948, "grad_norm": 1.9418461322784424, "learning_rate": 4.231494547787043e-06, "loss": 0.8729, "step": 16085 }, { "epoch": 0.19609276930764263, "grad_norm": 1.9265937805175781, "learning_rate": 4.231173829377806e-06, "loss": 0.8837, "step": 16090 }, { "epoch": 0.19615370553179043, "grad_norm": 1.9693320989608765, "learning_rate": 4.23085311096857e-06, "loss": 0.917, "step": 16095 }, { "epoch": 0.19621464175593822, "grad_norm": 1.8184354305267334, "learning_rate": 4.230532392559333e-06, "loss": 0.8987, "step": 16100 }, { "epoch": 0.19627557798008605, "grad_norm": 2.463730812072754, "learning_rate": 4.230211674150096e-06, "loss": 0.9086, "step": 16105 }, { "epoch": 0.19633651420423384, "grad_norm": 1.7965449094772339, "learning_rate": 4.229890955740859e-06, "loss": 0.8968, "step": 16110 }, { "epoch": 0.19639745042838166, "grad_norm": 2.1692841053009033, "learning_rate": 4.229570237331623e-06, "loss": 0.8676, "step": 16115 }, { "epoch": 0.19645838665252946, "grad_norm": 1.9364699125289917, "learning_rate": 4.229249518922386e-06, "loss": 0.9032, "step": 16120 }, { "epoch": 0.19651932287667728, "grad_norm": 2.0623056888580322, "learning_rate": 4.228928800513149e-06, "loss": 0.8836, "step": 16125 }, { "epoch": 0.19658025910082508, "grad_norm": 1.8363670110702515, "learning_rate": 4.228608082103913e-06, "loss": 0.9246, "step": 16130 }, { "epoch": 0.19664119532497287, "grad_norm": 1.823791265487671, "learning_rate": 4.228287363694676e-06, "loss": 0.9451, "step": 16135 }, { "epoch": 0.1967021315491207, "grad_norm": 1.8720149993896484, "learning_rate": 4.227966645285439e-06, "loss": 0.8434, "step": 16140 }, { "epoch": 0.1967630677732685, "grad_norm": 2.1325795650482178, "learning_rate": 4.227645926876203e-06, "loss": 0.8736, "step": 16145 }, { "epoch": 0.1968240039974163, "grad_norm": 1.823980450630188, "learning_rate": 4.227325208466966e-06, "loss": 0.8869, "step": 16150 }, { "epoch": 0.1968849402215641, "grad_norm": 1.8274955749511719, "learning_rate": 4.22700449005773e-06, "loss": 0.8644, "step": 16155 }, { "epoch": 0.19694587644571193, "grad_norm": 1.9504613876342773, "learning_rate": 4.226683771648493e-06, "loss": 0.8608, "step": 16160 }, { "epoch": 0.19700681266985973, "grad_norm": 2.240994453430176, "learning_rate": 4.226363053239257e-06, "loss": 0.8886, "step": 16165 }, { "epoch": 0.19706774889400752, "grad_norm": 1.9349019527435303, "learning_rate": 4.22604233483002e-06, "loss": 0.8424, "step": 16170 }, { "epoch": 0.19712868511815534, "grad_norm": 1.7100220918655396, "learning_rate": 4.225721616420783e-06, "loss": 0.8971, "step": 16175 }, { "epoch": 0.19718962134230314, "grad_norm": 2.01554799079895, "learning_rate": 4.225400898011547e-06, "loss": 0.8664, "step": 16180 }, { "epoch": 0.19725055756645096, "grad_norm": 2.1444880962371826, "learning_rate": 4.22508017960231e-06, "loss": 0.9062, "step": 16185 }, { "epoch": 0.19731149379059876, "grad_norm": 2.212217092514038, "learning_rate": 4.224759461193073e-06, "loss": 0.8922, "step": 16190 }, { "epoch": 0.19737243001474655, "grad_norm": 1.7975527048110962, "learning_rate": 4.2244387427838365e-06, "loss": 0.8829, "step": 16195 }, { "epoch": 0.19743336623889438, "grad_norm": 1.8546730279922485, "learning_rate": 4.2241180243745996e-06, "loss": 0.8817, "step": 16200 }, { "epoch": 0.19749430246304217, "grad_norm": 2.0274221897125244, "learning_rate": 4.223797305965363e-06, "loss": 0.908, "step": 16205 }, { "epoch": 0.19755523868719, "grad_norm": 1.735410213470459, "learning_rate": 4.2234765875561264e-06, "loss": 0.8839, "step": 16210 }, { "epoch": 0.1976161749113378, "grad_norm": 2.562096118927002, "learning_rate": 4.2231558691468895e-06, "loss": 0.8858, "step": 16215 }, { "epoch": 0.1976771111354856, "grad_norm": 1.9949588775634766, "learning_rate": 4.2228351507376525e-06, "loss": 0.9395, "step": 16220 }, { "epoch": 0.1977380473596334, "grad_norm": 2.0070865154266357, "learning_rate": 4.222514432328416e-06, "loss": 0.9011, "step": 16225 }, { "epoch": 0.1977989835837812, "grad_norm": 1.9874736070632935, "learning_rate": 4.222193713919179e-06, "loss": 0.9109, "step": 16230 }, { "epoch": 0.19785991980792902, "grad_norm": 1.7373969554901123, "learning_rate": 4.221872995509942e-06, "loss": 0.8432, "step": 16235 }, { "epoch": 0.19792085603207682, "grad_norm": 1.9048271179199219, "learning_rate": 4.221552277100706e-06, "loss": 0.8862, "step": 16240 }, { "epoch": 0.19798179225622464, "grad_norm": 2.0146572589874268, "learning_rate": 4.221231558691469e-06, "loss": 0.8599, "step": 16245 }, { "epoch": 0.19804272848037244, "grad_norm": 1.8854198455810547, "learning_rate": 4.220910840282232e-06, "loss": 0.8331, "step": 16250 }, { "epoch": 0.19810366470452026, "grad_norm": 2.0797698497772217, "learning_rate": 4.220590121872996e-06, "loss": 0.8776, "step": 16255 }, { "epoch": 0.19816460092866806, "grad_norm": 1.947447657585144, "learning_rate": 4.220269403463759e-06, "loss": 0.869, "step": 16260 }, { "epoch": 0.19822553715281585, "grad_norm": 2.044849395751953, "learning_rate": 4.219948685054522e-06, "loss": 0.916, "step": 16265 }, { "epoch": 0.19828647337696367, "grad_norm": 1.8679041862487793, "learning_rate": 4.219627966645285e-06, "loss": 0.9082, "step": 16270 }, { "epoch": 0.19834740960111147, "grad_norm": 1.872450590133667, "learning_rate": 4.219307248236049e-06, "loss": 0.8559, "step": 16275 }, { "epoch": 0.1984083458252593, "grad_norm": 2.0390725135803223, "learning_rate": 4.218986529826812e-06, "loss": 0.888, "step": 16280 }, { "epoch": 0.1984692820494071, "grad_norm": 1.7710798978805542, "learning_rate": 4.218665811417575e-06, "loss": 0.9679, "step": 16285 }, { "epoch": 0.1985302182735549, "grad_norm": 1.9038736820220947, "learning_rate": 4.218345093008339e-06, "loss": 0.9459, "step": 16290 }, { "epoch": 0.1985911544977027, "grad_norm": 1.6836580038070679, "learning_rate": 4.218024374599102e-06, "loss": 0.8777, "step": 16295 }, { "epoch": 0.1986520907218505, "grad_norm": 1.933456540107727, "learning_rate": 4.217703656189866e-06, "loss": 0.8982, "step": 16300 }, { "epoch": 0.19871302694599832, "grad_norm": 2.2579033374786377, "learning_rate": 4.217382937780629e-06, "loss": 0.8544, "step": 16305 }, { "epoch": 0.19877396317014612, "grad_norm": 1.7938696146011353, "learning_rate": 4.217062219371392e-06, "loss": 0.8801, "step": 16310 }, { "epoch": 0.19883489939429394, "grad_norm": 2.0543792247772217, "learning_rate": 4.216741500962156e-06, "loss": 0.9073, "step": 16315 }, { "epoch": 0.19889583561844174, "grad_norm": 2.132615089416504, "learning_rate": 4.216420782552919e-06, "loss": 0.8777, "step": 16320 }, { "epoch": 0.19895677184258956, "grad_norm": 2.1952812671661377, "learning_rate": 4.216100064143683e-06, "loss": 0.9406, "step": 16325 }, { "epoch": 0.19901770806673735, "grad_norm": 1.95494544506073, "learning_rate": 4.215779345734446e-06, "loss": 0.8687, "step": 16330 }, { "epoch": 0.19907864429088515, "grad_norm": 2.245269298553467, "learning_rate": 4.215458627325209e-06, "loss": 0.8454, "step": 16335 }, { "epoch": 0.19913958051503297, "grad_norm": 2.200857639312744, "learning_rate": 4.2151379089159725e-06, "loss": 0.877, "step": 16340 }, { "epoch": 0.19920051673918077, "grad_norm": 2.0595524311065674, "learning_rate": 4.2148171905067355e-06, "loss": 0.9205, "step": 16345 }, { "epoch": 0.1992614529633286, "grad_norm": 2.3082079887390137, "learning_rate": 4.214496472097499e-06, "loss": 0.8705, "step": 16350 }, { "epoch": 0.19932238918747638, "grad_norm": 2.112125873565674, "learning_rate": 4.214175753688262e-06, "loss": 0.9205, "step": 16355 }, { "epoch": 0.1993833254116242, "grad_norm": 1.8221642971038818, "learning_rate": 4.213855035279025e-06, "loss": 0.9043, "step": 16360 }, { "epoch": 0.199444261635772, "grad_norm": 3.0284438133239746, "learning_rate": 4.2135343168697884e-06, "loss": 0.9457, "step": 16365 }, { "epoch": 0.1995051978599198, "grad_norm": 1.8702826499938965, "learning_rate": 4.213213598460552e-06, "loss": 0.8334, "step": 16370 }, { "epoch": 0.19956613408406762, "grad_norm": 2.4246835708618164, "learning_rate": 4.212892880051315e-06, "loss": 0.9034, "step": 16375 }, { "epoch": 0.19962707030821542, "grad_norm": 2.051607608795166, "learning_rate": 4.212572161642078e-06, "loss": 0.9668, "step": 16380 }, { "epoch": 0.19968800653236324, "grad_norm": 1.7993589639663696, "learning_rate": 4.212251443232842e-06, "loss": 0.9418, "step": 16385 }, { "epoch": 0.19974894275651103, "grad_norm": 1.819365382194519, "learning_rate": 4.211930724823605e-06, "loss": 0.9054, "step": 16390 }, { "epoch": 0.19980987898065886, "grad_norm": 1.7145663499832153, "learning_rate": 4.211610006414368e-06, "loss": 0.8685, "step": 16395 }, { "epoch": 0.19987081520480665, "grad_norm": 1.9325028657913208, "learning_rate": 4.211289288005132e-06, "loss": 0.9585, "step": 16400 }, { "epoch": 0.19993175142895445, "grad_norm": 1.8778986930847168, "learning_rate": 4.210968569595895e-06, "loss": 0.9403, "step": 16405 }, { "epoch": 0.19999268765310227, "grad_norm": 2.1581578254699707, "learning_rate": 4.210647851186658e-06, "loss": 0.8428, "step": 16410 }, { "epoch": 0.20005362387725006, "grad_norm": 1.895666241645813, "learning_rate": 4.210327132777422e-06, "loss": 0.9337, "step": 16415 }, { "epoch": 0.2001145601013979, "grad_norm": 1.7470862865447998, "learning_rate": 4.210006414368185e-06, "loss": 0.8752, "step": 16420 }, { "epoch": 0.20017549632554568, "grad_norm": 2.180246114730835, "learning_rate": 4.209685695958948e-06, "loss": 0.83, "step": 16425 }, { "epoch": 0.20023643254969348, "grad_norm": 1.8292417526245117, "learning_rate": 4.209364977549712e-06, "loss": 0.8818, "step": 16430 }, { "epoch": 0.2002973687738413, "grad_norm": 2.0688631534576416, "learning_rate": 4.209044259140475e-06, "loss": 0.8298, "step": 16435 }, { "epoch": 0.2003583049979891, "grad_norm": 1.969746708869934, "learning_rate": 4.208723540731238e-06, "loss": 0.9525, "step": 16440 }, { "epoch": 0.20041924122213692, "grad_norm": 1.5841854810714722, "learning_rate": 4.208402822322002e-06, "loss": 0.843, "step": 16445 }, { "epoch": 0.2004801774462847, "grad_norm": 1.947433590888977, "learning_rate": 4.208082103912765e-06, "loss": 0.874, "step": 16450 }, { "epoch": 0.20054111367043254, "grad_norm": 2.0363850593566895, "learning_rate": 4.207761385503528e-06, "loss": 0.9294, "step": 16455 }, { "epoch": 0.20060204989458033, "grad_norm": 1.9606245756149292, "learning_rate": 4.207440667094292e-06, "loss": 0.8898, "step": 16460 }, { "epoch": 0.20066298611872813, "grad_norm": 2.025986909866333, "learning_rate": 4.207119948685055e-06, "loss": 0.8988, "step": 16465 }, { "epoch": 0.20072392234287595, "grad_norm": 2.021016836166382, "learning_rate": 4.2067992302758186e-06, "loss": 0.8857, "step": 16470 }, { "epoch": 0.20078485856702374, "grad_norm": 1.8687525987625122, "learning_rate": 4.206478511866582e-06, "loss": 0.8378, "step": 16475 }, { "epoch": 0.20084579479117157, "grad_norm": 2.0895063877105713, "learning_rate": 4.206157793457345e-06, "loss": 0.9, "step": 16480 }, { "epoch": 0.20090673101531936, "grad_norm": 1.8043562173843384, "learning_rate": 4.2058370750481085e-06, "loss": 0.949, "step": 16485 }, { "epoch": 0.20096766723946718, "grad_norm": 2.08874249458313, "learning_rate": 4.2055163566388715e-06, "loss": 0.9114, "step": 16490 }, { "epoch": 0.20102860346361498, "grad_norm": 2.03381609916687, "learning_rate": 4.205195638229635e-06, "loss": 0.9589, "step": 16495 }, { "epoch": 0.20108953968776277, "grad_norm": 2.082388401031494, "learning_rate": 4.204874919820398e-06, "loss": 0.901, "step": 16500 }, { "epoch": 0.2011504759119106, "grad_norm": 2.426539659500122, "learning_rate": 4.204554201411161e-06, "loss": 0.9443, "step": 16505 }, { "epoch": 0.2012114121360584, "grad_norm": 2.3109257221221924, "learning_rate": 4.204233483001925e-06, "loss": 0.9678, "step": 16510 }, { "epoch": 0.20127234836020622, "grad_norm": 2.129883289337158, "learning_rate": 4.203912764592688e-06, "loss": 0.8695, "step": 16515 }, { "epoch": 0.201333284584354, "grad_norm": 1.9388890266418457, "learning_rate": 4.203592046183451e-06, "loss": 0.9791, "step": 16520 }, { "epoch": 0.20139422080850183, "grad_norm": 1.7688847780227661, "learning_rate": 4.203271327774214e-06, "loss": 0.9049, "step": 16525 }, { "epoch": 0.20145515703264963, "grad_norm": 1.7294479608535767, "learning_rate": 4.202950609364978e-06, "loss": 0.8865, "step": 16530 }, { "epoch": 0.20151609325679742, "grad_norm": 1.8192626237869263, "learning_rate": 4.202629890955741e-06, "loss": 0.8363, "step": 16535 }, { "epoch": 0.20157702948094525, "grad_norm": 1.9209479093551636, "learning_rate": 4.202309172546504e-06, "loss": 0.9629, "step": 16540 }, { "epoch": 0.20163796570509304, "grad_norm": 2.830956220626831, "learning_rate": 4.201988454137268e-06, "loss": 0.8394, "step": 16545 }, { "epoch": 0.20169890192924086, "grad_norm": 2.063227415084839, "learning_rate": 4.201667735728031e-06, "loss": 0.8646, "step": 16550 }, { "epoch": 0.20175983815338866, "grad_norm": 1.8681679964065552, "learning_rate": 4.201347017318794e-06, "loss": 0.9242, "step": 16555 }, { "epoch": 0.20182077437753648, "grad_norm": 1.8297938108444214, "learning_rate": 4.201026298909558e-06, "loss": 0.9118, "step": 16560 }, { "epoch": 0.20188171060168428, "grad_norm": 2.398533344268799, "learning_rate": 4.200705580500321e-06, "loss": 0.9187, "step": 16565 }, { "epoch": 0.20194264682583207, "grad_norm": 1.8329789638519287, "learning_rate": 4.200384862091084e-06, "loss": 0.8436, "step": 16570 }, { "epoch": 0.2020035830499799, "grad_norm": 1.897797703742981, "learning_rate": 4.200064143681848e-06, "loss": 0.8281, "step": 16575 }, { "epoch": 0.2020645192741277, "grad_norm": 1.9575071334838867, "learning_rate": 4.199743425272611e-06, "loss": 0.9129, "step": 16580 }, { "epoch": 0.2021254554982755, "grad_norm": 1.94670832157135, "learning_rate": 4.199422706863374e-06, "loss": 0.9114, "step": 16585 }, { "epoch": 0.2021863917224233, "grad_norm": 1.96864652633667, "learning_rate": 4.199101988454138e-06, "loss": 0.8954, "step": 16590 }, { "epoch": 0.20224732794657113, "grad_norm": 2.234560489654541, "learning_rate": 4.198781270044901e-06, "loss": 0.9287, "step": 16595 }, { "epoch": 0.20230826417071893, "grad_norm": 1.9449392557144165, "learning_rate": 4.198460551635664e-06, "loss": 0.9792, "step": 16600 }, { "epoch": 0.20236920039486672, "grad_norm": 2.1470589637756348, "learning_rate": 4.198139833226428e-06, "loss": 0.9537, "step": 16605 }, { "epoch": 0.20243013661901454, "grad_norm": 1.8850444555282593, "learning_rate": 4.197819114817191e-06, "loss": 0.9086, "step": 16610 }, { "epoch": 0.20249107284316234, "grad_norm": 1.995531678199768, "learning_rate": 4.197498396407954e-06, "loss": 0.8733, "step": 16615 }, { "epoch": 0.20255200906731016, "grad_norm": 1.7459070682525635, "learning_rate": 4.1971776779987175e-06, "loss": 0.8841, "step": 16620 }, { "epoch": 0.20261294529145796, "grad_norm": 1.6276534795761108, "learning_rate": 4.1968569595894806e-06, "loss": 0.8839, "step": 16625 }, { "epoch": 0.20267388151560578, "grad_norm": 1.7656433582305908, "learning_rate": 4.196536241180244e-06, "loss": 0.788, "step": 16630 }, { "epoch": 0.20273481773975358, "grad_norm": 1.8254520893096924, "learning_rate": 4.1962155227710074e-06, "loss": 0.9025, "step": 16635 }, { "epoch": 0.20279575396390137, "grad_norm": 2.005995035171509, "learning_rate": 4.195894804361771e-06, "loss": 0.7961, "step": 16640 }, { "epoch": 0.2028566901880492, "grad_norm": 2.0742924213409424, "learning_rate": 4.195574085952534e-06, "loss": 0.9391, "step": 16645 }, { "epoch": 0.202917626412197, "grad_norm": 2.4857470989227295, "learning_rate": 4.195253367543297e-06, "loss": 0.8618, "step": 16650 }, { "epoch": 0.2029785626363448, "grad_norm": 2.005977153778076, "learning_rate": 4.194932649134061e-06, "loss": 0.8752, "step": 16655 }, { "epoch": 0.2030394988604926, "grad_norm": 1.8472188711166382, "learning_rate": 4.194611930724824e-06, "loss": 0.8615, "step": 16660 }, { "epoch": 0.2031004350846404, "grad_norm": 1.7748498916625977, "learning_rate": 4.194291212315587e-06, "loss": 0.8528, "step": 16665 }, { "epoch": 0.20316137130878822, "grad_norm": 1.8654906749725342, "learning_rate": 4.193970493906351e-06, "loss": 0.8209, "step": 16670 }, { "epoch": 0.20322230753293602, "grad_norm": 2.0594654083251953, "learning_rate": 4.193649775497114e-06, "loss": 0.8404, "step": 16675 }, { "epoch": 0.20328324375708384, "grad_norm": 2.0610480308532715, "learning_rate": 4.193329057087877e-06, "loss": 0.7713, "step": 16680 }, { "epoch": 0.20334417998123164, "grad_norm": 1.9597725868225098, "learning_rate": 4.193008338678641e-06, "loss": 0.9509, "step": 16685 }, { "epoch": 0.20340511620537946, "grad_norm": 2.0297389030456543, "learning_rate": 4.192687620269404e-06, "loss": 0.8478, "step": 16690 }, { "epoch": 0.20346605242952726, "grad_norm": 1.8609262704849243, "learning_rate": 4.192366901860167e-06, "loss": 0.9047, "step": 16695 }, { "epoch": 0.20352698865367505, "grad_norm": 2.2036311626434326, "learning_rate": 4.19204618345093e-06, "loss": 0.9143, "step": 16700 }, { "epoch": 0.20358792487782287, "grad_norm": 1.6541755199432373, "learning_rate": 4.191725465041694e-06, "loss": 0.8895, "step": 16705 }, { "epoch": 0.20364886110197067, "grad_norm": 1.9752284288406372, "learning_rate": 4.191404746632457e-06, "loss": 0.8443, "step": 16710 }, { "epoch": 0.2037097973261185, "grad_norm": 1.8299534320831299, "learning_rate": 4.19108402822322e-06, "loss": 0.8345, "step": 16715 }, { "epoch": 0.2037707335502663, "grad_norm": 1.8276586532592773, "learning_rate": 4.190763309813984e-06, "loss": 0.9084, "step": 16720 }, { "epoch": 0.2038316697744141, "grad_norm": 1.956252932548523, "learning_rate": 4.190442591404747e-06, "loss": 0.9051, "step": 16725 }, { "epoch": 0.2038926059985619, "grad_norm": 2.0280508995056152, "learning_rate": 4.19012187299551e-06, "loss": 0.8337, "step": 16730 }, { "epoch": 0.2039535422227097, "grad_norm": 1.973861813545227, "learning_rate": 4.189801154586274e-06, "loss": 0.8862, "step": 16735 }, { "epoch": 0.20401447844685752, "grad_norm": 2.223634719848633, "learning_rate": 4.189480436177037e-06, "loss": 0.8702, "step": 16740 }, { "epoch": 0.20407541467100532, "grad_norm": 1.9877339601516724, "learning_rate": 4.1891597177678e-06, "loss": 0.905, "step": 16745 }, { "epoch": 0.20413635089515314, "grad_norm": 1.8672128915786743, "learning_rate": 4.188838999358564e-06, "loss": 0.8528, "step": 16750 }, { "epoch": 0.20419728711930094, "grad_norm": 1.9960732460021973, "learning_rate": 4.188518280949327e-06, "loss": 0.8516, "step": 16755 }, { "epoch": 0.20425822334344876, "grad_norm": 1.7111644744873047, "learning_rate": 4.18819756254009e-06, "loss": 0.9279, "step": 16760 }, { "epoch": 0.20431915956759655, "grad_norm": 1.904703974723816, "learning_rate": 4.1878768441308535e-06, "loss": 0.8315, "step": 16765 }, { "epoch": 0.20438009579174435, "grad_norm": 2.02960205078125, "learning_rate": 4.1875561257216165e-06, "loss": 0.9975, "step": 16770 }, { "epoch": 0.20444103201589217, "grad_norm": 2.5684776306152344, "learning_rate": 4.18723540731238e-06, "loss": 0.8488, "step": 16775 }, { "epoch": 0.20450196824003997, "grad_norm": 2.375004529953003, "learning_rate": 4.186914688903143e-06, "loss": 0.8647, "step": 16780 }, { "epoch": 0.2045629044641878, "grad_norm": 2.5118937492370605, "learning_rate": 4.186593970493906e-06, "loss": 0.9254, "step": 16785 }, { "epoch": 0.20462384068833558, "grad_norm": 2.1252596378326416, "learning_rate": 4.18627325208467e-06, "loss": 0.8724, "step": 16790 }, { "epoch": 0.2046847769124834, "grad_norm": 2.5627739429473877, "learning_rate": 4.185952533675433e-06, "loss": 0.8836, "step": 16795 }, { "epoch": 0.2047457131366312, "grad_norm": 1.8272775411605835, "learning_rate": 4.185631815266197e-06, "loss": 0.8398, "step": 16800 }, { "epoch": 0.204806649360779, "grad_norm": 1.7087905406951904, "learning_rate": 4.18531109685696e-06, "loss": 0.9169, "step": 16805 }, { "epoch": 0.20486758558492682, "grad_norm": 1.660724401473999, "learning_rate": 4.184990378447723e-06, "loss": 0.8625, "step": 16810 }, { "epoch": 0.20492852180907462, "grad_norm": 2.661208152770996, "learning_rate": 4.184669660038487e-06, "loss": 0.913, "step": 16815 }, { "epoch": 0.20498945803322244, "grad_norm": 1.901197910308838, "learning_rate": 4.18434894162925e-06, "loss": 0.8645, "step": 16820 }, { "epoch": 0.20505039425737023, "grad_norm": 1.9288005828857422, "learning_rate": 4.184028223220013e-06, "loss": 0.9482, "step": 16825 }, { "epoch": 0.20511133048151806, "grad_norm": 2.0333704948425293, "learning_rate": 4.183707504810777e-06, "loss": 0.962, "step": 16830 }, { "epoch": 0.20517226670566585, "grad_norm": 1.938327431678772, "learning_rate": 4.18338678640154e-06, "loss": 0.8696, "step": 16835 }, { "epoch": 0.20523320292981365, "grad_norm": 1.9178436994552612, "learning_rate": 4.183066067992303e-06, "loss": 0.835, "step": 16840 }, { "epoch": 0.20529413915396147, "grad_norm": 2.021263360977173, "learning_rate": 4.182745349583067e-06, "loss": 0.8742, "step": 16845 }, { "epoch": 0.20535507537810926, "grad_norm": 2.0359506607055664, "learning_rate": 4.18242463117383e-06, "loss": 0.8478, "step": 16850 }, { "epoch": 0.2054160116022571, "grad_norm": 1.884650468826294, "learning_rate": 4.182103912764593e-06, "loss": 0.9107, "step": 16855 }, { "epoch": 0.20547694782640488, "grad_norm": 1.6339731216430664, "learning_rate": 4.181783194355356e-06, "loss": 0.8845, "step": 16860 }, { "epoch": 0.2055378840505527, "grad_norm": 2.0305614471435547, "learning_rate": 4.18146247594612e-06, "loss": 0.8144, "step": 16865 }, { "epoch": 0.2055988202747005, "grad_norm": 1.830458164215088, "learning_rate": 4.181141757536883e-06, "loss": 0.8429, "step": 16870 }, { "epoch": 0.2056597564988483, "grad_norm": 1.7949018478393555, "learning_rate": 4.180821039127646e-06, "loss": 0.9165, "step": 16875 }, { "epoch": 0.20572069272299612, "grad_norm": 2.030917167663574, "learning_rate": 4.18050032071841e-06, "loss": 0.8972, "step": 16880 }, { "epoch": 0.2057816289471439, "grad_norm": 1.902521014213562, "learning_rate": 4.180179602309173e-06, "loss": 0.8435, "step": 16885 }, { "epoch": 0.20584256517129174, "grad_norm": 1.8489910364151, "learning_rate": 4.179858883899936e-06, "loss": 0.923, "step": 16890 }, { "epoch": 0.20590350139543953, "grad_norm": 2.43107008934021, "learning_rate": 4.1795381654906996e-06, "loss": 0.9428, "step": 16895 }, { "epoch": 0.20596443761958733, "grad_norm": 1.7398351430892944, "learning_rate": 4.1792174470814626e-06, "loss": 0.887, "step": 16900 }, { "epoch": 0.20602537384373515, "grad_norm": 1.7084105014801025, "learning_rate": 4.178896728672226e-06, "loss": 0.9055, "step": 16905 }, { "epoch": 0.20608631006788294, "grad_norm": 2.0867538452148438, "learning_rate": 4.1785760102629894e-06, "loss": 0.9211, "step": 16910 }, { "epoch": 0.20614724629203077, "grad_norm": 2.25628924369812, "learning_rate": 4.1782552918537525e-06, "loss": 0.8944, "step": 16915 }, { "epoch": 0.20620818251617856, "grad_norm": 2.081005334854126, "learning_rate": 4.177934573444516e-06, "loss": 0.9188, "step": 16920 }, { "epoch": 0.20626911874032638, "grad_norm": 1.780476689338684, "learning_rate": 4.177613855035279e-06, "loss": 0.9063, "step": 16925 }, { "epoch": 0.20633005496447418, "grad_norm": 1.6983505487442017, "learning_rate": 4.177293136626042e-06, "loss": 0.8321, "step": 16930 }, { "epoch": 0.20639099118862198, "grad_norm": 2.258894920349121, "learning_rate": 4.176972418216806e-06, "loss": 0.8882, "step": 16935 }, { "epoch": 0.2064519274127698, "grad_norm": 1.5225197076797485, "learning_rate": 4.176651699807569e-06, "loss": 0.8056, "step": 16940 }, { "epoch": 0.2065128636369176, "grad_norm": 1.9617983102798462, "learning_rate": 4.176330981398333e-06, "loss": 0.8477, "step": 16945 }, { "epoch": 0.20657379986106542, "grad_norm": 2.0302352905273438, "learning_rate": 4.176010262989096e-06, "loss": 0.8425, "step": 16950 }, { "epoch": 0.2066347360852132, "grad_norm": 2.087425947189331, "learning_rate": 4.175689544579859e-06, "loss": 0.8641, "step": 16955 }, { "epoch": 0.20669567230936103, "grad_norm": 1.980552077293396, "learning_rate": 4.175368826170623e-06, "loss": 0.9369, "step": 16960 }, { "epoch": 0.20675660853350883, "grad_norm": 2.0536530017852783, "learning_rate": 4.175048107761386e-06, "loss": 0.8886, "step": 16965 }, { "epoch": 0.20681754475765662, "grad_norm": 1.7144687175750732, "learning_rate": 4.174727389352149e-06, "loss": 0.9442, "step": 16970 }, { "epoch": 0.20687848098180445, "grad_norm": 2.2083580493927, "learning_rate": 4.174406670942913e-06, "loss": 0.8885, "step": 16975 }, { "epoch": 0.20693941720595224, "grad_norm": 1.9355937242507935, "learning_rate": 4.174085952533676e-06, "loss": 0.857, "step": 16980 }, { "epoch": 0.20700035343010006, "grad_norm": 1.9089460372924805, "learning_rate": 4.173765234124439e-06, "loss": 0.8812, "step": 16985 }, { "epoch": 0.20706128965424786, "grad_norm": 1.8857789039611816, "learning_rate": 4.173444515715203e-06, "loss": 0.8198, "step": 16990 }, { "epoch": 0.20712222587839568, "grad_norm": 1.8769930601119995, "learning_rate": 4.173123797305966e-06, "loss": 0.8571, "step": 16995 }, { "epoch": 0.20718316210254348, "grad_norm": 1.8793379068374634, "learning_rate": 4.172803078896729e-06, "loss": 0.8791, "step": 17000 }, { "epoch": 0.20724409832669127, "grad_norm": 2.1573097705841064, "learning_rate": 4.172482360487493e-06, "loss": 0.9115, "step": 17005 }, { "epoch": 0.2073050345508391, "grad_norm": 2.2163784503936768, "learning_rate": 4.172161642078256e-06, "loss": 0.838, "step": 17010 }, { "epoch": 0.2073659707749869, "grad_norm": 1.8567322492599487, "learning_rate": 4.171840923669019e-06, "loss": 0.9566, "step": 17015 }, { "epoch": 0.2074269069991347, "grad_norm": 1.8014527559280396, "learning_rate": 4.171520205259783e-06, "loss": 0.7856, "step": 17020 }, { "epoch": 0.2074878432232825, "grad_norm": 2.0956194400787354, "learning_rate": 4.171199486850546e-06, "loss": 0.8928, "step": 17025 }, { "epoch": 0.20754877944743033, "grad_norm": 1.695217251777649, "learning_rate": 4.170878768441309e-06, "loss": 0.8394, "step": 17030 }, { "epoch": 0.20760971567157813, "grad_norm": 1.8908076286315918, "learning_rate": 4.170558050032072e-06, "loss": 0.9357, "step": 17035 }, { "epoch": 0.20767065189572592, "grad_norm": 2.6847569942474365, "learning_rate": 4.1702373316228355e-06, "loss": 0.8691, "step": 17040 }, { "epoch": 0.20773158811987374, "grad_norm": 1.8265385627746582, "learning_rate": 4.1699166132135985e-06, "loss": 0.8999, "step": 17045 }, { "epoch": 0.20779252434402154, "grad_norm": 2.3949737548828125, "learning_rate": 4.1695958948043615e-06, "loss": 0.8714, "step": 17050 }, { "epoch": 0.20785346056816936, "grad_norm": 1.6804215908050537, "learning_rate": 4.169275176395125e-06, "loss": 0.8808, "step": 17055 }, { "epoch": 0.20791439679231716, "grad_norm": 2.5974671840667725, "learning_rate": 4.168954457985888e-06, "loss": 0.8706, "step": 17060 }, { "epoch": 0.20797533301646498, "grad_norm": 1.8219555616378784, "learning_rate": 4.1686337395766514e-06, "loss": 0.8205, "step": 17065 }, { "epoch": 0.20803626924061278, "grad_norm": 2.1293859481811523, "learning_rate": 4.168313021167415e-06, "loss": 0.8734, "step": 17070 }, { "epoch": 0.20809720546476057, "grad_norm": 2.1258256435394287, "learning_rate": 4.167992302758178e-06, "loss": 0.8763, "step": 17075 }, { "epoch": 0.2081581416889084, "grad_norm": 2.0669877529144287, "learning_rate": 4.167671584348942e-06, "loss": 0.8328, "step": 17080 }, { "epoch": 0.2082190779130562, "grad_norm": 2.1055400371551514, "learning_rate": 4.167350865939705e-06, "loss": 0.8758, "step": 17085 }, { "epoch": 0.208280014137204, "grad_norm": 2.320091962814331, "learning_rate": 4.167030147530468e-06, "loss": 0.8932, "step": 17090 }, { "epoch": 0.2083409503613518, "grad_norm": 1.9292089939117432, "learning_rate": 4.166709429121232e-06, "loss": 0.83, "step": 17095 }, { "epoch": 0.20840188658549963, "grad_norm": 1.962499737739563, "learning_rate": 4.166388710711995e-06, "loss": 0.8924, "step": 17100 }, { "epoch": 0.20846282280964742, "grad_norm": 1.824868083000183, "learning_rate": 4.166067992302759e-06, "loss": 0.8306, "step": 17105 }, { "epoch": 0.20852375903379522, "grad_norm": 1.7022610902786255, "learning_rate": 4.165747273893522e-06, "loss": 0.9079, "step": 17110 }, { "epoch": 0.20858469525794304, "grad_norm": 1.8351290225982666, "learning_rate": 4.165426555484285e-06, "loss": 0.8975, "step": 17115 }, { "epoch": 0.20864563148209084, "grad_norm": 1.9946480989456177, "learning_rate": 4.165105837075049e-06, "loss": 0.8404, "step": 17120 }, { "epoch": 0.20870656770623866, "grad_norm": 1.9236923456192017, "learning_rate": 4.164785118665812e-06, "loss": 0.9091, "step": 17125 }, { "epoch": 0.20876750393038646, "grad_norm": 1.9401370286941528, "learning_rate": 4.164464400256575e-06, "loss": 0.844, "step": 17130 }, { "epoch": 0.20882844015453425, "grad_norm": 2.10274600982666, "learning_rate": 4.164143681847339e-06, "loss": 0.9004, "step": 17135 }, { "epoch": 0.20888937637868207, "grad_norm": 1.9561283588409424, "learning_rate": 4.163822963438102e-06, "loss": 0.8875, "step": 17140 }, { "epoch": 0.20895031260282987, "grad_norm": 2.181925058364868, "learning_rate": 4.163502245028865e-06, "loss": 0.9374, "step": 17145 }, { "epoch": 0.2090112488269777, "grad_norm": 2.429624557495117, "learning_rate": 4.163181526619629e-06, "loss": 0.9295, "step": 17150 }, { "epoch": 0.2090721850511255, "grad_norm": 2.246774196624756, "learning_rate": 4.162860808210392e-06, "loss": 0.8818, "step": 17155 }, { "epoch": 0.2091331212752733, "grad_norm": 2.04443097114563, "learning_rate": 4.162540089801155e-06, "loss": 0.9465, "step": 17160 }, { "epoch": 0.2091940574994211, "grad_norm": 1.9576464891433716, "learning_rate": 4.1622193713919185e-06, "loss": 0.8996, "step": 17165 }, { "epoch": 0.2092549937235689, "grad_norm": 2.0277419090270996, "learning_rate": 4.1618986529826816e-06, "loss": 0.8569, "step": 17170 }, { "epoch": 0.20931592994771672, "grad_norm": 2.051825761795044, "learning_rate": 4.161577934573445e-06, "loss": 0.8888, "step": 17175 }, { "epoch": 0.20937686617186452, "grad_norm": 2.2911200523376465, "learning_rate": 4.1612572161642084e-06, "loss": 0.9258, "step": 17180 }, { "epoch": 0.20943780239601234, "grad_norm": 2.2199959754943848, "learning_rate": 4.1609364977549715e-06, "loss": 0.9507, "step": 17185 }, { "epoch": 0.20949873862016014, "grad_norm": 1.9921789169311523, "learning_rate": 4.1606157793457345e-06, "loss": 0.8467, "step": 17190 }, { "epoch": 0.20955967484430796, "grad_norm": 2.139177083969116, "learning_rate": 4.1602950609364975e-06, "loss": 0.8923, "step": 17195 }, { "epoch": 0.20962061106845575, "grad_norm": 1.715291142463684, "learning_rate": 4.159974342527261e-06, "loss": 0.8592, "step": 17200 }, { "epoch": 0.20968154729260355, "grad_norm": 1.7712644338607788, "learning_rate": 4.159653624118024e-06, "loss": 0.9398, "step": 17205 }, { "epoch": 0.20974248351675137, "grad_norm": 1.9217778444290161, "learning_rate": 4.159332905708787e-06, "loss": 0.8611, "step": 17210 }, { "epoch": 0.20980341974089917, "grad_norm": 2.103442430496216, "learning_rate": 4.159012187299551e-06, "loss": 0.9003, "step": 17215 }, { "epoch": 0.209864355965047, "grad_norm": 1.9208946228027344, "learning_rate": 4.158691468890314e-06, "loss": 0.8643, "step": 17220 }, { "epoch": 0.20992529218919478, "grad_norm": 2.0081942081451416, "learning_rate": 4.158370750481078e-06, "loss": 0.8613, "step": 17225 }, { "epoch": 0.2099862284133426, "grad_norm": 2.0712013244628906, "learning_rate": 4.158050032071841e-06, "loss": 0.9248, "step": 17230 }, { "epoch": 0.2100471646374904, "grad_norm": 1.9690810441970825, "learning_rate": 4.157729313662604e-06, "loss": 0.8966, "step": 17235 }, { "epoch": 0.2101081008616382, "grad_norm": 1.8471076488494873, "learning_rate": 4.157408595253368e-06, "loss": 0.892, "step": 17240 }, { "epoch": 0.21016903708578602, "grad_norm": 2.0208194255828857, "learning_rate": 4.157087876844131e-06, "loss": 0.8895, "step": 17245 }, { "epoch": 0.21022997330993382, "grad_norm": 2.02470326423645, "learning_rate": 4.156767158434895e-06, "loss": 0.8743, "step": 17250 }, { "epoch": 0.21029090953408164, "grad_norm": 2.294318914413452, "learning_rate": 4.156446440025658e-06, "loss": 0.9063, "step": 17255 }, { "epoch": 0.21035184575822943, "grad_norm": 1.7694532871246338, "learning_rate": 4.156125721616421e-06, "loss": 0.9115, "step": 17260 }, { "epoch": 0.21041278198237726, "grad_norm": 2.0390124320983887, "learning_rate": 4.155805003207185e-06, "loss": 0.9403, "step": 17265 }, { "epoch": 0.21047371820652505, "grad_norm": 1.9792813062667847, "learning_rate": 4.155484284797948e-06, "loss": 0.8437, "step": 17270 }, { "epoch": 0.21053465443067285, "grad_norm": 2.115203619003296, "learning_rate": 4.155163566388712e-06, "loss": 0.8674, "step": 17275 }, { "epoch": 0.21059559065482067, "grad_norm": 1.8708280324935913, "learning_rate": 4.154842847979475e-06, "loss": 0.8746, "step": 17280 }, { "epoch": 0.21065652687896846, "grad_norm": 1.8239550590515137, "learning_rate": 4.154522129570238e-06, "loss": 0.9165, "step": 17285 }, { "epoch": 0.2107174631031163, "grad_norm": 2.3681626319885254, "learning_rate": 4.154201411161001e-06, "loss": 0.8646, "step": 17290 }, { "epoch": 0.21077839932726408, "grad_norm": 2.154682159423828, "learning_rate": 4.153880692751765e-06, "loss": 0.9188, "step": 17295 }, { "epoch": 0.2108393355514119, "grad_norm": 2.1557493209838867, "learning_rate": 4.153559974342528e-06, "loss": 0.9281, "step": 17300 }, { "epoch": 0.2109002717755597, "grad_norm": 1.666967511177063, "learning_rate": 4.153239255933291e-06, "loss": 0.8893, "step": 17305 }, { "epoch": 0.2109612079997075, "grad_norm": 2.105790853500366, "learning_rate": 4.1529185375240545e-06, "loss": 0.9266, "step": 17310 }, { "epoch": 0.21102214422385532, "grad_norm": 2.1066384315490723, "learning_rate": 4.1525978191148175e-06, "loss": 0.9428, "step": 17315 }, { "epoch": 0.2110830804480031, "grad_norm": 2.0436160564422607, "learning_rate": 4.1522771007055805e-06, "loss": 0.9001, "step": 17320 }, { "epoch": 0.21114401667215094, "grad_norm": 1.980368971824646, "learning_rate": 4.151956382296344e-06, "loss": 0.9069, "step": 17325 }, { "epoch": 0.21120495289629873, "grad_norm": 2.0862061977386475, "learning_rate": 4.151635663887107e-06, "loss": 0.9254, "step": 17330 }, { "epoch": 0.21126588912044655, "grad_norm": 1.8444457054138184, "learning_rate": 4.1513149454778704e-06, "loss": 0.923, "step": 17335 }, { "epoch": 0.21132682534459435, "grad_norm": 1.9131529331207275, "learning_rate": 4.150994227068634e-06, "loss": 0.8674, "step": 17340 }, { "epoch": 0.21138776156874214, "grad_norm": 1.9792028665542603, "learning_rate": 4.150673508659397e-06, "loss": 0.954, "step": 17345 }, { "epoch": 0.21144869779288997, "grad_norm": 2.1017420291900635, "learning_rate": 4.15035279025016e-06, "loss": 0.8844, "step": 17350 }, { "epoch": 0.21150963401703776, "grad_norm": 2.0145046710968018, "learning_rate": 4.150032071840924e-06, "loss": 0.8471, "step": 17355 }, { "epoch": 0.21157057024118558, "grad_norm": 2.1548244953155518, "learning_rate": 4.149711353431687e-06, "loss": 0.8651, "step": 17360 }, { "epoch": 0.21163150646533338, "grad_norm": 2.0357284545898438, "learning_rate": 4.14939063502245e-06, "loss": 0.8989, "step": 17365 }, { "epoch": 0.2116924426894812, "grad_norm": 2.0875484943389893, "learning_rate": 4.149069916613214e-06, "loss": 0.8382, "step": 17370 }, { "epoch": 0.211753378913629, "grad_norm": 1.9435417652130127, "learning_rate": 4.148749198203977e-06, "loss": 0.8656, "step": 17375 }, { "epoch": 0.2118143151377768, "grad_norm": 2.2979142665863037, "learning_rate": 4.14842847979474e-06, "loss": 0.9375, "step": 17380 }, { "epoch": 0.21187525136192462, "grad_norm": 2.034505605697632, "learning_rate": 4.148107761385504e-06, "loss": 0.8869, "step": 17385 }, { "epoch": 0.2119361875860724, "grad_norm": 2.0473005771636963, "learning_rate": 4.147787042976267e-06, "loss": 0.8685, "step": 17390 }, { "epoch": 0.21199712381022023, "grad_norm": 1.9376012086868286, "learning_rate": 4.147466324567031e-06, "loss": 0.8522, "step": 17395 }, { "epoch": 0.21205806003436803, "grad_norm": 1.8141170740127563, "learning_rate": 4.147145606157794e-06, "loss": 0.8812, "step": 17400 }, { "epoch": 0.21211899625851582, "grad_norm": 1.826647162437439, "learning_rate": 4.146824887748557e-06, "loss": 0.9238, "step": 17405 }, { "epoch": 0.21217993248266365, "grad_norm": 2.072160005569458, "learning_rate": 4.146504169339321e-06, "loss": 0.8647, "step": 17410 }, { "epoch": 0.21224086870681144, "grad_norm": 1.859688639640808, "learning_rate": 4.146183450930084e-06, "loss": 0.8534, "step": 17415 }, { "epoch": 0.21230180493095926, "grad_norm": 1.8529084920883179, "learning_rate": 4.145862732520848e-06, "loss": 0.9281, "step": 17420 }, { "epoch": 0.21236274115510706, "grad_norm": 1.9753825664520264, "learning_rate": 4.145542014111611e-06, "loss": 0.9088, "step": 17425 }, { "epoch": 0.21242367737925488, "grad_norm": 2.22031831741333, "learning_rate": 4.145221295702374e-06, "loss": 0.9366, "step": 17430 }, { "epoch": 0.21248461360340268, "grad_norm": 1.863114833831787, "learning_rate": 4.1449005772931375e-06, "loss": 0.866, "step": 17435 }, { "epoch": 0.21254554982755047, "grad_norm": 1.86057448387146, "learning_rate": 4.1445798588839006e-06, "loss": 0.8726, "step": 17440 }, { "epoch": 0.2126064860516983, "grad_norm": 2.048692226409912, "learning_rate": 4.144259140474664e-06, "loss": 0.9065, "step": 17445 }, { "epoch": 0.2126674222758461, "grad_norm": 1.948913812637329, "learning_rate": 4.143938422065427e-06, "loss": 0.8464, "step": 17450 }, { "epoch": 0.2127283584999939, "grad_norm": 2.4866199493408203, "learning_rate": 4.1436177036561905e-06, "loss": 0.8913, "step": 17455 }, { "epoch": 0.2127892947241417, "grad_norm": 2.0875158309936523, "learning_rate": 4.1432969852469535e-06, "loss": 0.8513, "step": 17460 }, { "epoch": 0.21285023094828953, "grad_norm": 1.8578999042510986, "learning_rate": 4.1429762668377165e-06, "loss": 0.9251, "step": 17465 }, { "epoch": 0.21291116717243733, "grad_norm": 1.9728158712387085, "learning_rate": 4.14265554842848e-06, "loss": 0.869, "step": 17470 }, { "epoch": 0.21297210339658512, "grad_norm": 2.1851682662963867, "learning_rate": 4.142334830019243e-06, "loss": 0.843, "step": 17475 }, { "epoch": 0.21303303962073294, "grad_norm": 1.6259870529174805, "learning_rate": 4.142014111610006e-06, "loss": 0.8598, "step": 17480 }, { "epoch": 0.21309397584488074, "grad_norm": 1.9029704332351685, "learning_rate": 4.14169339320077e-06, "loss": 0.9115, "step": 17485 }, { "epoch": 0.21315491206902856, "grad_norm": 2.2057571411132812, "learning_rate": 4.141372674791533e-06, "loss": 0.8805, "step": 17490 }, { "epoch": 0.21321584829317636, "grad_norm": 2.0905702114105225, "learning_rate": 4.141051956382296e-06, "loss": 0.8533, "step": 17495 }, { "epoch": 0.21327678451732418, "grad_norm": 2.1318750381469727, "learning_rate": 4.14073123797306e-06, "loss": 0.8618, "step": 17500 }, { "epoch": 0.21333772074147198, "grad_norm": 1.8434498310089111, "learning_rate": 4.140410519563823e-06, "loss": 0.8586, "step": 17505 }, { "epoch": 0.21339865696561977, "grad_norm": 1.7576991319656372, "learning_rate": 4.140089801154586e-06, "loss": 0.9155, "step": 17510 }, { "epoch": 0.2134595931897676, "grad_norm": 2.6454851627349854, "learning_rate": 4.13976908274535e-06, "loss": 0.9477, "step": 17515 }, { "epoch": 0.2135205294139154, "grad_norm": 2.4888863563537598, "learning_rate": 4.139448364336113e-06, "loss": 0.9006, "step": 17520 }, { "epoch": 0.2135814656380632, "grad_norm": 2.0095574855804443, "learning_rate": 4.139127645926876e-06, "loss": 0.8185, "step": 17525 }, { "epoch": 0.213642401862211, "grad_norm": 2.0642547607421875, "learning_rate": 4.13880692751764e-06, "loss": 0.9372, "step": 17530 }, { "epoch": 0.21370333808635883, "grad_norm": 1.8423413038253784, "learning_rate": 4.138486209108403e-06, "loss": 0.8527, "step": 17535 }, { "epoch": 0.21376427431050662, "grad_norm": 1.9639540910720825, "learning_rate": 4.138165490699166e-06, "loss": 0.8903, "step": 17540 }, { "epoch": 0.21382521053465442, "grad_norm": 1.9794560670852661, "learning_rate": 4.13784477228993e-06, "loss": 0.8255, "step": 17545 }, { "epoch": 0.21388614675880224, "grad_norm": 1.7644691467285156, "learning_rate": 4.137524053880693e-06, "loss": 0.886, "step": 17550 }, { "epoch": 0.21394708298295004, "grad_norm": 2.288207530975342, "learning_rate": 4.137203335471457e-06, "loss": 0.9271, "step": 17555 }, { "epoch": 0.21400801920709786, "grad_norm": 1.8457988500595093, "learning_rate": 4.13688261706222e-06, "loss": 0.9441, "step": 17560 }, { "epoch": 0.21406895543124566, "grad_norm": 1.8593865633010864, "learning_rate": 4.136561898652984e-06, "loss": 0.8389, "step": 17565 }, { "epoch": 0.21412989165539348, "grad_norm": 1.6524924039840698, "learning_rate": 4.136241180243747e-06, "loss": 0.8409, "step": 17570 }, { "epoch": 0.21419082787954127, "grad_norm": 1.9458160400390625, "learning_rate": 4.13592046183451e-06, "loss": 0.8715, "step": 17575 }, { "epoch": 0.21425176410368907, "grad_norm": 2.165053367614746, "learning_rate": 4.1355997434252735e-06, "loss": 0.8929, "step": 17580 }, { "epoch": 0.2143127003278369, "grad_norm": 1.92807137966156, "learning_rate": 4.1352790250160365e-06, "loss": 0.8614, "step": 17585 }, { "epoch": 0.2143736365519847, "grad_norm": 1.8987067937850952, "learning_rate": 4.1349583066067995e-06, "loss": 0.8949, "step": 17590 }, { "epoch": 0.2144345727761325, "grad_norm": 2.744098663330078, "learning_rate": 4.134637588197563e-06, "loss": 0.8358, "step": 17595 }, { "epoch": 0.2144955090002803, "grad_norm": 1.8666534423828125, "learning_rate": 4.134316869788326e-06, "loss": 0.8426, "step": 17600 }, { "epoch": 0.21455644522442813, "grad_norm": 2.326387643814087, "learning_rate": 4.1339961513790894e-06, "loss": 0.8864, "step": 17605 }, { "epoch": 0.21461738144857592, "grad_norm": 1.6965100765228271, "learning_rate": 4.133675432969853e-06, "loss": 0.912, "step": 17610 }, { "epoch": 0.21467831767272372, "grad_norm": 1.8979499340057373, "learning_rate": 4.133354714560616e-06, "loss": 0.8562, "step": 17615 }, { "epoch": 0.21473925389687154, "grad_norm": 1.9233769178390503, "learning_rate": 4.133033996151379e-06, "loss": 0.8613, "step": 17620 }, { "epoch": 0.21480019012101934, "grad_norm": 1.8432798385620117, "learning_rate": 4.132713277742142e-06, "loss": 0.8699, "step": 17625 }, { "epoch": 0.21486112634516716, "grad_norm": 1.7726150751113892, "learning_rate": 4.132392559332906e-06, "loss": 0.8137, "step": 17630 }, { "epoch": 0.21492206256931495, "grad_norm": 1.7337589263916016, "learning_rate": 4.132071840923669e-06, "loss": 0.897, "step": 17635 }, { "epoch": 0.21498299879346275, "grad_norm": 1.9854955673217773, "learning_rate": 4.131751122514432e-06, "loss": 0.8927, "step": 17640 }, { "epoch": 0.21504393501761057, "grad_norm": 2.539912462234497, "learning_rate": 4.131430404105196e-06, "loss": 0.9145, "step": 17645 }, { "epoch": 0.21510487124175837, "grad_norm": 1.8233779668807983, "learning_rate": 4.131109685695959e-06, "loss": 0.9351, "step": 17650 }, { "epoch": 0.2151658074659062, "grad_norm": 2.005206823348999, "learning_rate": 4.130788967286722e-06, "loss": 0.8802, "step": 17655 }, { "epoch": 0.21522674369005398, "grad_norm": 2.0599026679992676, "learning_rate": 4.130468248877486e-06, "loss": 0.7942, "step": 17660 }, { "epoch": 0.2152876799142018, "grad_norm": 2.238503932952881, "learning_rate": 4.130147530468249e-06, "loss": 0.9565, "step": 17665 }, { "epoch": 0.2153486161383496, "grad_norm": 2.9131383895874023, "learning_rate": 4.129826812059012e-06, "loss": 0.9889, "step": 17670 }, { "epoch": 0.2154095523624974, "grad_norm": 2.286224365234375, "learning_rate": 4.129506093649776e-06, "loss": 0.9108, "step": 17675 }, { "epoch": 0.21547048858664522, "grad_norm": 2.1162056922912598, "learning_rate": 4.129185375240539e-06, "loss": 0.8662, "step": 17680 }, { "epoch": 0.21553142481079302, "grad_norm": 2.042935848236084, "learning_rate": 4.128864656831302e-06, "loss": 0.8912, "step": 17685 }, { "epoch": 0.21559236103494084, "grad_norm": 1.967236876487732, "learning_rate": 4.128543938422066e-06, "loss": 0.9015, "step": 17690 }, { "epoch": 0.21565329725908863, "grad_norm": 2.1295559406280518, "learning_rate": 4.128223220012829e-06, "loss": 0.8619, "step": 17695 }, { "epoch": 0.21571423348323646, "grad_norm": 2.199009418487549, "learning_rate": 4.127902501603593e-06, "loss": 0.8925, "step": 17700 }, { "epoch": 0.21577516970738425, "grad_norm": 2.0448732376098633, "learning_rate": 4.127581783194356e-06, "loss": 0.8999, "step": 17705 }, { "epoch": 0.21583610593153205, "grad_norm": 2.0783777236938477, "learning_rate": 4.127261064785119e-06, "loss": 0.835, "step": 17710 }, { "epoch": 0.21589704215567987, "grad_norm": 2.065424680709839, "learning_rate": 4.126940346375883e-06, "loss": 0.8836, "step": 17715 }, { "epoch": 0.21595797837982766, "grad_norm": 1.863046407699585, "learning_rate": 4.126619627966646e-06, "loss": 0.9174, "step": 17720 }, { "epoch": 0.2160189146039755, "grad_norm": 1.9261053800582886, "learning_rate": 4.1262989095574095e-06, "loss": 0.812, "step": 17725 }, { "epoch": 0.21607985082812328, "grad_norm": 2.80037260055542, "learning_rate": 4.1259781911481725e-06, "loss": 0.8543, "step": 17730 }, { "epoch": 0.2161407870522711, "grad_norm": 2.313546657562256, "learning_rate": 4.1256574727389355e-06, "loss": 0.8559, "step": 17735 }, { "epoch": 0.2162017232764189, "grad_norm": 2.3244290351867676, "learning_rate": 4.125336754329699e-06, "loss": 0.9215, "step": 17740 }, { "epoch": 0.2162626595005667, "grad_norm": 2.449779510498047, "learning_rate": 4.125016035920462e-06, "loss": 0.8601, "step": 17745 }, { "epoch": 0.21632359572471452, "grad_norm": 2.635716438293457, "learning_rate": 4.124695317511225e-06, "loss": 0.9786, "step": 17750 }, { "epoch": 0.2163845319488623, "grad_norm": 2.3525032997131348, "learning_rate": 4.124374599101989e-06, "loss": 0.8622, "step": 17755 }, { "epoch": 0.21644546817301014, "grad_norm": 1.9034823179244995, "learning_rate": 4.124053880692752e-06, "loss": 0.916, "step": 17760 }, { "epoch": 0.21650640439715793, "grad_norm": 2.043332099914551, "learning_rate": 4.123733162283515e-06, "loss": 0.8376, "step": 17765 }, { "epoch": 0.21656734062130575, "grad_norm": 1.969516634941101, "learning_rate": 4.123412443874279e-06, "loss": 0.8329, "step": 17770 }, { "epoch": 0.21662827684545355, "grad_norm": 2.1932785511016846, "learning_rate": 4.123091725465042e-06, "loss": 0.9357, "step": 17775 }, { "epoch": 0.21668921306960134, "grad_norm": 2.1064212322235107, "learning_rate": 4.122771007055805e-06, "loss": 0.8577, "step": 17780 }, { "epoch": 0.21675014929374917, "grad_norm": 2.1664671897888184, "learning_rate": 4.122450288646568e-06, "loss": 0.9402, "step": 17785 }, { "epoch": 0.21681108551789696, "grad_norm": 2.017652988433838, "learning_rate": 4.122129570237332e-06, "loss": 0.8923, "step": 17790 }, { "epoch": 0.21687202174204478, "grad_norm": 1.7396708726882935, "learning_rate": 4.121808851828095e-06, "loss": 0.8839, "step": 17795 }, { "epoch": 0.21693295796619258, "grad_norm": 2.315380573272705, "learning_rate": 4.121488133418858e-06, "loss": 0.8621, "step": 17800 }, { "epoch": 0.2169938941903404, "grad_norm": 1.7837055921554565, "learning_rate": 4.121167415009622e-06, "loss": 0.8839, "step": 17805 }, { "epoch": 0.2170548304144882, "grad_norm": 1.8830267190933228, "learning_rate": 4.120846696600385e-06, "loss": 0.8818, "step": 17810 }, { "epoch": 0.217115766638636, "grad_norm": 1.8353520631790161, "learning_rate": 4.120525978191148e-06, "loss": 0.9016, "step": 17815 }, { "epoch": 0.21717670286278382, "grad_norm": 2.174873113632202, "learning_rate": 4.120205259781912e-06, "loss": 0.9119, "step": 17820 }, { "epoch": 0.2172376390869316, "grad_norm": 1.9898700714111328, "learning_rate": 4.119884541372675e-06, "loss": 0.9245, "step": 17825 }, { "epoch": 0.21729857531107943, "grad_norm": 2.266601085662842, "learning_rate": 4.119563822963438e-06, "loss": 0.8776, "step": 17830 }, { "epoch": 0.21735951153522723, "grad_norm": 1.7656469345092773, "learning_rate": 4.119243104554202e-06, "loss": 0.8482, "step": 17835 }, { "epoch": 0.21742044775937505, "grad_norm": 2.1969385147094727, "learning_rate": 4.118922386144965e-06, "loss": 0.8851, "step": 17840 }, { "epoch": 0.21748138398352285, "grad_norm": 1.9368385076522827, "learning_rate": 4.118601667735729e-06, "loss": 0.8998, "step": 17845 }, { "epoch": 0.21754232020767064, "grad_norm": 1.8915749788284302, "learning_rate": 4.118280949326492e-06, "loss": 0.9039, "step": 17850 }, { "epoch": 0.21760325643181846, "grad_norm": 1.8138121366500854, "learning_rate": 4.117960230917255e-06, "loss": 0.847, "step": 17855 }, { "epoch": 0.21766419265596626, "grad_norm": 2.13387131690979, "learning_rate": 4.1176395125080185e-06, "loss": 0.8265, "step": 17860 }, { "epoch": 0.21772512888011408, "grad_norm": 1.939544439315796, "learning_rate": 4.1173187940987816e-06, "loss": 0.9093, "step": 17865 }, { "epoch": 0.21778606510426188, "grad_norm": 2.040635347366333, "learning_rate": 4.116998075689545e-06, "loss": 0.8559, "step": 17870 }, { "epoch": 0.21784700132840967, "grad_norm": 2.0355663299560547, "learning_rate": 4.1166773572803084e-06, "loss": 0.9324, "step": 17875 }, { "epoch": 0.2179079375525575, "grad_norm": 2.035909414291382, "learning_rate": 4.1163566388710715e-06, "loss": 0.8942, "step": 17880 }, { "epoch": 0.2179688737767053, "grad_norm": 1.901395320892334, "learning_rate": 4.116035920461835e-06, "loss": 0.8799, "step": 17885 }, { "epoch": 0.2180298100008531, "grad_norm": 1.8785138130187988, "learning_rate": 4.115715202052598e-06, "loss": 0.8653, "step": 17890 }, { "epoch": 0.2180907462250009, "grad_norm": 1.9084974527359009, "learning_rate": 4.115394483643361e-06, "loss": 0.8541, "step": 17895 }, { "epoch": 0.21815168244914873, "grad_norm": 1.7810906171798706, "learning_rate": 4.115073765234125e-06, "loss": 0.8559, "step": 17900 }, { "epoch": 0.21821261867329653, "grad_norm": 2.080662727355957, "learning_rate": 4.114753046824888e-06, "loss": 0.8002, "step": 17905 }, { "epoch": 0.21827355489744432, "grad_norm": 2.0671393871307373, "learning_rate": 4.114432328415651e-06, "loss": 0.8977, "step": 17910 }, { "epoch": 0.21833449112159214, "grad_norm": 2.0517337322235107, "learning_rate": 4.114111610006415e-06, "loss": 0.8479, "step": 17915 }, { "epoch": 0.21839542734573994, "grad_norm": 2.196340322494507, "learning_rate": 4.113790891597178e-06, "loss": 0.8847, "step": 17920 }, { "epoch": 0.21845636356988776, "grad_norm": 1.9609037637710571, "learning_rate": 4.113470173187941e-06, "loss": 0.9383, "step": 17925 }, { "epoch": 0.21851729979403556, "grad_norm": 1.7964129447937012, "learning_rate": 4.113149454778705e-06, "loss": 0.81, "step": 17930 }, { "epoch": 0.21857823601818338, "grad_norm": 2.178177833557129, "learning_rate": 4.112828736369468e-06, "loss": 0.8529, "step": 17935 }, { "epoch": 0.21863917224233118, "grad_norm": 1.865743637084961, "learning_rate": 4.112508017960231e-06, "loss": 0.8721, "step": 17940 }, { "epoch": 0.21870010846647897, "grad_norm": 1.888131022453308, "learning_rate": 4.112187299550995e-06, "loss": 0.8649, "step": 17945 }, { "epoch": 0.2187610446906268, "grad_norm": 2.0798656940460205, "learning_rate": 4.111866581141758e-06, "loss": 0.9452, "step": 17950 }, { "epoch": 0.2188219809147746, "grad_norm": 1.7593834400177002, "learning_rate": 4.111545862732521e-06, "loss": 0.8725, "step": 17955 }, { "epoch": 0.2188829171389224, "grad_norm": 1.7678241729736328, "learning_rate": 4.111225144323284e-06, "loss": 0.9254, "step": 17960 }, { "epoch": 0.2189438533630702, "grad_norm": 2.2050061225891113, "learning_rate": 4.110904425914048e-06, "loss": 0.8575, "step": 17965 }, { "epoch": 0.21900478958721803, "grad_norm": 1.8752515316009521, "learning_rate": 4.110583707504811e-06, "loss": 0.881, "step": 17970 }, { "epoch": 0.21906572581136582, "grad_norm": 2.032517433166504, "learning_rate": 4.110262989095574e-06, "loss": 0.8164, "step": 17975 }, { "epoch": 0.21912666203551362, "grad_norm": 1.8132247924804688, "learning_rate": 4.109942270686338e-06, "loss": 0.8641, "step": 17980 }, { "epoch": 0.21918759825966144, "grad_norm": 1.828989863395691, "learning_rate": 4.109621552277101e-06, "loss": 0.866, "step": 17985 }, { "epoch": 0.21924853448380924, "grad_norm": 2.265890121459961, "learning_rate": 4.109300833867864e-06, "loss": 0.8505, "step": 17990 }, { "epoch": 0.21930947070795706, "grad_norm": 2.0615735054016113, "learning_rate": 4.108980115458628e-06, "loss": 0.8884, "step": 17995 }, { "epoch": 0.21937040693210486, "grad_norm": 1.5837830305099487, "learning_rate": 4.108659397049391e-06, "loss": 0.8212, "step": 18000 }, { "epoch": 0.21943134315625268, "grad_norm": 1.9280004501342773, "learning_rate": 4.1083386786401545e-06, "loss": 0.9221, "step": 18005 }, { "epoch": 0.21949227938040047, "grad_norm": 1.851058840751648, "learning_rate": 4.1080179602309175e-06, "loss": 0.8011, "step": 18010 }, { "epoch": 0.21955321560454827, "grad_norm": 1.9497965574264526, "learning_rate": 4.1076972418216805e-06, "loss": 0.9272, "step": 18015 }, { "epoch": 0.2196141518286961, "grad_norm": 2.0937063694000244, "learning_rate": 4.107376523412444e-06, "loss": 0.9509, "step": 18020 }, { "epoch": 0.2196750880528439, "grad_norm": 1.8325313329696655, "learning_rate": 4.107055805003207e-06, "loss": 0.8195, "step": 18025 }, { "epoch": 0.2197360242769917, "grad_norm": 1.840686559677124, "learning_rate": 4.106735086593971e-06, "loss": 0.966, "step": 18030 }, { "epoch": 0.2197969605011395, "grad_norm": 1.9245975017547607, "learning_rate": 4.106414368184734e-06, "loss": 0.8828, "step": 18035 }, { "epoch": 0.21985789672528733, "grad_norm": 2.064586639404297, "learning_rate": 4.106093649775497e-06, "loss": 0.8447, "step": 18040 }, { "epoch": 0.21991883294943512, "grad_norm": 1.9048844575881958, "learning_rate": 4.105772931366261e-06, "loss": 0.967, "step": 18045 }, { "epoch": 0.21997976917358292, "grad_norm": 1.8646011352539062, "learning_rate": 4.105452212957024e-06, "loss": 0.8516, "step": 18050 }, { "epoch": 0.22004070539773074, "grad_norm": 2.313730001449585, "learning_rate": 4.105131494547787e-06, "loss": 0.8417, "step": 18055 }, { "epoch": 0.22010164162187854, "grad_norm": 1.6836823225021362, "learning_rate": 4.104810776138551e-06, "loss": 0.8577, "step": 18060 }, { "epoch": 0.22016257784602636, "grad_norm": 1.7381865978240967, "learning_rate": 4.104490057729314e-06, "loss": 0.8819, "step": 18065 }, { "epoch": 0.22022351407017415, "grad_norm": 1.960488200187683, "learning_rate": 4.104169339320077e-06, "loss": 0.8511, "step": 18070 }, { "epoch": 0.22028445029432198, "grad_norm": 2.5608842372894287, "learning_rate": 4.103848620910841e-06, "loss": 0.8537, "step": 18075 }, { "epoch": 0.22034538651846977, "grad_norm": 1.9328621625900269, "learning_rate": 4.103527902501604e-06, "loss": 0.9065, "step": 18080 }, { "epoch": 0.22040632274261757, "grad_norm": 1.8587265014648438, "learning_rate": 4.103207184092367e-06, "loss": 0.8571, "step": 18085 }, { "epoch": 0.2204672589667654, "grad_norm": 1.672930121421814, "learning_rate": 4.102886465683131e-06, "loss": 0.8307, "step": 18090 }, { "epoch": 0.22052819519091318, "grad_norm": 1.718825101852417, "learning_rate": 4.102565747273894e-06, "loss": 0.9149, "step": 18095 }, { "epoch": 0.220589131415061, "grad_norm": 2.002222776412964, "learning_rate": 4.102245028864657e-06, "loss": 0.9212, "step": 18100 }, { "epoch": 0.2206500676392088, "grad_norm": 1.8713642358779907, "learning_rate": 4.101924310455421e-06, "loss": 0.8723, "step": 18105 }, { "epoch": 0.2207110038633566, "grad_norm": 1.8029046058654785, "learning_rate": 4.101603592046184e-06, "loss": 0.8779, "step": 18110 }, { "epoch": 0.22077194008750442, "grad_norm": 1.8111330270767212, "learning_rate": 4.101282873636947e-06, "loss": 0.9047, "step": 18115 }, { "epoch": 0.22083287631165222, "grad_norm": 1.7664821147918701, "learning_rate": 4.10096215522771e-06, "loss": 0.8793, "step": 18120 }, { "epoch": 0.22089381253580004, "grad_norm": 2.1784002780914307, "learning_rate": 4.100641436818474e-06, "loss": 0.8717, "step": 18125 }, { "epoch": 0.22095474875994783, "grad_norm": 1.937990427017212, "learning_rate": 4.100320718409237e-06, "loss": 0.846, "step": 18130 }, { "epoch": 0.22101568498409566, "grad_norm": 2.2293100357055664, "learning_rate": 4.1e-06, "loss": 0.8751, "step": 18135 }, { "epoch": 0.22107662120824345, "grad_norm": 1.8156776428222656, "learning_rate": 4.0996792815907636e-06, "loss": 0.8058, "step": 18140 }, { "epoch": 0.22113755743239125, "grad_norm": 2.3924238681793213, "learning_rate": 4.099358563181527e-06, "loss": 0.8986, "step": 18145 }, { "epoch": 0.22119849365653907, "grad_norm": 1.6358522176742554, "learning_rate": 4.0990378447722905e-06, "loss": 0.8572, "step": 18150 }, { "epoch": 0.22125942988068686, "grad_norm": 1.8841758966445923, "learning_rate": 4.0987171263630535e-06, "loss": 0.8872, "step": 18155 }, { "epoch": 0.2213203661048347, "grad_norm": 2.030954360961914, "learning_rate": 4.0983964079538165e-06, "loss": 0.8756, "step": 18160 }, { "epoch": 0.22138130232898248, "grad_norm": 1.969785213470459, "learning_rate": 4.09807568954458e-06, "loss": 0.8388, "step": 18165 }, { "epoch": 0.2214422385531303, "grad_norm": 2.185093879699707, "learning_rate": 4.097754971135343e-06, "loss": 0.9148, "step": 18170 }, { "epoch": 0.2215031747772781, "grad_norm": 1.990787148475647, "learning_rate": 4.097434252726107e-06, "loss": 0.8666, "step": 18175 }, { "epoch": 0.2215641110014259, "grad_norm": 2.1974434852600098, "learning_rate": 4.09711353431687e-06, "loss": 0.9065, "step": 18180 }, { "epoch": 0.22162504722557372, "grad_norm": 1.888755440711975, "learning_rate": 4.096792815907633e-06, "loss": 0.8331, "step": 18185 }, { "epoch": 0.2216859834497215, "grad_norm": 1.936599850654602, "learning_rate": 4.096472097498397e-06, "loss": 0.8809, "step": 18190 }, { "epoch": 0.22174691967386934, "grad_norm": 2.036860942840576, "learning_rate": 4.09615137908916e-06, "loss": 0.8949, "step": 18195 }, { "epoch": 0.22180785589801713, "grad_norm": 2.0174219608306885, "learning_rate": 4.095830660679924e-06, "loss": 0.9197, "step": 18200 }, { "epoch": 0.22186879212216495, "grad_norm": 1.9145011901855469, "learning_rate": 4.095509942270687e-06, "loss": 0.9188, "step": 18205 }, { "epoch": 0.22192972834631275, "grad_norm": 2.887331962585449, "learning_rate": 4.09518922386145e-06, "loss": 0.9023, "step": 18210 }, { "epoch": 0.22199066457046054, "grad_norm": 1.6165183782577515, "learning_rate": 4.094868505452213e-06, "loss": 0.8397, "step": 18215 }, { "epoch": 0.22205160079460837, "grad_norm": 1.923643946647644, "learning_rate": 4.094547787042977e-06, "loss": 0.8564, "step": 18220 }, { "epoch": 0.22211253701875616, "grad_norm": 1.920585036277771, "learning_rate": 4.09422706863374e-06, "loss": 0.9147, "step": 18225 }, { "epoch": 0.22217347324290398, "grad_norm": 1.868523359298706, "learning_rate": 4.093906350224503e-06, "loss": 0.9041, "step": 18230 }, { "epoch": 0.22223440946705178, "grad_norm": 1.972464919090271, "learning_rate": 4.093585631815267e-06, "loss": 0.843, "step": 18235 }, { "epoch": 0.2222953456911996, "grad_norm": 1.854888677597046, "learning_rate": 4.09326491340603e-06, "loss": 0.8355, "step": 18240 }, { "epoch": 0.2223562819153474, "grad_norm": 2.187993049621582, "learning_rate": 4.092944194996793e-06, "loss": 0.8878, "step": 18245 }, { "epoch": 0.2224172181394952, "grad_norm": 2.0950629711151123, "learning_rate": 4.092623476587557e-06, "loss": 0.8968, "step": 18250 }, { "epoch": 0.22247815436364302, "grad_norm": 1.916092872619629, "learning_rate": 4.09230275817832e-06, "loss": 0.8466, "step": 18255 }, { "epoch": 0.2225390905877908, "grad_norm": 2.3583266735076904, "learning_rate": 4.091982039769083e-06, "loss": 0.8685, "step": 18260 }, { "epoch": 0.22260002681193863, "grad_norm": 2.4631495475769043, "learning_rate": 4.091661321359847e-06, "loss": 0.8302, "step": 18265 }, { "epoch": 0.22266096303608643, "grad_norm": 1.9076573848724365, "learning_rate": 4.09134060295061e-06, "loss": 0.9834, "step": 18270 }, { "epoch": 0.22272189926023425, "grad_norm": 2.2142446041107178, "learning_rate": 4.091019884541373e-06, "loss": 0.8927, "step": 18275 }, { "epoch": 0.22278283548438205, "grad_norm": 1.9025325775146484, "learning_rate": 4.0906991661321365e-06, "loss": 0.8859, "step": 18280 }, { "epoch": 0.22284377170852984, "grad_norm": 2.2794079780578613, "learning_rate": 4.0903784477228995e-06, "loss": 0.9185, "step": 18285 }, { "epoch": 0.22290470793267766, "grad_norm": 1.8172988891601562, "learning_rate": 4.0900577293136625e-06, "loss": 0.9024, "step": 18290 }, { "epoch": 0.22296564415682546, "grad_norm": 1.715179443359375, "learning_rate": 4.0897370109044256e-06, "loss": 0.9342, "step": 18295 }, { "epoch": 0.22302658038097328, "grad_norm": 2.2873623371124268, "learning_rate": 4.0894162924951894e-06, "loss": 0.8702, "step": 18300 }, { "epoch": 0.22308751660512108, "grad_norm": 1.934671401977539, "learning_rate": 4.0890955740859524e-06, "loss": 0.8176, "step": 18305 }, { "epoch": 0.2231484528292689, "grad_norm": 1.8359843492507935, "learning_rate": 4.088774855676716e-06, "loss": 0.8708, "step": 18310 }, { "epoch": 0.2232093890534167, "grad_norm": 1.790290355682373, "learning_rate": 4.088454137267479e-06, "loss": 0.9456, "step": 18315 }, { "epoch": 0.2232703252775645, "grad_norm": 1.7944860458374023, "learning_rate": 4.088133418858243e-06, "loss": 0.9193, "step": 18320 }, { "epoch": 0.2233312615017123, "grad_norm": 2.5189850330352783, "learning_rate": 4.087812700449006e-06, "loss": 0.8526, "step": 18325 }, { "epoch": 0.2233921977258601, "grad_norm": 1.9607571363449097, "learning_rate": 4.087491982039769e-06, "loss": 0.8779, "step": 18330 }, { "epoch": 0.22345313395000793, "grad_norm": 1.8617191314697266, "learning_rate": 4.087171263630533e-06, "loss": 0.8871, "step": 18335 }, { "epoch": 0.22351407017415573, "grad_norm": 2.0983335971832275, "learning_rate": 4.086850545221296e-06, "loss": 0.895, "step": 18340 }, { "epoch": 0.22357500639830352, "grad_norm": 2.4429574012756348, "learning_rate": 4.08652982681206e-06, "loss": 0.9028, "step": 18345 }, { "epoch": 0.22363594262245134, "grad_norm": 2.240997314453125, "learning_rate": 4.086209108402823e-06, "loss": 0.8638, "step": 18350 }, { "epoch": 0.22369687884659914, "grad_norm": 2.0895562171936035, "learning_rate": 4.085888389993586e-06, "loss": 0.853, "step": 18355 }, { "epoch": 0.22375781507074696, "grad_norm": 2.141162633895874, "learning_rate": 4.08556767158435e-06, "loss": 0.8345, "step": 18360 }, { "epoch": 0.22381875129489476, "grad_norm": 1.9246746301651, "learning_rate": 4.085246953175113e-06, "loss": 0.891, "step": 18365 }, { "epoch": 0.22387968751904258, "grad_norm": 2.070371627807617, "learning_rate": 4.084926234765876e-06, "loss": 0.8475, "step": 18370 }, { "epoch": 0.22394062374319038, "grad_norm": 1.9267274141311646, "learning_rate": 4.084605516356639e-06, "loss": 0.9004, "step": 18375 }, { "epoch": 0.22400155996733817, "grad_norm": 1.8121980428695679, "learning_rate": 4.084284797947403e-06, "loss": 0.8321, "step": 18380 }, { "epoch": 0.224062496191486, "grad_norm": 2.25890851020813, "learning_rate": 4.083964079538166e-06, "loss": 0.9721, "step": 18385 }, { "epoch": 0.2241234324156338, "grad_norm": 1.7994178533554077, "learning_rate": 4.083643361128929e-06, "loss": 0.8444, "step": 18390 }, { "epoch": 0.2241843686397816, "grad_norm": 1.9514334201812744, "learning_rate": 4.083322642719693e-06, "loss": 0.9266, "step": 18395 }, { "epoch": 0.2242453048639294, "grad_norm": 1.8608362674713135, "learning_rate": 4.083001924310456e-06, "loss": 0.9227, "step": 18400 }, { "epoch": 0.22430624108807723, "grad_norm": 2.028188467025757, "learning_rate": 4.082681205901219e-06, "loss": 0.9163, "step": 18405 }, { "epoch": 0.22436717731222502, "grad_norm": 2.077139139175415, "learning_rate": 4.0823604874919826e-06, "loss": 0.8931, "step": 18410 }, { "epoch": 0.22442811353637282, "grad_norm": 2.070434093475342, "learning_rate": 4.082039769082746e-06, "loss": 0.895, "step": 18415 }, { "epoch": 0.22448904976052064, "grad_norm": 1.9898273944854736, "learning_rate": 4.081719050673509e-06, "loss": 0.8324, "step": 18420 }, { "epoch": 0.22454998598466844, "grad_norm": 2.2287886142730713, "learning_rate": 4.0813983322642725e-06, "loss": 0.8467, "step": 18425 }, { "epoch": 0.22461092220881626, "grad_norm": 2.0279176235198975, "learning_rate": 4.0810776138550355e-06, "loss": 0.8978, "step": 18430 }, { "epoch": 0.22467185843296406, "grad_norm": 1.867973804473877, "learning_rate": 4.0807568954457985e-06, "loss": 0.8571, "step": 18435 }, { "epoch": 0.22473279465711188, "grad_norm": 2.0326478481292725, "learning_rate": 4.080436177036562e-06, "loss": 0.8874, "step": 18440 }, { "epoch": 0.22479373088125967, "grad_norm": 1.9035325050354004, "learning_rate": 4.080115458627325e-06, "loss": 0.9133, "step": 18445 }, { "epoch": 0.22485466710540747, "grad_norm": 1.7082539796829224, "learning_rate": 4.079794740218088e-06, "loss": 0.7942, "step": 18450 }, { "epoch": 0.2249156033295553, "grad_norm": 2.0418848991394043, "learning_rate": 4.079474021808852e-06, "loss": 0.9272, "step": 18455 }, { "epoch": 0.2249765395537031, "grad_norm": 1.9782960414886475, "learning_rate": 4.079153303399615e-06, "loss": 0.8332, "step": 18460 }, { "epoch": 0.2250374757778509, "grad_norm": 2.0176799297332764, "learning_rate": 4.078832584990378e-06, "loss": 0.9032, "step": 18465 }, { "epoch": 0.2250984120019987, "grad_norm": 2.4332470893859863, "learning_rate": 4.078511866581142e-06, "loss": 0.8672, "step": 18470 }, { "epoch": 0.22515934822614653, "grad_norm": 1.9199113845825195, "learning_rate": 4.078191148171905e-06, "loss": 0.838, "step": 18475 }, { "epoch": 0.22522028445029432, "grad_norm": 1.5766364336013794, "learning_rate": 4.077870429762669e-06, "loss": 0.8661, "step": 18480 }, { "epoch": 0.22528122067444212, "grad_norm": 2.047525405883789, "learning_rate": 4.077549711353432e-06, "loss": 0.9057, "step": 18485 }, { "epoch": 0.22534215689858994, "grad_norm": 1.6460174322128296, "learning_rate": 4.077228992944195e-06, "loss": 0.8688, "step": 18490 }, { "epoch": 0.22540309312273774, "grad_norm": 1.8886234760284424, "learning_rate": 4.076908274534959e-06, "loss": 0.8802, "step": 18495 }, { "epoch": 0.22546402934688556, "grad_norm": 2.2419698238372803, "learning_rate": 4.076587556125722e-06, "loss": 0.9106, "step": 18500 }, { "epoch": 0.22552496557103335, "grad_norm": 1.945368766784668, "learning_rate": 4.076266837716486e-06, "loss": 0.8133, "step": 18505 }, { "epoch": 0.22558590179518118, "grad_norm": 1.8791096210479736, "learning_rate": 4.075946119307249e-06, "loss": 0.8994, "step": 18510 }, { "epoch": 0.22564683801932897, "grad_norm": 1.747198224067688, "learning_rate": 4.075625400898012e-06, "loss": 0.8647, "step": 18515 }, { "epoch": 0.22570777424347677, "grad_norm": 2.13981294631958, "learning_rate": 4.075304682488776e-06, "loss": 0.8752, "step": 18520 }, { "epoch": 0.2257687104676246, "grad_norm": 1.8851441144943237, "learning_rate": 4.074983964079539e-06, "loss": 0.9115, "step": 18525 }, { "epoch": 0.22582964669177238, "grad_norm": 1.8176923990249634, "learning_rate": 4.074663245670302e-06, "loss": 0.8512, "step": 18530 }, { "epoch": 0.2258905829159202, "grad_norm": 2.135298013687134, "learning_rate": 4.074342527261066e-06, "loss": 0.9529, "step": 18535 }, { "epoch": 0.225951519140068, "grad_norm": 1.8696128129959106, "learning_rate": 4.074021808851829e-06, "loss": 0.9679, "step": 18540 }, { "epoch": 0.22601245536421583, "grad_norm": 1.8875209093093872, "learning_rate": 4.073701090442592e-06, "loss": 0.9454, "step": 18545 }, { "epoch": 0.22607339158836362, "grad_norm": 2.125332832336426, "learning_rate": 4.073380372033355e-06, "loss": 0.8423, "step": 18550 }, { "epoch": 0.22613432781251142, "grad_norm": 1.8521184921264648, "learning_rate": 4.0730596536241185e-06, "loss": 0.8385, "step": 18555 }, { "epoch": 0.22619526403665924, "grad_norm": 1.7765716314315796, "learning_rate": 4.0727389352148815e-06, "loss": 0.9023, "step": 18560 }, { "epoch": 0.22625620026080703, "grad_norm": 1.9744242429733276, "learning_rate": 4.0724182168056446e-06, "loss": 0.9255, "step": 18565 }, { "epoch": 0.22631713648495486, "grad_norm": 1.955629587173462, "learning_rate": 4.0720974983964084e-06, "loss": 0.9233, "step": 18570 }, { "epoch": 0.22637807270910265, "grad_norm": 1.758359432220459, "learning_rate": 4.0717767799871714e-06, "loss": 0.8619, "step": 18575 }, { "epoch": 0.22643900893325045, "grad_norm": 1.7717753648757935, "learning_rate": 4.0714560615779345e-06, "loss": 0.862, "step": 18580 }, { "epoch": 0.22649994515739827, "grad_norm": 1.9021354913711548, "learning_rate": 4.071135343168698e-06, "loss": 0.9248, "step": 18585 }, { "epoch": 0.22656088138154606, "grad_norm": 1.7523397207260132, "learning_rate": 4.070814624759461e-06, "loss": 0.9399, "step": 18590 }, { "epoch": 0.2266218176056939, "grad_norm": 2.0204379558563232, "learning_rate": 4.070493906350224e-06, "loss": 0.7889, "step": 18595 }, { "epoch": 0.22668275382984168, "grad_norm": 2.07395601272583, "learning_rate": 4.070173187940988e-06, "loss": 0.9224, "step": 18600 }, { "epoch": 0.2267436900539895, "grad_norm": 2.098151683807373, "learning_rate": 4.069852469531751e-06, "loss": 0.8767, "step": 18605 }, { "epoch": 0.2268046262781373, "grad_norm": 2.0897443294525146, "learning_rate": 4.069531751122514e-06, "loss": 0.9392, "step": 18610 }, { "epoch": 0.2268655625022851, "grad_norm": 2.0419046878814697, "learning_rate": 4.069211032713278e-06, "loss": 0.8781, "step": 18615 }, { "epoch": 0.22692649872643292, "grad_norm": 1.779228687286377, "learning_rate": 4.068890314304041e-06, "loss": 0.8818, "step": 18620 }, { "epoch": 0.2269874349505807, "grad_norm": 2.0212173461914062, "learning_rate": 4.068569595894805e-06, "loss": 0.8528, "step": 18625 }, { "epoch": 0.22704837117472854, "grad_norm": 1.9156113862991333, "learning_rate": 4.068248877485568e-06, "loss": 0.8746, "step": 18630 }, { "epoch": 0.22710930739887633, "grad_norm": 1.9450089931488037, "learning_rate": 4.067928159076331e-06, "loss": 0.994, "step": 18635 }, { "epoch": 0.22717024362302415, "grad_norm": 1.7819877862930298, "learning_rate": 4.067607440667095e-06, "loss": 0.7901, "step": 18640 }, { "epoch": 0.22723117984717195, "grad_norm": 2.0642688274383545, "learning_rate": 4.067286722257858e-06, "loss": 0.8126, "step": 18645 }, { "epoch": 0.22729211607131974, "grad_norm": 2.089214563369751, "learning_rate": 4.066966003848622e-06, "loss": 0.9091, "step": 18650 }, { "epoch": 0.22735305229546757, "grad_norm": 2.0515170097351074, "learning_rate": 4.066645285439385e-06, "loss": 0.8464, "step": 18655 }, { "epoch": 0.22741398851961536, "grad_norm": 2.987180233001709, "learning_rate": 4.066324567030148e-06, "loss": 0.8935, "step": 18660 }, { "epoch": 0.22747492474376318, "grad_norm": 2.3409135341644287, "learning_rate": 4.066003848620912e-06, "loss": 0.878, "step": 18665 }, { "epoch": 0.22753586096791098, "grad_norm": 1.796581506729126, "learning_rate": 4.065683130211675e-06, "loss": 0.8948, "step": 18670 }, { "epoch": 0.2275967971920588, "grad_norm": 1.9457401037216187, "learning_rate": 4.065362411802438e-06, "loss": 0.8815, "step": 18675 }, { "epoch": 0.2276577334162066, "grad_norm": 1.8089557886123657, "learning_rate": 4.0650416933932016e-06, "loss": 0.8554, "step": 18680 }, { "epoch": 0.2277186696403544, "grad_norm": 1.8551353216171265, "learning_rate": 4.064720974983965e-06, "loss": 0.9482, "step": 18685 }, { "epoch": 0.22777960586450222, "grad_norm": 2.289339065551758, "learning_rate": 4.064400256574728e-06, "loss": 0.9437, "step": 18690 }, { "epoch": 0.22784054208865, "grad_norm": 2.1862363815307617, "learning_rate": 4.0640795381654915e-06, "loss": 0.9014, "step": 18695 }, { "epoch": 0.22790147831279783, "grad_norm": 2.036006450653076, "learning_rate": 4.0637588197562545e-06, "loss": 0.8857, "step": 18700 }, { "epoch": 0.22796241453694563, "grad_norm": 1.969812035560608, "learning_rate": 4.0634381013470175e-06, "loss": 0.8654, "step": 18705 }, { "epoch": 0.22802335076109345, "grad_norm": 2.2155346870422363, "learning_rate": 4.0631173829377805e-06, "loss": 0.9066, "step": 18710 }, { "epoch": 0.22808428698524125, "grad_norm": 1.9253402948379517, "learning_rate": 4.062796664528544e-06, "loss": 0.8627, "step": 18715 }, { "epoch": 0.22814522320938904, "grad_norm": 1.8823292255401611, "learning_rate": 4.062475946119307e-06, "loss": 0.8625, "step": 18720 }, { "epoch": 0.22820615943353686, "grad_norm": 1.7586854696273804, "learning_rate": 4.06215522771007e-06, "loss": 0.8413, "step": 18725 }, { "epoch": 0.22826709565768466, "grad_norm": 1.7159899473190308, "learning_rate": 4.061834509300834e-06, "loss": 0.9102, "step": 18730 }, { "epoch": 0.22832803188183248, "grad_norm": 2.0818991661071777, "learning_rate": 4.061513790891597e-06, "loss": 0.9176, "step": 18735 }, { "epoch": 0.22838896810598028, "grad_norm": 1.8386456966400146, "learning_rate": 4.06119307248236e-06, "loss": 0.9391, "step": 18740 }, { "epoch": 0.2284499043301281, "grad_norm": 1.8887711763381958, "learning_rate": 4.060872354073124e-06, "loss": 0.7934, "step": 18745 }, { "epoch": 0.2285108405542759, "grad_norm": 2.2249197959899902, "learning_rate": 4.060551635663887e-06, "loss": 0.8885, "step": 18750 }, { "epoch": 0.2285717767784237, "grad_norm": 2.0587728023529053, "learning_rate": 4.06023091725465e-06, "loss": 0.868, "step": 18755 }, { "epoch": 0.2286327130025715, "grad_norm": 1.839077353477478, "learning_rate": 4.059910198845414e-06, "loss": 0.9106, "step": 18760 }, { "epoch": 0.2286936492267193, "grad_norm": 2.0434036254882812, "learning_rate": 4.059589480436177e-06, "loss": 0.8808, "step": 18765 }, { "epoch": 0.22875458545086713, "grad_norm": 1.8097968101501465, "learning_rate": 4.059268762026941e-06, "loss": 0.9105, "step": 18770 }, { "epoch": 0.22881552167501493, "grad_norm": 1.8905415534973145, "learning_rate": 4.058948043617704e-06, "loss": 0.8778, "step": 18775 }, { "epoch": 0.22887645789916275, "grad_norm": 1.9703304767608643, "learning_rate": 4.058627325208467e-06, "loss": 0.8895, "step": 18780 }, { "epoch": 0.22893739412331054, "grad_norm": 1.676952838897705, "learning_rate": 4.058306606799231e-06, "loss": 0.8779, "step": 18785 }, { "epoch": 0.22899833034745834, "grad_norm": 2.135028600692749, "learning_rate": 4.057985888389994e-06, "loss": 0.8624, "step": 18790 }, { "epoch": 0.22905926657160616, "grad_norm": 1.8094449043273926, "learning_rate": 4.057665169980758e-06, "loss": 0.7879, "step": 18795 }, { "epoch": 0.22912020279575396, "grad_norm": 2.5763490200042725, "learning_rate": 4.057344451571521e-06, "loss": 0.8332, "step": 18800 }, { "epoch": 0.22918113901990178, "grad_norm": 2.2095835208892822, "learning_rate": 4.057023733162284e-06, "loss": 0.9403, "step": 18805 }, { "epoch": 0.22924207524404958, "grad_norm": 1.8749988079071045, "learning_rate": 4.056703014753048e-06, "loss": 0.794, "step": 18810 }, { "epoch": 0.22930301146819737, "grad_norm": 2.0281870365142822, "learning_rate": 4.056382296343811e-06, "loss": 0.8885, "step": 18815 }, { "epoch": 0.2293639476923452, "grad_norm": 2.62541127204895, "learning_rate": 4.056061577934574e-06, "loss": 0.8823, "step": 18820 }, { "epoch": 0.229424883916493, "grad_norm": 2.0497379302978516, "learning_rate": 4.0557408595253375e-06, "loss": 0.9206, "step": 18825 }, { "epoch": 0.2294858201406408, "grad_norm": 1.929711937904358, "learning_rate": 4.0554201411161005e-06, "loss": 0.9613, "step": 18830 }, { "epoch": 0.2295467563647886, "grad_norm": 2.0693185329437256, "learning_rate": 4.0550994227068636e-06, "loss": 0.8697, "step": 18835 }, { "epoch": 0.22960769258893643, "grad_norm": 2.4260103702545166, "learning_rate": 4.0547787042976274e-06, "loss": 0.8916, "step": 18840 }, { "epoch": 0.22966862881308422, "grad_norm": 2.0512893199920654, "learning_rate": 4.0544579858883904e-06, "loss": 0.8517, "step": 18845 }, { "epoch": 0.22972956503723202, "grad_norm": 2.013962745666504, "learning_rate": 4.0541372674791535e-06, "loss": 0.812, "step": 18850 }, { "epoch": 0.22979050126137984, "grad_norm": 2.410001516342163, "learning_rate": 4.053816549069917e-06, "loss": 0.8748, "step": 18855 }, { "epoch": 0.22985143748552764, "grad_norm": 2.1479992866516113, "learning_rate": 4.05349583066068e-06, "loss": 0.9488, "step": 18860 }, { "epoch": 0.22991237370967546, "grad_norm": 1.9724003076553345, "learning_rate": 4.053175112251443e-06, "loss": 0.8189, "step": 18865 }, { "epoch": 0.22997330993382326, "grad_norm": 1.8624165058135986, "learning_rate": 4.052854393842207e-06, "loss": 0.8475, "step": 18870 }, { "epoch": 0.23003424615797108, "grad_norm": 1.6386175155639648, "learning_rate": 4.05253367543297e-06, "loss": 0.8809, "step": 18875 }, { "epoch": 0.23009518238211887, "grad_norm": 1.865208625793457, "learning_rate": 4.052212957023733e-06, "loss": 0.8467, "step": 18880 }, { "epoch": 0.23015611860626667, "grad_norm": 1.9057024717330933, "learning_rate": 4.051892238614496e-06, "loss": 0.821, "step": 18885 }, { "epoch": 0.2302170548304145, "grad_norm": 1.8521151542663574, "learning_rate": 4.05157152020526e-06, "loss": 0.8767, "step": 18890 }, { "epoch": 0.2302779910545623, "grad_norm": 2.00256085395813, "learning_rate": 4.051250801796023e-06, "loss": 0.8514, "step": 18895 }, { "epoch": 0.2303389272787101, "grad_norm": 1.9228037595748901, "learning_rate": 4.050930083386786e-06, "loss": 0.8723, "step": 18900 }, { "epoch": 0.2303998635028579, "grad_norm": 2.106050968170166, "learning_rate": 4.05060936497755e-06, "loss": 0.9458, "step": 18905 }, { "epoch": 0.23046079972700573, "grad_norm": 1.7598413228988647, "learning_rate": 4.050288646568313e-06, "loss": 0.9042, "step": 18910 }, { "epoch": 0.23052173595115352, "grad_norm": 2.2180233001708984, "learning_rate": 4.049967928159076e-06, "loss": 0.842, "step": 18915 }, { "epoch": 0.23058267217530132, "grad_norm": 2.135659694671631, "learning_rate": 4.04964720974984e-06, "loss": 0.8987, "step": 18920 }, { "epoch": 0.23064360839944914, "grad_norm": 1.8589116334915161, "learning_rate": 4.049326491340603e-06, "loss": 0.8448, "step": 18925 }, { "epoch": 0.23070454462359694, "grad_norm": 1.90176522731781, "learning_rate": 4.049005772931367e-06, "loss": 0.8158, "step": 18930 }, { "epoch": 0.23076548084774476, "grad_norm": 1.8112326860427856, "learning_rate": 4.04868505452213e-06, "loss": 0.843, "step": 18935 }, { "epoch": 0.23082641707189255, "grad_norm": 2.3826308250427246, "learning_rate": 4.048364336112893e-06, "loss": 0.8623, "step": 18940 }, { "epoch": 0.23088735329604038, "grad_norm": 1.831437349319458, "learning_rate": 4.048043617703657e-06, "loss": 0.9204, "step": 18945 }, { "epoch": 0.23094828952018817, "grad_norm": 2.1045706272125244, "learning_rate": 4.04772289929442e-06, "loss": 0.8968, "step": 18950 }, { "epoch": 0.23100922574433597, "grad_norm": 1.809677243232727, "learning_rate": 4.047402180885184e-06, "loss": 0.9043, "step": 18955 }, { "epoch": 0.2310701619684838, "grad_norm": 1.8790043592453003, "learning_rate": 4.047081462475947e-06, "loss": 0.8741, "step": 18960 }, { "epoch": 0.23113109819263158, "grad_norm": 1.7717225551605225, "learning_rate": 4.04676074406671e-06, "loss": 0.8843, "step": 18965 }, { "epoch": 0.2311920344167794, "grad_norm": 1.7944337129592896, "learning_rate": 4.0464400256574735e-06, "loss": 0.804, "step": 18970 }, { "epoch": 0.2312529706409272, "grad_norm": 2.725700616836548, "learning_rate": 4.0461193072482365e-06, "loss": 0.9315, "step": 18975 }, { "epoch": 0.23131390686507503, "grad_norm": 1.955627202987671, "learning_rate": 4.0457985888389995e-06, "loss": 0.8591, "step": 18980 }, { "epoch": 0.23137484308922282, "grad_norm": 1.9652312994003296, "learning_rate": 4.045477870429763e-06, "loss": 0.9526, "step": 18985 }, { "epoch": 0.23143577931337062, "grad_norm": 1.7386419773101807, "learning_rate": 4.045157152020526e-06, "loss": 0.8858, "step": 18990 }, { "epoch": 0.23149671553751844, "grad_norm": 1.9236805438995361, "learning_rate": 4.044836433611289e-06, "loss": 0.9536, "step": 18995 }, { "epoch": 0.23155765176166623, "grad_norm": 2.170252561569214, "learning_rate": 4.044515715202053e-06, "loss": 0.829, "step": 19000 }, { "epoch": 0.23161858798581406, "grad_norm": 2.170844078063965, "learning_rate": 4.044194996792816e-06, "loss": 0.8866, "step": 19005 }, { "epoch": 0.23167952420996185, "grad_norm": 1.5641640424728394, "learning_rate": 4.043874278383579e-06, "loss": 0.7861, "step": 19010 }, { "epoch": 0.23174046043410967, "grad_norm": 1.7613128423690796, "learning_rate": 4.043553559974343e-06, "loss": 0.8432, "step": 19015 }, { "epoch": 0.23180139665825747, "grad_norm": 2.0572023391723633, "learning_rate": 4.043232841565106e-06, "loss": 0.9281, "step": 19020 }, { "epoch": 0.23186233288240526, "grad_norm": 2.198124408721924, "learning_rate": 4.042912123155869e-06, "loss": 0.7988, "step": 19025 }, { "epoch": 0.2319232691065531, "grad_norm": 1.8176190853118896, "learning_rate": 4.042591404746633e-06, "loss": 0.9433, "step": 19030 }, { "epoch": 0.23198420533070088, "grad_norm": 1.762030005455017, "learning_rate": 4.042270686337396e-06, "loss": 0.9471, "step": 19035 }, { "epoch": 0.2320451415548487, "grad_norm": 2.2579338550567627, "learning_rate": 4.041949967928159e-06, "loss": 0.9047, "step": 19040 }, { "epoch": 0.2321060777789965, "grad_norm": 1.8535661697387695, "learning_rate": 4.041629249518923e-06, "loss": 0.7964, "step": 19045 }, { "epoch": 0.23216701400314432, "grad_norm": 2.290147066116333, "learning_rate": 4.041308531109686e-06, "loss": 0.9029, "step": 19050 }, { "epoch": 0.23222795022729212, "grad_norm": 2.311534881591797, "learning_rate": 4.040987812700449e-06, "loss": 0.9195, "step": 19055 }, { "epoch": 0.2322888864514399, "grad_norm": 1.9989516735076904, "learning_rate": 4.040667094291212e-06, "loss": 0.9109, "step": 19060 }, { "epoch": 0.23234982267558774, "grad_norm": 2.1161320209503174, "learning_rate": 4.040346375881976e-06, "loss": 0.9195, "step": 19065 }, { "epoch": 0.23241075889973553, "grad_norm": 1.9006022214889526, "learning_rate": 4.040025657472739e-06, "loss": 0.8819, "step": 19070 }, { "epoch": 0.23247169512388335, "grad_norm": 2.052781105041504, "learning_rate": 4.039704939063503e-06, "loss": 0.9389, "step": 19075 }, { "epoch": 0.23253263134803115, "grad_norm": 1.9884896278381348, "learning_rate": 4.039384220654266e-06, "loss": 1.0474, "step": 19080 }, { "epoch": 0.23259356757217894, "grad_norm": 1.718116044998169, "learning_rate": 4.039063502245029e-06, "loss": 0.8941, "step": 19085 }, { "epoch": 0.23265450379632677, "grad_norm": 1.9366874694824219, "learning_rate": 4.038742783835793e-06, "loss": 0.8097, "step": 19090 }, { "epoch": 0.23271544002047456, "grad_norm": 2.5364208221435547, "learning_rate": 4.038422065426556e-06, "loss": 0.89, "step": 19095 }, { "epoch": 0.23277637624462238, "grad_norm": 1.6683229207992554, "learning_rate": 4.0381013470173195e-06, "loss": 0.8712, "step": 19100 }, { "epoch": 0.23283731246877018, "grad_norm": 1.7070444822311401, "learning_rate": 4.0377806286080826e-06, "loss": 0.83, "step": 19105 }, { "epoch": 0.232898248692918, "grad_norm": 2.32501220703125, "learning_rate": 4.037459910198846e-06, "loss": 0.9242, "step": 19110 }, { "epoch": 0.2329591849170658, "grad_norm": 1.9148492813110352, "learning_rate": 4.0371391917896094e-06, "loss": 0.8194, "step": 19115 }, { "epoch": 0.2330201211412136, "grad_norm": 1.9670644998550415, "learning_rate": 4.0368184733803725e-06, "loss": 0.8142, "step": 19120 }, { "epoch": 0.23308105736536142, "grad_norm": 2.056638479232788, "learning_rate": 4.036497754971136e-06, "loss": 0.8314, "step": 19125 }, { "epoch": 0.2331419935895092, "grad_norm": 2.2320358753204346, "learning_rate": 4.036177036561899e-06, "loss": 0.9543, "step": 19130 }, { "epoch": 0.23320292981365703, "grad_norm": 1.9483445882797241, "learning_rate": 4.035856318152662e-06, "loss": 0.9137, "step": 19135 }, { "epoch": 0.23326386603780483, "grad_norm": 2.0842106342315674, "learning_rate": 4.035535599743425e-06, "loss": 0.8759, "step": 19140 }, { "epoch": 0.23332480226195265, "grad_norm": 2.073651075363159, "learning_rate": 4.035214881334189e-06, "loss": 0.953, "step": 19145 }, { "epoch": 0.23338573848610045, "grad_norm": 1.9425280094146729, "learning_rate": 4.034894162924952e-06, "loss": 0.872, "step": 19150 }, { "epoch": 0.23344667471024824, "grad_norm": 2.7429590225219727, "learning_rate": 4.034573444515715e-06, "loss": 0.9925, "step": 19155 }, { "epoch": 0.23350761093439606, "grad_norm": 2.0003554821014404, "learning_rate": 4.034252726106479e-06, "loss": 0.8231, "step": 19160 }, { "epoch": 0.23356854715854386, "grad_norm": 1.698695182800293, "learning_rate": 4.033932007697242e-06, "loss": 0.7874, "step": 19165 }, { "epoch": 0.23362948338269168, "grad_norm": 1.839954137802124, "learning_rate": 4.033611289288005e-06, "loss": 0.8536, "step": 19170 }, { "epoch": 0.23369041960683948, "grad_norm": 1.7063325643539429, "learning_rate": 4.033290570878769e-06, "loss": 0.8738, "step": 19175 }, { "epoch": 0.2337513558309873, "grad_norm": 1.8720989227294922, "learning_rate": 4.032969852469532e-06, "loss": 0.9528, "step": 19180 }, { "epoch": 0.2338122920551351, "grad_norm": 2.1862151622772217, "learning_rate": 4.032649134060295e-06, "loss": 0.7962, "step": 19185 }, { "epoch": 0.2338732282792829, "grad_norm": 1.9090533256530762, "learning_rate": 4.032328415651059e-06, "loss": 0.9139, "step": 19190 }, { "epoch": 0.2339341645034307, "grad_norm": 1.768202304840088, "learning_rate": 4.032007697241822e-06, "loss": 0.9028, "step": 19195 }, { "epoch": 0.2339951007275785, "grad_norm": 2.0202958583831787, "learning_rate": 4.031686978832585e-06, "loss": 0.8407, "step": 19200 }, { "epoch": 0.23405603695172633, "grad_norm": 2.0324130058288574, "learning_rate": 4.031366260423349e-06, "loss": 0.8774, "step": 19205 }, { "epoch": 0.23411697317587413, "grad_norm": 1.9031805992126465, "learning_rate": 4.031045542014112e-06, "loss": 0.8802, "step": 19210 }, { "epoch": 0.23417790940002195, "grad_norm": 2.009974241256714, "learning_rate": 4.030724823604875e-06, "loss": 0.9197, "step": 19215 }, { "epoch": 0.23423884562416974, "grad_norm": 2.0508790016174316, "learning_rate": 4.030404105195638e-06, "loss": 0.8697, "step": 19220 }, { "epoch": 0.23429978184831754, "grad_norm": 2.046847105026245, "learning_rate": 4.030083386786402e-06, "loss": 0.9104, "step": 19225 }, { "epoch": 0.23436071807246536, "grad_norm": 2.389946937561035, "learning_rate": 4.029762668377165e-06, "loss": 0.8716, "step": 19230 }, { "epoch": 0.23442165429661316, "grad_norm": 1.8781136274337769, "learning_rate": 4.029441949967929e-06, "loss": 0.8756, "step": 19235 }, { "epoch": 0.23448259052076098, "grad_norm": 2.099588394165039, "learning_rate": 4.029121231558692e-06, "loss": 0.8448, "step": 19240 }, { "epoch": 0.23454352674490878, "grad_norm": 1.7871644496917725, "learning_rate": 4.0288005131494555e-06, "loss": 0.863, "step": 19245 }, { "epoch": 0.2346044629690566, "grad_norm": 2.136059522628784, "learning_rate": 4.0284797947402185e-06, "loss": 0.844, "step": 19250 }, { "epoch": 0.2346653991932044, "grad_norm": 1.6290377378463745, "learning_rate": 4.0281590763309815e-06, "loss": 0.8316, "step": 19255 }, { "epoch": 0.2347263354173522, "grad_norm": 2.3084025382995605, "learning_rate": 4.027838357921745e-06, "loss": 0.8318, "step": 19260 }, { "epoch": 0.2347872716415, "grad_norm": 1.9177720546722412, "learning_rate": 4.027517639512508e-06, "loss": 0.8279, "step": 19265 }, { "epoch": 0.2348482078656478, "grad_norm": 2.0827012062072754, "learning_rate": 4.027196921103272e-06, "loss": 0.836, "step": 19270 }, { "epoch": 0.23490914408979563, "grad_norm": 1.5986769199371338, "learning_rate": 4.026876202694035e-06, "loss": 0.8687, "step": 19275 }, { "epoch": 0.23497008031394342, "grad_norm": 1.8391985893249512, "learning_rate": 4.026555484284798e-06, "loss": 0.8997, "step": 19280 }, { "epoch": 0.23503101653809125, "grad_norm": 1.8008825778961182, "learning_rate": 4.026234765875562e-06, "loss": 0.8882, "step": 19285 }, { "epoch": 0.23509195276223904, "grad_norm": 1.889499306678772, "learning_rate": 4.025914047466325e-06, "loss": 0.8915, "step": 19290 }, { "epoch": 0.23515288898638684, "grad_norm": 2.4606757164001465, "learning_rate": 4.025593329057088e-06, "loss": 0.9789, "step": 19295 }, { "epoch": 0.23521382521053466, "grad_norm": 1.8305716514587402, "learning_rate": 4.025272610647851e-06, "loss": 0.8754, "step": 19300 }, { "epoch": 0.23527476143468246, "grad_norm": 1.728247046470642, "learning_rate": 4.024951892238615e-06, "loss": 0.8714, "step": 19305 }, { "epoch": 0.23533569765883028, "grad_norm": 1.8976150751113892, "learning_rate": 4.024631173829378e-06, "loss": 0.7969, "step": 19310 }, { "epoch": 0.23539663388297807, "grad_norm": 1.645179271697998, "learning_rate": 4.024310455420141e-06, "loss": 0.816, "step": 19315 }, { "epoch": 0.23545757010712587, "grad_norm": 1.9351493120193481, "learning_rate": 4.023989737010905e-06, "loss": 0.8734, "step": 19320 }, { "epoch": 0.2355185063312737, "grad_norm": 1.737830400466919, "learning_rate": 4.023669018601668e-06, "loss": 0.9799, "step": 19325 }, { "epoch": 0.2355794425554215, "grad_norm": 1.9602371454238892, "learning_rate": 4.023348300192431e-06, "loss": 0.9087, "step": 19330 }, { "epoch": 0.2356403787795693, "grad_norm": 2.0417540073394775, "learning_rate": 4.023027581783195e-06, "loss": 0.866, "step": 19335 }, { "epoch": 0.2357013150037171, "grad_norm": 2.1378376483917236, "learning_rate": 4.022706863373958e-06, "loss": 0.9085, "step": 19340 }, { "epoch": 0.23576225122786493, "grad_norm": 2.398854970932007, "learning_rate": 4.022386144964721e-06, "loss": 0.8763, "step": 19345 }, { "epoch": 0.23582318745201272, "grad_norm": 2.66814923286438, "learning_rate": 4.022065426555485e-06, "loss": 0.8825, "step": 19350 }, { "epoch": 0.23588412367616052, "grad_norm": 1.705403447151184, "learning_rate": 4.021744708146248e-06, "loss": 0.8885, "step": 19355 }, { "epoch": 0.23594505990030834, "grad_norm": 2.0696587562561035, "learning_rate": 4.021423989737011e-06, "loss": 1.0282, "step": 19360 }, { "epoch": 0.23600599612445614, "grad_norm": 2.0829782485961914, "learning_rate": 4.021103271327775e-06, "loss": 0.8732, "step": 19365 }, { "epoch": 0.23606693234860396, "grad_norm": 2.084188222885132, "learning_rate": 4.020782552918538e-06, "loss": 0.8001, "step": 19370 }, { "epoch": 0.23612786857275175, "grad_norm": 2.046260118484497, "learning_rate": 4.020461834509301e-06, "loss": 0.8321, "step": 19375 }, { "epoch": 0.23618880479689958, "grad_norm": 1.9790146350860596, "learning_rate": 4.020141116100065e-06, "loss": 0.8856, "step": 19380 }, { "epoch": 0.23624974102104737, "grad_norm": 1.8600516319274902, "learning_rate": 4.019820397690828e-06, "loss": 0.907, "step": 19385 }, { "epoch": 0.23631067724519517, "grad_norm": 2.0773680210113525, "learning_rate": 4.019499679281591e-06, "loss": 0.8901, "step": 19390 }, { "epoch": 0.236371613469343, "grad_norm": 1.9729554653167725, "learning_rate": 4.0191789608723545e-06, "loss": 0.9045, "step": 19395 }, { "epoch": 0.23643254969349078, "grad_norm": 1.8572821617126465, "learning_rate": 4.0188582424631175e-06, "loss": 0.9031, "step": 19400 }, { "epoch": 0.2364934859176386, "grad_norm": 2.2868762016296387, "learning_rate": 4.018537524053881e-06, "loss": 0.8716, "step": 19405 }, { "epoch": 0.2365544221417864, "grad_norm": 2.0339603424072266, "learning_rate": 4.018216805644644e-06, "loss": 0.9535, "step": 19410 }, { "epoch": 0.23661535836593423, "grad_norm": 2.220261335372925, "learning_rate": 4.017896087235407e-06, "loss": 0.9581, "step": 19415 }, { "epoch": 0.23667629459008202, "grad_norm": 2.0185139179229736, "learning_rate": 4.017575368826171e-06, "loss": 0.8433, "step": 19420 }, { "epoch": 0.23673723081422982, "grad_norm": 2.2248716354370117, "learning_rate": 4.017254650416934e-06, "loss": 0.8241, "step": 19425 }, { "epoch": 0.23679816703837764, "grad_norm": 2.123487949371338, "learning_rate": 4.016933932007698e-06, "loss": 0.8738, "step": 19430 }, { "epoch": 0.23685910326252543, "grad_norm": 1.9537124633789062, "learning_rate": 4.016613213598461e-06, "loss": 0.8832, "step": 19435 }, { "epoch": 0.23692003948667326, "grad_norm": 1.8691970109939575, "learning_rate": 4.016292495189224e-06, "loss": 0.8347, "step": 19440 }, { "epoch": 0.23698097571082105, "grad_norm": 2.2838146686553955, "learning_rate": 4.015971776779988e-06, "loss": 0.8842, "step": 19445 }, { "epoch": 0.23704191193496887, "grad_norm": 1.9978458881378174, "learning_rate": 4.015651058370751e-06, "loss": 0.8721, "step": 19450 }, { "epoch": 0.23710284815911667, "grad_norm": 1.8258254528045654, "learning_rate": 4.015330339961514e-06, "loss": 0.9352, "step": 19455 }, { "epoch": 0.23716378438326446, "grad_norm": 2.025703191757202, "learning_rate": 4.015009621552278e-06, "loss": 0.9432, "step": 19460 }, { "epoch": 0.2372247206074123, "grad_norm": 2.0500078201293945, "learning_rate": 4.014688903143041e-06, "loss": 0.94, "step": 19465 }, { "epoch": 0.23728565683156008, "grad_norm": 2.185725688934326, "learning_rate": 4.014368184733804e-06, "loss": 0.9177, "step": 19470 }, { "epoch": 0.2373465930557079, "grad_norm": 1.9074784517288208, "learning_rate": 4.014047466324567e-06, "loss": 0.8672, "step": 19475 }, { "epoch": 0.2374075292798557, "grad_norm": 2.421517848968506, "learning_rate": 4.013726747915331e-06, "loss": 0.8811, "step": 19480 }, { "epoch": 0.23746846550400352, "grad_norm": 2.0851173400878906, "learning_rate": 4.013406029506094e-06, "loss": 0.8265, "step": 19485 }, { "epoch": 0.23752940172815132, "grad_norm": 2.2967517375946045, "learning_rate": 4.013085311096857e-06, "loss": 0.8354, "step": 19490 }, { "epoch": 0.2375903379522991, "grad_norm": 2.1521708965301514, "learning_rate": 4.012764592687621e-06, "loss": 0.8572, "step": 19495 }, { "epoch": 0.23765127417644694, "grad_norm": 1.9701871871948242, "learning_rate": 4.012443874278384e-06, "loss": 0.9061, "step": 19500 }, { "epoch": 0.23771221040059473, "grad_norm": 1.7279490232467651, "learning_rate": 4.012123155869147e-06, "loss": 0.854, "step": 19505 }, { "epoch": 0.23777314662474255, "grad_norm": 2.177211284637451, "learning_rate": 4.011802437459911e-06, "loss": 0.9061, "step": 19510 }, { "epoch": 0.23783408284889035, "grad_norm": 2.7986199855804443, "learning_rate": 4.011481719050674e-06, "loss": 0.8676, "step": 19515 }, { "epoch": 0.23789501907303817, "grad_norm": 2.0607478618621826, "learning_rate": 4.011161000641437e-06, "loss": 0.8295, "step": 19520 }, { "epoch": 0.23795595529718597, "grad_norm": 1.6157352924346924, "learning_rate": 4.0108402822322005e-06, "loss": 0.7974, "step": 19525 }, { "epoch": 0.23801689152133376, "grad_norm": 1.8788713216781616, "learning_rate": 4.0105195638229636e-06, "loss": 0.7692, "step": 19530 }, { "epoch": 0.23807782774548158, "grad_norm": 1.7654625177383423, "learning_rate": 4.0101988454137266e-06, "loss": 0.85, "step": 19535 }, { "epoch": 0.23813876396962938, "grad_norm": 2.2578327655792236, "learning_rate": 4.0098781270044904e-06, "loss": 0.9154, "step": 19540 }, { "epoch": 0.2381997001937772, "grad_norm": 2.3565757274627686, "learning_rate": 4.0095574085952534e-06, "loss": 0.8595, "step": 19545 }, { "epoch": 0.238260636417925, "grad_norm": 2.100046396255493, "learning_rate": 4.009236690186017e-06, "loss": 0.8558, "step": 19550 }, { "epoch": 0.2383215726420728, "grad_norm": 2.4605214595794678, "learning_rate": 4.00891597177678e-06, "loss": 0.8638, "step": 19555 }, { "epoch": 0.23838250886622062, "grad_norm": 1.7967973947525024, "learning_rate": 4.008595253367543e-06, "loss": 0.9054, "step": 19560 }, { "epoch": 0.2384434450903684, "grad_norm": 1.981117606163025, "learning_rate": 4.008274534958307e-06, "loss": 0.8777, "step": 19565 }, { "epoch": 0.23850438131451623, "grad_norm": 1.8093947172164917, "learning_rate": 4.00795381654907e-06, "loss": 0.8951, "step": 19570 }, { "epoch": 0.23856531753866403, "grad_norm": 1.7118555307388306, "learning_rate": 4.007633098139834e-06, "loss": 0.9163, "step": 19575 }, { "epoch": 0.23862625376281185, "grad_norm": 1.855982780456543, "learning_rate": 4.007312379730597e-06, "loss": 0.8175, "step": 19580 }, { "epoch": 0.23868718998695965, "grad_norm": 2.151484966278076, "learning_rate": 4.00699166132136e-06, "loss": 0.9013, "step": 19585 }, { "epoch": 0.23874812621110744, "grad_norm": 1.7996001243591309, "learning_rate": 4.006670942912124e-06, "loss": 0.8764, "step": 19590 }, { "epoch": 0.23880906243525526, "grad_norm": 1.9039788246154785, "learning_rate": 4.006350224502887e-06, "loss": 0.8673, "step": 19595 }, { "epoch": 0.23886999865940306, "grad_norm": 1.6479182243347168, "learning_rate": 4.00602950609365e-06, "loss": 0.8898, "step": 19600 }, { "epoch": 0.23893093488355088, "grad_norm": 2.3539159297943115, "learning_rate": 4.005708787684414e-06, "loss": 0.848, "step": 19605 }, { "epoch": 0.23899187110769868, "grad_norm": 2.273139238357544, "learning_rate": 4.005388069275177e-06, "loss": 0.7888, "step": 19610 }, { "epoch": 0.2390528073318465, "grad_norm": 2.0919177532196045, "learning_rate": 4.00506735086594e-06, "loss": 0.8382, "step": 19615 }, { "epoch": 0.2391137435559943, "grad_norm": 2.0388519763946533, "learning_rate": 4.004746632456704e-06, "loss": 0.9185, "step": 19620 }, { "epoch": 0.2391746797801421, "grad_norm": 2.0555949211120605, "learning_rate": 4.004425914047467e-06, "loss": 0.8776, "step": 19625 }, { "epoch": 0.2392356160042899, "grad_norm": 2.2792623043060303, "learning_rate": 4.00410519563823e-06, "loss": 0.9017, "step": 19630 }, { "epoch": 0.2392965522284377, "grad_norm": 1.8325453996658325, "learning_rate": 4.003784477228994e-06, "loss": 0.8635, "step": 19635 }, { "epoch": 0.23935748845258553, "grad_norm": 2.069856643676758, "learning_rate": 4.003463758819757e-06, "loss": 0.8149, "step": 19640 }, { "epoch": 0.23941842467673333, "grad_norm": 2.166717052459717, "learning_rate": 4.00314304041052e-06, "loss": 0.8569, "step": 19645 }, { "epoch": 0.23947936090088115, "grad_norm": 1.8062973022460938, "learning_rate": 4.002822322001283e-06, "loss": 0.8422, "step": 19650 }, { "epoch": 0.23954029712502894, "grad_norm": 1.5748616456985474, "learning_rate": 4.002501603592047e-06, "loss": 0.8163, "step": 19655 }, { "epoch": 0.23960123334917674, "grad_norm": 1.9272775650024414, "learning_rate": 4.00218088518281e-06, "loss": 0.9111, "step": 19660 }, { "epoch": 0.23966216957332456, "grad_norm": 1.7508589029312134, "learning_rate": 4.001860166773573e-06, "loss": 0.872, "step": 19665 }, { "epoch": 0.23972310579747236, "grad_norm": 1.7349071502685547, "learning_rate": 4.0015394483643365e-06, "loss": 0.84, "step": 19670 }, { "epoch": 0.23978404202162018, "grad_norm": 2.032625436782837, "learning_rate": 4.0012187299550995e-06, "loss": 0.8222, "step": 19675 }, { "epoch": 0.23984497824576798, "grad_norm": 2.2298882007598877, "learning_rate": 4.0008980115458625e-06, "loss": 0.8404, "step": 19680 }, { "epoch": 0.2399059144699158, "grad_norm": 2.13055682182312, "learning_rate": 4.000577293136626e-06, "loss": 0.8623, "step": 19685 }, { "epoch": 0.2399668506940636, "grad_norm": 1.958418369293213, "learning_rate": 4.000256574727389e-06, "loss": 0.8071, "step": 19690 }, { "epoch": 0.2400277869182114, "grad_norm": 1.9061728715896606, "learning_rate": 3.999935856318152e-06, "loss": 0.8802, "step": 19695 }, { "epoch": 0.2400887231423592, "grad_norm": 2.112729072570801, "learning_rate": 3.999615137908916e-06, "loss": 0.8817, "step": 19700 }, { "epoch": 0.240149659366507, "grad_norm": 1.8137043714523315, "learning_rate": 3.999294419499679e-06, "loss": 0.8244, "step": 19705 }, { "epoch": 0.24021059559065483, "grad_norm": 1.8908439874649048, "learning_rate": 3.998973701090443e-06, "loss": 0.8921, "step": 19710 }, { "epoch": 0.24027153181480262, "grad_norm": 2.2568154335021973, "learning_rate": 3.998652982681206e-06, "loss": 0.8607, "step": 19715 }, { "epoch": 0.24033246803895045, "grad_norm": 2.031553030014038, "learning_rate": 3.99833226427197e-06, "loss": 0.8605, "step": 19720 }, { "epoch": 0.24039340426309824, "grad_norm": 1.6654366254806519, "learning_rate": 3.998011545862733e-06, "loss": 0.8623, "step": 19725 }, { "epoch": 0.24045434048724604, "grad_norm": 1.8054828643798828, "learning_rate": 3.997690827453496e-06, "loss": 0.8726, "step": 19730 }, { "epoch": 0.24051527671139386, "grad_norm": 2.163785934448242, "learning_rate": 3.99737010904426e-06, "loss": 0.8783, "step": 19735 }, { "epoch": 0.24057621293554166, "grad_norm": 2.0084433555603027, "learning_rate": 3.997049390635023e-06, "loss": 0.8807, "step": 19740 }, { "epoch": 0.24063714915968948, "grad_norm": 1.999938726425171, "learning_rate": 3.996728672225786e-06, "loss": 0.8444, "step": 19745 }, { "epoch": 0.24069808538383727, "grad_norm": 1.7599295377731323, "learning_rate": 3.99640795381655e-06, "loss": 0.9192, "step": 19750 }, { "epoch": 0.2407590216079851, "grad_norm": 2.3643672466278076, "learning_rate": 3.996087235407313e-06, "loss": 0.9834, "step": 19755 }, { "epoch": 0.2408199578321329, "grad_norm": 2.097561836242676, "learning_rate": 3.995766516998076e-06, "loss": 0.9043, "step": 19760 }, { "epoch": 0.2408808940562807, "grad_norm": 1.9636703729629517, "learning_rate": 3.99544579858884e-06, "loss": 0.8297, "step": 19765 }, { "epoch": 0.2409418302804285, "grad_norm": 1.8893511295318604, "learning_rate": 3.995125080179603e-06, "loss": 0.8319, "step": 19770 }, { "epoch": 0.2410027665045763, "grad_norm": 1.8731379508972168, "learning_rate": 3.994804361770366e-06, "loss": 0.8984, "step": 19775 }, { "epoch": 0.24106370272872413, "grad_norm": 2.0021753311157227, "learning_rate": 3.99448364336113e-06, "loss": 0.9248, "step": 19780 }, { "epoch": 0.24112463895287192, "grad_norm": 2.0738954544067383, "learning_rate": 3.994162924951893e-06, "loss": 0.9259, "step": 19785 }, { "epoch": 0.24118557517701972, "grad_norm": 2.1361172199249268, "learning_rate": 3.993842206542656e-06, "loss": 0.8666, "step": 19790 }, { "epoch": 0.24124651140116754, "grad_norm": 1.6912981271743774, "learning_rate": 3.9935214881334195e-06, "loss": 0.9102, "step": 19795 }, { "epoch": 0.24130744762531534, "grad_norm": 2.2773053646087646, "learning_rate": 3.9932007697241826e-06, "loss": 0.9674, "step": 19800 }, { "epoch": 0.24136838384946316, "grad_norm": 1.9203500747680664, "learning_rate": 3.9928800513149456e-06, "loss": 0.8949, "step": 19805 }, { "epoch": 0.24142932007361095, "grad_norm": 3.0278825759887695, "learning_rate": 3.992559332905709e-06, "loss": 0.887, "step": 19810 }, { "epoch": 0.24149025629775878, "grad_norm": 1.9897805452346802, "learning_rate": 3.9922386144964724e-06, "loss": 0.7788, "step": 19815 }, { "epoch": 0.24155119252190657, "grad_norm": 1.9438616037368774, "learning_rate": 3.9919178960872355e-06, "loss": 0.9086, "step": 19820 }, { "epoch": 0.24161212874605437, "grad_norm": 1.823204755783081, "learning_rate": 3.9915971776779985e-06, "loss": 0.8734, "step": 19825 }, { "epoch": 0.2416730649702022, "grad_norm": 1.6896013021469116, "learning_rate": 3.991276459268762e-06, "loss": 0.878, "step": 19830 }, { "epoch": 0.24173400119434998, "grad_norm": 2.3057634830474854, "learning_rate": 3.990955740859525e-06, "loss": 0.8881, "step": 19835 }, { "epoch": 0.2417949374184978, "grad_norm": 1.7041480541229248, "learning_rate": 3.990635022450288e-06, "loss": 0.8329, "step": 19840 }, { "epoch": 0.2418558736426456, "grad_norm": 1.9940372705459595, "learning_rate": 3.990314304041052e-06, "loss": 0.8457, "step": 19845 }, { "epoch": 0.24191680986679343, "grad_norm": 2.272329092025757, "learning_rate": 3.989993585631815e-06, "loss": 0.8235, "step": 19850 }, { "epoch": 0.24197774609094122, "grad_norm": 2.1512868404388428, "learning_rate": 3.989672867222579e-06, "loss": 0.9064, "step": 19855 }, { "epoch": 0.24203868231508902, "grad_norm": 1.8232367038726807, "learning_rate": 3.989352148813342e-06, "loss": 0.8444, "step": 19860 }, { "epoch": 0.24209961853923684, "grad_norm": 3.07846736907959, "learning_rate": 3.989031430404105e-06, "loss": 0.9235, "step": 19865 }, { "epoch": 0.24216055476338463, "grad_norm": 2.252542018890381, "learning_rate": 3.988710711994869e-06, "loss": 0.8333, "step": 19870 }, { "epoch": 0.24222149098753246, "grad_norm": 2.261488437652588, "learning_rate": 3.988389993585632e-06, "loss": 0.774, "step": 19875 }, { "epoch": 0.24228242721168025, "grad_norm": 1.8444559574127197, "learning_rate": 3.988069275176396e-06, "loss": 0.8593, "step": 19880 }, { "epoch": 0.24234336343582807, "grad_norm": 2.369457721710205, "learning_rate": 3.987748556767159e-06, "loss": 0.9036, "step": 19885 }, { "epoch": 0.24240429965997587, "grad_norm": 2.0635063648223877, "learning_rate": 3.987427838357922e-06, "loss": 0.8797, "step": 19890 }, { "epoch": 0.24246523588412366, "grad_norm": 1.8108925819396973, "learning_rate": 3.987107119948686e-06, "loss": 0.8875, "step": 19895 }, { "epoch": 0.2425261721082715, "grad_norm": 1.9640955924987793, "learning_rate": 3.986786401539449e-06, "loss": 0.9553, "step": 19900 }, { "epoch": 0.24258710833241928, "grad_norm": 1.8387409448623657, "learning_rate": 3.986465683130212e-06, "loss": 0.9033, "step": 19905 }, { "epoch": 0.2426480445565671, "grad_norm": 2.1657378673553467, "learning_rate": 3.986144964720976e-06, "loss": 0.822, "step": 19910 }, { "epoch": 0.2427089807807149, "grad_norm": 1.9317892789840698, "learning_rate": 3.985824246311739e-06, "loss": 0.9689, "step": 19915 }, { "epoch": 0.24276991700486272, "grad_norm": 1.939085841178894, "learning_rate": 3.985503527902502e-06, "loss": 0.8699, "step": 19920 }, { "epoch": 0.24283085322901052, "grad_norm": 1.99168860912323, "learning_rate": 3.985182809493266e-06, "loss": 0.853, "step": 19925 }, { "epoch": 0.2428917894531583, "grad_norm": 1.704331398010254, "learning_rate": 3.984862091084029e-06, "loss": 0.8575, "step": 19930 }, { "epoch": 0.24295272567730614, "grad_norm": 1.8084568977355957, "learning_rate": 3.984541372674792e-06, "loss": 0.8345, "step": 19935 }, { "epoch": 0.24301366190145393, "grad_norm": 1.7955260276794434, "learning_rate": 3.9842206542655555e-06, "loss": 0.8783, "step": 19940 }, { "epoch": 0.24307459812560175, "grad_norm": 1.6253100633621216, "learning_rate": 3.9838999358563185e-06, "loss": 0.8991, "step": 19945 }, { "epoch": 0.24313553434974955, "grad_norm": 2.0713863372802734, "learning_rate": 3.9835792174470815e-06, "loss": 0.7887, "step": 19950 }, { "epoch": 0.24319647057389737, "grad_norm": 1.8154845237731934, "learning_rate": 3.983258499037845e-06, "loss": 0.8497, "step": 19955 }, { "epoch": 0.24325740679804517, "grad_norm": 1.8937398195266724, "learning_rate": 3.982937780628608e-06, "loss": 0.9566, "step": 19960 }, { "epoch": 0.24331834302219296, "grad_norm": 2.0144741535186768, "learning_rate": 3.982617062219371e-06, "loss": 0.8356, "step": 19965 }, { "epoch": 0.24337927924634079, "grad_norm": 1.7492940425872803, "learning_rate": 3.982296343810135e-06, "loss": 0.878, "step": 19970 }, { "epoch": 0.24344021547048858, "grad_norm": 1.879436731338501, "learning_rate": 3.981975625400898e-06, "loss": 0.8803, "step": 19975 }, { "epoch": 0.2435011516946364, "grad_norm": 2.4977118968963623, "learning_rate": 3.981654906991661e-06, "loss": 0.8012, "step": 19980 }, { "epoch": 0.2435620879187842, "grad_norm": 2.4708621501922607, "learning_rate": 3.981334188582424e-06, "loss": 0.8415, "step": 19985 }, { "epoch": 0.24362302414293202, "grad_norm": 1.7359098196029663, "learning_rate": 3.981013470173188e-06, "loss": 0.8734, "step": 19990 }, { "epoch": 0.24368396036707982, "grad_norm": 2.257596969604492, "learning_rate": 3.980692751763951e-06, "loss": 0.84, "step": 19995 }, { "epoch": 0.2437448965912276, "grad_norm": 2.101271867752075, "learning_rate": 3.980372033354715e-06, "loss": 0.8967, "step": 20000 }, { "epoch": 0.24380583281537543, "grad_norm": 2.0905001163482666, "learning_rate": 3.980051314945478e-06, "loss": 0.8229, "step": 20005 }, { "epoch": 0.24386676903952323, "grad_norm": 1.9016568660736084, "learning_rate": 3.979730596536241e-06, "loss": 0.8696, "step": 20010 }, { "epoch": 0.24392770526367105, "grad_norm": 2.1826019287109375, "learning_rate": 3.979409878127005e-06, "loss": 0.9456, "step": 20015 }, { "epoch": 0.24398864148781885, "grad_norm": 1.7798550128936768, "learning_rate": 3.979089159717768e-06, "loss": 0.9284, "step": 20020 }, { "epoch": 0.24404957771196664, "grad_norm": 2.2939257621765137, "learning_rate": 3.978768441308532e-06, "loss": 0.9373, "step": 20025 }, { "epoch": 0.24411051393611446, "grad_norm": 2.215803623199463, "learning_rate": 3.978447722899295e-06, "loss": 0.8593, "step": 20030 }, { "epoch": 0.24417145016026226, "grad_norm": 2.2049875259399414, "learning_rate": 3.978127004490058e-06, "loss": 0.8772, "step": 20035 }, { "epoch": 0.24423238638441008, "grad_norm": 1.8906055688858032, "learning_rate": 3.977806286080822e-06, "loss": 0.8716, "step": 20040 }, { "epoch": 0.24429332260855788, "grad_norm": 2.049525737762451, "learning_rate": 3.977485567671585e-06, "loss": 0.8357, "step": 20045 }, { "epoch": 0.2443542588327057, "grad_norm": 2.012098550796509, "learning_rate": 3.977164849262349e-06, "loss": 0.9118, "step": 20050 }, { "epoch": 0.2444151950568535, "grad_norm": 1.8973057270050049, "learning_rate": 3.976844130853112e-06, "loss": 0.8619, "step": 20055 }, { "epoch": 0.2444761312810013, "grad_norm": 1.9980216026306152, "learning_rate": 3.976523412443875e-06, "loss": 0.8878, "step": 20060 }, { "epoch": 0.2445370675051491, "grad_norm": 2.0917251110076904, "learning_rate": 3.976202694034638e-06, "loss": 0.8441, "step": 20065 }, { "epoch": 0.2445980037292969, "grad_norm": 1.7851181030273438, "learning_rate": 3.9758819756254016e-06, "loss": 0.8178, "step": 20070 }, { "epoch": 0.24465893995344473, "grad_norm": 1.8354523181915283, "learning_rate": 3.9755612572161646e-06, "loss": 0.8778, "step": 20075 }, { "epoch": 0.24471987617759253, "grad_norm": 1.6982712745666504, "learning_rate": 3.975240538806928e-06, "loss": 0.9144, "step": 20080 }, { "epoch": 0.24478081240174035, "grad_norm": 2.2253825664520264, "learning_rate": 3.9749198203976914e-06, "loss": 0.7952, "step": 20085 }, { "epoch": 0.24484174862588814, "grad_norm": 1.910660743713379, "learning_rate": 3.9745991019884545e-06, "loss": 0.8899, "step": 20090 }, { "epoch": 0.24490268485003594, "grad_norm": 2.44638991355896, "learning_rate": 3.9742783835792175e-06, "loss": 0.8277, "step": 20095 }, { "epoch": 0.24496362107418376, "grad_norm": 1.8824032545089722, "learning_rate": 3.973957665169981e-06, "loss": 0.9241, "step": 20100 }, { "epoch": 0.24502455729833156, "grad_norm": 2.0053019523620605, "learning_rate": 3.973636946760744e-06, "loss": 0.8947, "step": 20105 }, { "epoch": 0.24508549352247938, "grad_norm": 2.021008253097534, "learning_rate": 3.973316228351507e-06, "loss": 0.8569, "step": 20110 }, { "epoch": 0.24514642974662718, "grad_norm": 1.9714910984039307, "learning_rate": 3.972995509942271e-06, "loss": 0.8602, "step": 20115 }, { "epoch": 0.245207365970775, "grad_norm": 2.1408469676971436, "learning_rate": 3.972674791533034e-06, "loss": 0.9135, "step": 20120 }, { "epoch": 0.2452683021949228, "grad_norm": 2.044102668762207, "learning_rate": 3.972354073123797e-06, "loss": 0.875, "step": 20125 }, { "epoch": 0.2453292384190706, "grad_norm": 1.8303337097167969, "learning_rate": 3.972033354714561e-06, "loss": 0.8829, "step": 20130 }, { "epoch": 0.2453901746432184, "grad_norm": 1.8730727434158325, "learning_rate": 3.971712636305324e-06, "loss": 0.8299, "step": 20135 }, { "epoch": 0.2454511108673662, "grad_norm": 1.6662976741790771, "learning_rate": 3.971391917896087e-06, "loss": 0.8914, "step": 20140 }, { "epoch": 0.24551204709151403, "grad_norm": 2.213547706604004, "learning_rate": 3.97107119948685e-06, "loss": 0.8688, "step": 20145 }, { "epoch": 0.24557298331566182, "grad_norm": 1.9667208194732666, "learning_rate": 3.970750481077614e-06, "loss": 0.8871, "step": 20150 }, { "epoch": 0.24563391953980965, "grad_norm": 2.7964224815368652, "learning_rate": 3.970429762668377e-06, "loss": 0.9587, "step": 20155 }, { "epoch": 0.24569485576395744, "grad_norm": 2.0893547534942627, "learning_rate": 3.970109044259141e-06, "loss": 0.8271, "step": 20160 }, { "epoch": 0.24575579198810524, "grad_norm": 1.8167531490325928, "learning_rate": 3.969788325849904e-06, "loss": 0.8458, "step": 20165 }, { "epoch": 0.24581672821225306, "grad_norm": 2.0208323001861572, "learning_rate": 3.969467607440668e-06, "loss": 0.889, "step": 20170 }, { "epoch": 0.24587766443640086, "grad_norm": 1.9900057315826416, "learning_rate": 3.969146889031431e-06, "loss": 0.8773, "step": 20175 }, { "epoch": 0.24593860066054868, "grad_norm": 2.7711403369903564, "learning_rate": 3.968826170622194e-06, "loss": 0.8412, "step": 20180 }, { "epoch": 0.24599953688469647, "grad_norm": 2.610698938369751, "learning_rate": 3.968505452212958e-06, "loss": 0.8466, "step": 20185 }, { "epoch": 0.2460604731088443, "grad_norm": 1.8968451023101807, "learning_rate": 3.968184733803721e-06, "loss": 0.8485, "step": 20190 }, { "epoch": 0.2461214093329921, "grad_norm": 1.8757811784744263, "learning_rate": 3.967864015394485e-06, "loss": 0.8983, "step": 20195 }, { "epoch": 0.2461823455571399, "grad_norm": 2.0803885459899902, "learning_rate": 3.967543296985248e-06, "loss": 0.8845, "step": 20200 }, { "epoch": 0.2462432817812877, "grad_norm": 1.7355930805206299, "learning_rate": 3.967222578576011e-06, "loss": 0.8231, "step": 20205 }, { "epoch": 0.2463042180054355, "grad_norm": 1.888188362121582, "learning_rate": 3.9669018601667745e-06, "loss": 0.9373, "step": 20210 }, { "epoch": 0.24636515422958333, "grad_norm": 1.9611009359359741, "learning_rate": 3.9665811417575375e-06, "loss": 0.9007, "step": 20215 }, { "epoch": 0.24642609045373112, "grad_norm": 2.1420106887817383, "learning_rate": 3.9662604233483005e-06, "loss": 0.8049, "step": 20220 }, { "epoch": 0.24648702667787895, "grad_norm": 1.9114516973495483, "learning_rate": 3.9659397049390635e-06, "loss": 0.8763, "step": 20225 }, { "epoch": 0.24654796290202674, "grad_norm": 2.103421211242676, "learning_rate": 3.965618986529827e-06, "loss": 0.8584, "step": 20230 }, { "epoch": 0.24660889912617454, "grad_norm": 1.8120564222335815, "learning_rate": 3.96529826812059e-06, "loss": 0.8617, "step": 20235 }, { "epoch": 0.24666983535032236, "grad_norm": 2.0189051628112793, "learning_rate": 3.9649775497113534e-06, "loss": 0.92, "step": 20240 }, { "epoch": 0.24673077157447015, "grad_norm": 1.7100919485092163, "learning_rate": 3.964656831302117e-06, "loss": 0.8888, "step": 20245 }, { "epoch": 0.24679170779861798, "grad_norm": 1.7010751962661743, "learning_rate": 3.96433611289288e-06, "loss": 0.878, "step": 20250 }, { "epoch": 0.24685264402276577, "grad_norm": 1.9818346500396729, "learning_rate": 3.964015394483643e-06, "loss": 0.8255, "step": 20255 }, { "epoch": 0.24691358024691357, "grad_norm": 2.2381792068481445, "learning_rate": 3.963694676074407e-06, "loss": 0.8706, "step": 20260 }, { "epoch": 0.2469745164710614, "grad_norm": 2.06204891204834, "learning_rate": 3.96337395766517e-06, "loss": 0.9104, "step": 20265 }, { "epoch": 0.24703545269520918, "grad_norm": 2.1997172832489014, "learning_rate": 3.963053239255933e-06, "loss": 0.9168, "step": 20270 }, { "epoch": 0.247096388919357, "grad_norm": 1.7947367429733276, "learning_rate": 3.962732520846697e-06, "loss": 0.931, "step": 20275 }, { "epoch": 0.2471573251435048, "grad_norm": 1.9506750106811523, "learning_rate": 3.96241180243746e-06, "loss": 0.864, "step": 20280 }, { "epoch": 0.24721826136765263, "grad_norm": 1.8549553155899048, "learning_rate": 3.962091084028223e-06, "loss": 0.8558, "step": 20285 }, { "epoch": 0.24727919759180042, "grad_norm": 1.913120150566101, "learning_rate": 3.961770365618987e-06, "loss": 0.9012, "step": 20290 }, { "epoch": 0.24734013381594822, "grad_norm": 1.9867440462112427, "learning_rate": 3.96144964720975e-06, "loss": 0.8731, "step": 20295 }, { "epoch": 0.24740107004009604, "grad_norm": 2.115830183029175, "learning_rate": 3.961128928800513e-06, "loss": 0.9036, "step": 20300 }, { "epoch": 0.24746200626424383, "grad_norm": 1.779584288597107, "learning_rate": 3.960808210391277e-06, "loss": 0.8463, "step": 20305 }, { "epoch": 0.24752294248839166, "grad_norm": 1.8221848011016846, "learning_rate": 3.96048749198204e-06, "loss": 0.8442, "step": 20310 }, { "epoch": 0.24758387871253945, "grad_norm": 1.8415415287017822, "learning_rate": 3.960166773572803e-06, "loss": 0.9106, "step": 20315 }, { "epoch": 0.24764481493668727, "grad_norm": 1.7721322774887085, "learning_rate": 3.959846055163567e-06, "loss": 0.9182, "step": 20320 }, { "epoch": 0.24770575116083507, "grad_norm": 2.314324378967285, "learning_rate": 3.95952533675433e-06, "loss": 0.8796, "step": 20325 }, { "epoch": 0.24776668738498286, "grad_norm": 1.871891975402832, "learning_rate": 3.959204618345094e-06, "loss": 0.8916, "step": 20330 }, { "epoch": 0.2478276236091307, "grad_norm": 2.0179593563079834, "learning_rate": 3.958883899935857e-06, "loss": 0.9069, "step": 20335 }, { "epoch": 0.24788855983327848, "grad_norm": 2.004131555557251, "learning_rate": 3.95856318152662e-06, "loss": 0.9388, "step": 20340 }, { "epoch": 0.2479494960574263, "grad_norm": 1.8060946464538574, "learning_rate": 3.9582424631173836e-06, "loss": 0.8555, "step": 20345 }, { "epoch": 0.2480104322815741, "grad_norm": 1.90669846534729, "learning_rate": 3.957921744708147e-06, "loss": 0.9066, "step": 20350 }, { "epoch": 0.24807136850572192, "grad_norm": 1.7960171699523926, "learning_rate": 3.9576010262989104e-06, "loss": 0.8929, "step": 20355 }, { "epoch": 0.24813230472986972, "grad_norm": 2.042306661605835, "learning_rate": 3.9572803078896735e-06, "loss": 0.9198, "step": 20360 }, { "epoch": 0.2481932409540175, "grad_norm": 2.0898313522338867, "learning_rate": 3.9569595894804365e-06, "loss": 0.8328, "step": 20365 }, { "epoch": 0.24825417717816534, "grad_norm": 2.0205576419830322, "learning_rate": 3.9566388710712e-06, "loss": 0.9005, "step": 20370 }, { "epoch": 0.24831511340231313, "grad_norm": 1.9837173223495483, "learning_rate": 3.956318152661963e-06, "loss": 0.9613, "step": 20375 }, { "epoch": 0.24837604962646095, "grad_norm": 2.326122999191284, "learning_rate": 3.955997434252726e-06, "loss": 0.8167, "step": 20380 }, { "epoch": 0.24843698585060875, "grad_norm": 2.185067892074585, "learning_rate": 3.95567671584349e-06, "loss": 0.8333, "step": 20385 }, { "epoch": 0.24849792207475657, "grad_norm": 2.3737313747406006, "learning_rate": 3.955355997434253e-06, "loss": 0.9506, "step": 20390 }, { "epoch": 0.24855885829890437, "grad_norm": 2.053720712661743, "learning_rate": 3.955035279025016e-06, "loss": 0.8729, "step": 20395 }, { "epoch": 0.24861979452305216, "grad_norm": 2.1456971168518066, "learning_rate": 3.954714560615779e-06, "loss": 0.8953, "step": 20400 }, { "epoch": 0.24868073074719999, "grad_norm": 1.8431895971298218, "learning_rate": 3.954393842206543e-06, "loss": 0.81, "step": 20405 }, { "epoch": 0.24874166697134778, "grad_norm": 2.1497702598571777, "learning_rate": 3.954073123797306e-06, "loss": 0.8896, "step": 20410 }, { "epoch": 0.2488026031954956, "grad_norm": 1.9030544757843018, "learning_rate": 3.953752405388069e-06, "loss": 0.8667, "step": 20415 }, { "epoch": 0.2488635394196434, "grad_norm": 1.6439902782440186, "learning_rate": 3.953431686978833e-06, "loss": 0.8926, "step": 20420 }, { "epoch": 0.24892447564379122, "grad_norm": 1.944155216217041, "learning_rate": 3.953110968569596e-06, "loss": 0.9293, "step": 20425 }, { "epoch": 0.24898541186793902, "grad_norm": 1.8028484582901, "learning_rate": 3.952790250160359e-06, "loss": 0.8859, "step": 20430 }, { "epoch": 0.2490463480920868, "grad_norm": 1.8033865690231323, "learning_rate": 3.952469531751123e-06, "loss": 0.8839, "step": 20435 }, { "epoch": 0.24910728431623463, "grad_norm": 1.938604474067688, "learning_rate": 3.952148813341886e-06, "loss": 0.8687, "step": 20440 }, { "epoch": 0.24916822054038243, "grad_norm": 2.2116260528564453, "learning_rate": 3.951828094932649e-06, "loss": 0.9375, "step": 20445 }, { "epoch": 0.24922915676453025, "grad_norm": 2.8309104442596436, "learning_rate": 3.951507376523413e-06, "loss": 0.8553, "step": 20450 }, { "epoch": 0.24929009298867805, "grad_norm": 1.9825389385223389, "learning_rate": 3.951186658114176e-06, "loss": 0.9098, "step": 20455 }, { "epoch": 0.24935102921282587, "grad_norm": 2.1736209392547607, "learning_rate": 3.950865939704939e-06, "loss": 0.9293, "step": 20460 }, { "epoch": 0.24941196543697366, "grad_norm": 2.041499137878418, "learning_rate": 3.950545221295703e-06, "loss": 0.856, "step": 20465 }, { "epoch": 0.24947290166112146, "grad_norm": 1.965883493423462, "learning_rate": 3.950224502886466e-06, "loss": 0.8684, "step": 20470 }, { "epoch": 0.24953383788526928, "grad_norm": 2.034339189529419, "learning_rate": 3.94990378447723e-06, "loss": 0.8368, "step": 20475 }, { "epoch": 0.24959477410941708, "grad_norm": 1.7974947690963745, "learning_rate": 3.949583066067993e-06, "loss": 0.835, "step": 20480 }, { "epoch": 0.2496557103335649, "grad_norm": 1.8866875171661377, "learning_rate": 3.949262347658756e-06, "loss": 0.914, "step": 20485 }, { "epoch": 0.2497166465577127, "grad_norm": 1.7850273847579956, "learning_rate": 3.9489416292495195e-06, "loss": 0.812, "step": 20490 }, { "epoch": 0.2497775827818605, "grad_norm": 2.140653610229492, "learning_rate": 3.9486209108402825e-06, "loss": 0.8644, "step": 20495 }, { "epoch": 0.2498385190060083, "grad_norm": 1.7365961074829102, "learning_rate": 3.948300192431046e-06, "loss": 0.8905, "step": 20500 }, { "epoch": 0.2498994552301561, "grad_norm": 1.672214150428772, "learning_rate": 3.947979474021809e-06, "loss": 0.8165, "step": 20505 }, { "epoch": 0.24996039145430393, "grad_norm": 1.9118249416351318, "learning_rate": 3.9476587556125724e-06, "loss": 0.9143, "step": 20510 }, { "epoch": 0.25002132767845175, "grad_norm": 2.238084077835083, "learning_rate": 3.947338037203336e-06, "loss": 0.9393, "step": 20515 }, { "epoch": 0.2500822639025995, "grad_norm": 2.258120536804199, "learning_rate": 3.947017318794099e-06, "loss": 0.9493, "step": 20520 }, { "epoch": 0.25014320012674734, "grad_norm": 2.279426336288452, "learning_rate": 3.946696600384862e-06, "loss": 0.845, "step": 20525 }, { "epoch": 0.25020413635089517, "grad_norm": 1.874711036682129, "learning_rate": 3.946375881975626e-06, "loss": 0.8656, "step": 20530 }, { "epoch": 0.25026507257504294, "grad_norm": 1.8317508697509766, "learning_rate": 3.946055163566389e-06, "loss": 0.9413, "step": 20535 }, { "epoch": 0.25032600879919076, "grad_norm": 1.7275134325027466, "learning_rate": 3.945734445157152e-06, "loss": 0.8497, "step": 20540 }, { "epoch": 0.2503869450233386, "grad_norm": 1.7537471055984497, "learning_rate": 3.945413726747916e-06, "loss": 0.8464, "step": 20545 }, { "epoch": 0.2504478812474864, "grad_norm": 1.721104383468628, "learning_rate": 3.945093008338679e-06, "loss": 0.84, "step": 20550 }, { "epoch": 0.25050881747163417, "grad_norm": 2.037761926651001, "learning_rate": 3.944772289929442e-06, "loss": 0.838, "step": 20555 }, { "epoch": 0.250569753695782, "grad_norm": 2.0541491508483887, "learning_rate": 3.944451571520206e-06, "loss": 0.9086, "step": 20560 }, { "epoch": 0.2506306899199298, "grad_norm": 2.18768310546875, "learning_rate": 3.944130853110969e-06, "loss": 0.9186, "step": 20565 }, { "epoch": 0.2506916261440776, "grad_norm": 1.9970712661743164, "learning_rate": 3.943810134701732e-06, "loss": 0.7869, "step": 20570 }, { "epoch": 0.2507525623682254, "grad_norm": 2.012892484664917, "learning_rate": 3.943489416292495e-06, "loss": 0.8343, "step": 20575 }, { "epoch": 0.25081349859237323, "grad_norm": 1.9931546449661255, "learning_rate": 3.943168697883259e-06, "loss": 0.926, "step": 20580 }, { "epoch": 0.25087443481652105, "grad_norm": 1.9360313415527344, "learning_rate": 3.942847979474022e-06, "loss": 0.8688, "step": 20585 }, { "epoch": 0.2509353710406688, "grad_norm": 2.0229296684265137, "learning_rate": 3.942527261064785e-06, "loss": 0.8606, "step": 20590 }, { "epoch": 0.25099630726481664, "grad_norm": 1.7345850467681885, "learning_rate": 3.942206542655549e-06, "loss": 0.93, "step": 20595 }, { "epoch": 0.25105724348896447, "grad_norm": 1.8325965404510498, "learning_rate": 3.941885824246312e-06, "loss": 0.9033, "step": 20600 }, { "epoch": 0.25111817971311223, "grad_norm": 2.1138665676116943, "learning_rate": 3.941565105837075e-06, "loss": 0.9227, "step": 20605 }, { "epoch": 0.25117911593726006, "grad_norm": 1.7135008573532104, "learning_rate": 3.941244387427839e-06, "loss": 0.8467, "step": 20610 }, { "epoch": 0.2512400521614079, "grad_norm": 1.7539314031600952, "learning_rate": 3.940923669018602e-06, "loss": 0.9236, "step": 20615 }, { "epoch": 0.2513009883855557, "grad_norm": 1.9259377717971802, "learning_rate": 3.940602950609365e-06, "loss": 0.8884, "step": 20620 }, { "epoch": 0.25136192460970347, "grad_norm": 1.7605352401733398, "learning_rate": 3.940282232200129e-06, "loss": 0.845, "step": 20625 }, { "epoch": 0.2514228608338513, "grad_norm": 1.9458211660385132, "learning_rate": 3.939961513790892e-06, "loss": 0.9028, "step": 20630 }, { "epoch": 0.2514837970579991, "grad_norm": 2.178375720977783, "learning_rate": 3.9396407953816555e-06, "loss": 0.882, "step": 20635 }, { "epoch": 0.2515447332821469, "grad_norm": 2.1010782718658447, "learning_rate": 3.9393200769724185e-06, "loss": 0.8635, "step": 20640 }, { "epoch": 0.2516056695062947, "grad_norm": 1.8574018478393555, "learning_rate": 3.938999358563182e-06, "loss": 0.8163, "step": 20645 }, { "epoch": 0.2516666057304425, "grad_norm": 2.100407361984253, "learning_rate": 3.938678640153945e-06, "loss": 0.8707, "step": 20650 }, { "epoch": 0.25172754195459035, "grad_norm": 1.9937467575073242, "learning_rate": 3.938357921744708e-06, "loss": 0.8134, "step": 20655 }, { "epoch": 0.2517884781787381, "grad_norm": 2.3155477046966553, "learning_rate": 3.938037203335472e-06, "loss": 0.8243, "step": 20660 }, { "epoch": 0.25184941440288594, "grad_norm": 1.8894050121307373, "learning_rate": 3.937716484926235e-06, "loss": 0.8126, "step": 20665 }, { "epoch": 0.25191035062703376, "grad_norm": 1.788356900215149, "learning_rate": 3.937395766516998e-06, "loss": 0.9157, "step": 20670 }, { "epoch": 0.25197128685118153, "grad_norm": 2.2871267795562744, "learning_rate": 3.937075048107762e-06, "loss": 0.8456, "step": 20675 }, { "epoch": 0.25203222307532935, "grad_norm": 1.930370807647705, "learning_rate": 3.936754329698525e-06, "loss": 0.9828, "step": 20680 }, { "epoch": 0.2520931592994772, "grad_norm": 1.9849876165390015, "learning_rate": 3.936433611289288e-06, "loss": 0.9061, "step": 20685 }, { "epoch": 0.252154095523625, "grad_norm": 1.6857826709747314, "learning_rate": 3.936112892880052e-06, "loss": 0.8157, "step": 20690 }, { "epoch": 0.25221503174777277, "grad_norm": 1.971266508102417, "learning_rate": 3.935792174470815e-06, "loss": 0.8867, "step": 20695 }, { "epoch": 0.2522759679719206, "grad_norm": 1.8201146125793457, "learning_rate": 3.935471456061578e-06, "loss": 0.8342, "step": 20700 }, { "epoch": 0.2523369041960684, "grad_norm": 2.2724781036376953, "learning_rate": 3.935150737652342e-06, "loss": 0.9084, "step": 20705 }, { "epoch": 0.2523978404202162, "grad_norm": 2.2737748622894287, "learning_rate": 3.934830019243105e-06, "loss": 0.9457, "step": 20710 }, { "epoch": 0.252458776644364, "grad_norm": 2.4306089878082275, "learning_rate": 3.934509300833868e-06, "loss": 0.8695, "step": 20715 }, { "epoch": 0.2525197128685118, "grad_norm": 2.0240252017974854, "learning_rate": 3.934188582424632e-06, "loss": 0.8766, "step": 20720 }, { "epoch": 0.25258064909265965, "grad_norm": 1.9409259557724, "learning_rate": 3.933867864015395e-06, "loss": 0.7981, "step": 20725 }, { "epoch": 0.2526415853168074, "grad_norm": 1.9620230197906494, "learning_rate": 3.933547145606158e-06, "loss": 0.8608, "step": 20730 }, { "epoch": 0.25270252154095524, "grad_norm": 1.9319623708724976, "learning_rate": 3.933226427196921e-06, "loss": 0.9229, "step": 20735 }, { "epoch": 0.25276345776510306, "grad_norm": 2.076225757598877, "learning_rate": 3.932905708787685e-06, "loss": 0.9021, "step": 20740 }, { "epoch": 0.25282439398925083, "grad_norm": 1.9258075952529907, "learning_rate": 3.932584990378448e-06, "loss": 0.8591, "step": 20745 }, { "epoch": 0.25288533021339865, "grad_norm": 2.0167078971862793, "learning_rate": 3.932264271969211e-06, "loss": 0.885, "step": 20750 }, { "epoch": 0.2529462664375465, "grad_norm": 1.863008737564087, "learning_rate": 3.931943553559975e-06, "loss": 0.9166, "step": 20755 }, { "epoch": 0.2530072026616943, "grad_norm": 1.8341844081878662, "learning_rate": 3.931622835150738e-06, "loss": 0.9135, "step": 20760 }, { "epoch": 0.25306813888584206, "grad_norm": 1.5778694152832031, "learning_rate": 3.931302116741501e-06, "loss": 0.8641, "step": 20765 }, { "epoch": 0.2531290751099899, "grad_norm": 1.8541117906570435, "learning_rate": 3.9309813983322646e-06, "loss": 0.8519, "step": 20770 }, { "epoch": 0.2531900113341377, "grad_norm": 1.9842627048492432, "learning_rate": 3.9306606799230276e-06, "loss": 0.8652, "step": 20775 }, { "epoch": 0.2532509475582855, "grad_norm": 2.234964609146118, "learning_rate": 3.9303399615137914e-06, "loss": 0.8375, "step": 20780 }, { "epoch": 0.2533118837824333, "grad_norm": 2.1882736682891846, "learning_rate": 3.9300192431045545e-06, "loss": 0.8007, "step": 20785 }, { "epoch": 0.2533728200065811, "grad_norm": 2.264559745788574, "learning_rate": 3.9296985246953175e-06, "loss": 0.8017, "step": 20790 }, { "epoch": 0.25343375623072895, "grad_norm": 1.6918330192565918, "learning_rate": 3.929377806286081e-06, "loss": 0.7609, "step": 20795 }, { "epoch": 0.2534946924548767, "grad_norm": 1.8245352506637573, "learning_rate": 3.929057087876844e-06, "loss": 0.8779, "step": 20800 }, { "epoch": 0.25355562867902454, "grad_norm": 1.853847622871399, "learning_rate": 3.928736369467608e-06, "loss": 0.844, "step": 20805 }, { "epoch": 0.25361656490317236, "grad_norm": 1.9413106441497803, "learning_rate": 3.928415651058371e-06, "loss": 0.8343, "step": 20810 }, { "epoch": 0.2536775011273201, "grad_norm": 2.219999074935913, "learning_rate": 3.928094932649134e-06, "loss": 0.8906, "step": 20815 }, { "epoch": 0.25373843735146795, "grad_norm": 2.0475685596466064, "learning_rate": 3.927774214239898e-06, "loss": 0.8343, "step": 20820 }, { "epoch": 0.25379937357561577, "grad_norm": 1.96064293384552, "learning_rate": 3.927453495830661e-06, "loss": 0.853, "step": 20825 }, { "epoch": 0.2538603097997636, "grad_norm": 1.7361083030700684, "learning_rate": 3.927132777421424e-06, "loss": 0.8318, "step": 20830 }, { "epoch": 0.25392124602391136, "grad_norm": 1.8865361213684082, "learning_rate": 3.926812059012188e-06, "loss": 0.8331, "step": 20835 }, { "epoch": 0.2539821822480592, "grad_norm": 2.0524914264678955, "learning_rate": 3.926491340602951e-06, "loss": 0.8588, "step": 20840 }, { "epoch": 0.254043118472207, "grad_norm": 1.9469540119171143, "learning_rate": 3.926170622193714e-06, "loss": 0.8114, "step": 20845 }, { "epoch": 0.2541040546963548, "grad_norm": 1.9711449146270752, "learning_rate": 3.925849903784478e-06, "loss": 0.8947, "step": 20850 }, { "epoch": 0.2541649909205026, "grad_norm": 1.7453713417053223, "learning_rate": 3.925529185375241e-06, "loss": 0.9619, "step": 20855 }, { "epoch": 0.2542259271446504, "grad_norm": 2.013662099838257, "learning_rate": 3.925208466966004e-06, "loss": 0.925, "step": 20860 }, { "epoch": 0.2542868633687982, "grad_norm": 1.824202060699463, "learning_rate": 3.924887748556768e-06, "loss": 0.8594, "step": 20865 }, { "epoch": 0.254347799592946, "grad_norm": 2.083495855331421, "learning_rate": 3.924567030147531e-06, "loss": 0.8333, "step": 20870 }, { "epoch": 0.25440873581709383, "grad_norm": 1.9956657886505127, "learning_rate": 3.924246311738294e-06, "loss": 0.8193, "step": 20875 }, { "epoch": 0.25446967204124166, "grad_norm": 2.2074942588806152, "learning_rate": 3.923925593329058e-06, "loss": 0.8983, "step": 20880 }, { "epoch": 0.2545306082653894, "grad_norm": 1.968729019165039, "learning_rate": 3.923604874919821e-06, "loss": 0.8594, "step": 20885 }, { "epoch": 0.25459154448953725, "grad_norm": 1.909103274345398, "learning_rate": 3.923284156510584e-06, "loss": 0.8544, "step": 20890 }, { "epoch": 0.25465248071368507, "grad_norm": 2.146594285964966, "learning_rate": 3.922963438101348e-06, "loss": 0.8445, "step": 20895 }, { "epoch": 0.25471341693783284, "grad_norm": 2.050238847732544, "learning_rate": 3.922642719692111e-06, "loss": 0.8777, "step": 20900 }, { "epoch": 0.25477435316198066, "grad_norm": 2.0038180351257324, "learning_rate": 3.922322001282874e-06, "loss": 0.8385, "step": 20905 }, { "epoch": 0.2548352893861285, "grad_norm": 1.9311127662658691, "learning_rate": 3.922001282873637e-06, "loss": 0.891, "step": 20910 }, { "epoch": 0.2548962256102763, "grad_norm": 1.878934621810913, "learning_rate": 3.9216805644644005e-06, "loss": 0.8951, "step": 20915 }, { "epoch": 0.2549571618344241, "grad_norm": 1.9751380681991577, "learning_rate": 3.9213598460551635e-06, "loss": 0.9014, "step": 20920 }, { "epoch": 0.2550180980585719, "grad_norm": 1.8672444820404053, "learning_rate": 3.921039127645927e-06, "loss": 0.9458, "step": 20925 }, { "epoch": 0.2550790342827197, "grad_norm": 1.6714637279510498, "learning_rate": 3.92071840923669e-06, "loss": 0.919, "step": 20930 }, { "epoch": 0.2551399705068675, "grad_norm": 1.9006502628326416, "learning_rate": 3.9203976908274534e-06, "loss": 0.8485, "step": 20935 }, { "epoch": 0.2552009067310153, "grad_norm": 2.082820177078247, "learning_rate": 3.920076972418217e-06, "loss": 0.829, "step": 20940 }, { "epoch": 0.25526184295516313, "grad_norm": 1.6866569519042969, "learning_rate": 3.91975625400898e-06, "loss": 0.8548, "step": 20945 }, { "epoch": 0.25532277917931095, "grad_norm": 2.4315197467803955, "learning_rate": 3.919435535599744e-06, "loss": 0.9184, "step": 20950 }, { "epoch": 0.2553837154034587, "grad_norm": 1.8427889347076416, "learning_rate": 3.919114817190507e-06, "loss": 0.8453, "step": 20955 }, { "epoch": 0.25544465162760654, "grad_norm": 2.2335190773010254, "learning_rate": 3.91879409878127e-06, "loss": 0.8641, "step": 20960 }, { "epoch": 0.25550558785175437, "grad_norm": 2.058795213699341, "learning_rate": 3.918473380372034e-06, "loss": 0.8834, "step": 20965 }, { "epoch": 0.25556652407590214, "grad_norm": 2.231555700302124, "learning_rate": 3.918152661962797e-06, "loss": 0.853, "step": 20970 }, { "epoch": 0.25562746030004996, "grad_norm": 1.8810755014419556, "learning_rate": 3.917831943553561e-06, "loss": 0.9109, "step": 20975 }, { "epoch": 0.2556883965241978, "grad_norm": 2.118870496749878, "learning_rate": 3.917511225144324e-06, "loss": 0.8877, "step": 20980 }, { "epoch": 0.2557493327483456, "grad_norm": 1.90227210521698, "learning_rate": 3.917190506735087e-06, "loss": 0.908, "step": 20985 }, { "epoch": 0.25581026897249337, "grad_norm": 2.0872867107391357, "learning_rate": 3.91686978832585e-06, "loss": 0.899, "step": 20990 }, { "epoch": 0.2558712051966412, "grad_norm": 1.9177403450012207, "learning_rate": 3.916549069916614e-06, "loss": 0.8871, "step": 20995 }, { "epoch": 0.255932141420789, "grad_norm": 1.9372472763061523, "learning_rate": 3.916228351507377e-06, "loss": 0.8604, "step": 21000 }, { "epoch": 0.2559930776449368, "grad_norm": 1.8794547319412231, "learning_rate": 3.91590763309814e-06, "loss": 0.8827, "step": 21005 }, { "epoch": 0.2560540138690846, "grad_norm": 2.4133694171905518, "learning_rate": 3.915586914688904e-06, "loss": 0.903, "step": 21010 }, { "epoch": 0.25611495009323243, "grad_norm": 1.6716033220291138, "learning_rate": 3.915266196279667e-06, "loss": 0.8871, "step": 21015 }, { "epoch": 0.25617588631738025, "grad_norm": 1.835924744606018, "learning_rate": 3.91494547787043e-06, "loss": 0.8678, "step": 21020 }, { "epoch": 0.256236822541528, "grad_norm": 1.705031156539917, "learning_rate": 3.914624759461194e-06, "loss": 0.8824, "step": 21025 }, { "epoch": 0.25629775876567584, "grad_norm": 1.7123932838439941, "learning_rate": 3.914304041051957e-06, "loss": 0.911, "step": 21030 }, { "epoch": 0.25635869498982367, "grad_norm": 2.0263030529022217, "learning_rate": 3.91398332264272e-06, "loss": 0.852, "step": 21035 }, { "epoch": 0.25641963121397143, "grad_norm": 2.53005313873291, "learning_rate": 3.9136626042334836e-06, "loss": 0.9103, "step": 21040 }, { "epoch": 0.25648056743811926, "grad_norm": 2.3366684913635254, "learning_rate": 3.9133418858242466e-06, "loss": 0.8511, "step": 21045 }, { "epoch": 0.2565415036622671, "grad_norm": 1.8434561491012573, "learning_rate": 3.91302116741501e-06, "loss": 0.8977, "step": 21050 }, { "epoch": 0.2566024398864149, "grad_norm": 4.632591247558594, "learning_rate": 3.9127004490057735e-06, "loss": 0.8839, "step": 21055 }, { "epoch": 0.25666337611056267, "grad_norm": 1.9692310094833374, "learning_rate": 3.9123797305965365e-06, "loss": 0.8647, "step": 21060 }, { "epoch": 0.2567243123347105, "grad_norm": 1.84557044506073, "learning_rate": 3.9120590121872995e-06, "loss": 0.86, "step": 21065 }, { "epoch": 0.2567852485588583, "grad_norm": 2.3328347206115723, "learning_rate": 3.9117382937780625e-06, "loss": 0.9101, "step": 21070 }, { "epoch": 0.2568461847830061, "grad_norm": 1.9205299615859985, "learning_rate": 3.911417575368826e-06, "loss": 0.8121, "step": 21075 }, { "epoch": 0.2569071210071539, "grad_norm": 1.9158040285110474, "learning_rate": 3.911096856959589e-06, "loss": 0.933, "step": 21080 }, { "epoch": 0.2569680572313017, "grad_norm": 1.9597370624542236, "learning_rate": 3.910776138550353e-06, "loss": 0.8331, "step": 21085 }, { "epoch": 0.25702899345544955, "grad_norm": 1.9404858350753784, "learning_rate": 3.910455420141116e-06, "loss": 0.9052, "step": 21090 }, { "epoch": 0.2570899296795973, "grad_norm": 1.9880619049072266, "learning_rate": 3.910134701731879e-06, "loss": 0.924, "step": 21095 }, { "epoch": 0.25715086590374514, "grad_norm": 1.7659704685211182, "learning_rate": 3.909813983322643e-06, "loss": 0.8976, "step": 21100 }, { "epoch": 0.25721180212789296, "grad_norm": 1.825416922569275, "learning_rate": 3.909493264913406e-06, "loss": 0.9055, "step": 21105 }, { "epoch": 0.25727273835204073, "grad_norm": 1.733480453491211, "learning_rate": 3.90917254650417e-06, "loss": 0.8978, "step": 21110 }, { "epoch": 0.25733367457618855, "grad_norm": 2.8356990814208984, "learning_rate": 3.908851828094933e-06, "loss": 0.9126, "step": 21115 }, { "epoch": 0.2573946108003364, "grad_norm": 1.8776382207870483, "learning_rate": 3.908531109685697e-06, "loss": 0.829, "step": 21120 }, { "epoch": 0.2574555470244842, "grad_norm": 1.9158709049224854, "learning_rate": 3.90821039127646e-06, "loss": 0.8719, "step": 21125 }, { "epoch": 0.25751648324863197, "grad_norm": 1.984753131866455, "learning_rate": 3.907889672867223e-06, "loss": 0.9375, "step": 21130 }, { "epoch": 0.2575774194727798, "grad_norm": 1.7145193815231323, "learning_rate": 3.907568954457987e-06, "loss": 0.8282, "step": 21135 }, { "epoch": 0.2576383556969276, "grad_norm": 1.9662268161773682, "learning_rate": 3.90724823604875e-06, "loss": 0.9344, "step": 21140 }, { "epoch": 0.2576992919210754, "grad_norm": 1.9580079317092896, "learning_rate": 3.906927517639513e-06, "loss": 0.8733, "step": 21145 }, { "epoch": 0.2577602281452232, "grad_norm": 2.0118496417999268, "learning_rate": 3.906606799230277e-06, "loss": 0.8282, "step": 21150 }, { "epoch": 0.257821164369371, "grad_norm": 1.7442920207977295, "learning_rate": 3.90628608082104e-06, "loss": 0.8728, "step": 21155 }, { "epoch": 0.25788210059351885, "grad_norm": 2.2252886295318604, "learning_rate": 3.905965362411803e-06, "loss": 0.8778, "step": 21160 }, { "epoch": 0.2579430368176666, "grad_norm": 2.2144291400909424, "learning_rate": 3.905644644002566e-06, "loss": 0.8429, "step": 21165 }, { "epoch": 0.25800397304181444, "grad_norm": 2.2976315021514893, "learning_rate": 3.90532392559333e-06, "loss": 0.9139, "step": 21170 }, { "epoch": 0.25806490926596226, "grad_norm": 2.045898914337158, "learning_rate": 3.905003207184093e-06, "loss": 0.8882, "step": 21175 }, { "epoch": 0.25812584549011003, "grad_norm": 1.8773729801177979, "learning_rate": 3.904682488774856e-06, "loss": 0.7752, "step": 21180 }, { "epoch": 0.25818678171425785, "grad_norm": 2.0278656482696533, "learning_rate": 3.9043617703656195e-06, "loss": 0.8288, "step": 21185 }, { "epoch": 0.2582477179384057, "grad_norm": 2.082430124282837, "learning_rate": 3.9040410519563825e-06, "loss": 0.888, "step": 21190 }, { "epoch": 0.2583086541625535, "grad_norm": 2.120124578475952, "learning_rate": 3.9037203335471455e-06, "loss": 0.9165, "step": 21195 }, { "epoch": 0.25836959038670126, "grad_norm": 1.7759979963302612, "learning_rate": 3.903399615137909e-06, "loss": 0.8568, "step": 21200 }, { "epoch": 0.2584305266108491, "grad_norm": 2.2558772563934326, "learning_rate": 3.9030788967286724e-06, "loss": 0.8415, "step": 21205 }, { "epoch": 0.2584914628349969, "grad_norm": 2.1040585041046143, "learning_rate": 3.9027581783194354e-06, "loss": 0.8886, "step": 21210 }, { "epoch": 0.2585523990591447, "grad_norm": 1.6748569011688232, "learning_rate": 3.902437459910199e-06, "loss": 0.7828, "step": 21215 }, { "epoch": 0.2586133352832925, "grad_norm": 2.2203943729400635, "learning_rate": 3.902116741500962e-06, "loss": 0.8238, "step": 21220 }, { "epoch": 0.2586742715074403, "grad_norm": 2.0554370880126953, "learning_rate": 3.901796023091725e-06, "loss": 0.8479, "step": 21225 }, { "epoch": 0.25873520773158815, "grad_norm": 2.0920753479003906, "learning_rate": 3.901475304682489e-06, "loss": 0.8797, "step": 21230 }, { "epoch": 0.2587961439557359, "grad_norm": 1.854504108428955, "learning_rate": 3.901154586273252e-06, "loss": 0.8219, "step": 21235 }, { "epoch": 0.25885708017988374, "grad_norm": 2.0409786701202393, "learning_rate": 3.900833867864015e-06, "loss": 0.9175, "step": 21240 }, { "epoch": 0.25891801640403156, "grad_norm": 1.8156459331512451, "learning_rate": 3.900513149454779e-06, "loss": 0.9027, "step": 21245 }, { "epoch": 0.2589789526281793, "grad_norm": 2.000734329223633, "learning_rate": 3.900192431045542e-06, "loss": 0.943, "step": 21250 }, { "epoch": 0.25903988885232715, "grad_norm": 2.156165599822998, "learning_rate": 3.899871712636306e-06, "loss": 0.8549, "step": 21255 }, { "epoch": 0.25910082507647497, "grad_norm": 2.0395846366882324, "learning_rate": 3.899550994227069e-06, "loss": 0.9119, "step": 21260 }, { "epoch": 0.2591617613006228, "grad_norm": 2.0539863109588623, "learning_rate": 3.899230275817832e-06, "loss": 0.8168, "step": 21265 }, { "epoch": 0.25922269752477056, "grad_norm": 2.1016669273376465, "learning_rate": 3.898909557408596e-06, "loss": 0.8526, "step": 21270 }, { "epoch": 0.2592836337489184, "grad_norm": 2.0120904445648193, "learning_rate": 3.898588838999359e-06, "loss": 0.9337, "step": 21275 }, { "epoch": 0.2593445699730662, "grad_norm": 2.2136504650115967, "learning_rate": 3.898268120590123e-06, "loss": 0.9431, "step": 21280 }, { "epoch": 0.259405506197214, "grad_norm": 1.8871954679489136, "learning_rate": 3.897947402180886e-06, "loss": 0.9197, "step": 21285 }, { "epoch": 0.2594664424213618, "grad_norm": 1.849717617034912, "learning_rate": 3.897626683771649e-06, "loss": 0.9182, "step": 21290 }, { "epoch": 0.2595273786455096, "grad_norm": 2.055572271347046, "learning_rate": 3.897305965362413e-06, "loss": 0.8087, "step": 21295 }, { "epoch": 0.25958831486965744, "grad_norm": 2.009894371032715, "learning_rate": 3.896985246953176e-06, "loss": 0.8877, "step": 21300 }, { "epoch": 0.2596492510938052, "grad_norm": 1.946226716041565, "learning_rate": 3.896664528543939e-06, "loss": 0.8609, "step": 21305 }, { "epoch": 0.25971018731795303, "grad_norm": 1.6131269931793213, "learning_rate": 3.8963438101347026e-06, "loss": 0.8578, "step": 21310 }, { "epoch": 0.25977112354210086, "grad_norm": 1.9987306594848633, "learning_rate": 3.8960230917254656e-06, "loss": 0.9632, "step": 21315 }, { "epoch": 0.2598320597662486, "grad_norm": 1.9672776460647583, "learning_rate": 3.895702373316229e-06, "loss": 0.9424, "step": 21320 }, { "epoch": 0.25989299599039645, "grad_norm": 1.7092235088348389, "learning_rate": 3.895381654906992e-06, "loss": 0.904, "step": 21325 }, { "epoch": 0.25995393221454427, "grad_norm": 2.061830997467041, "learning_rate": 3.8950609364977555e-06, "loss": 0.9102, "step": 21330 }, { "epoch": 0.26001486843869204, "grad_norm": 1.8459534645080566, "learning_rate": 3.8947402180885185e-06, "loss": 0.8539, "step": 21335 }, { "epoch": 0.26007580466283986, "grad_norm": 2.07741641998291, "learning_rate": 3.8944194996792815e-06, "loss": 0.8785, "step": 21340 }, { "epoch": 0.2601367408869877, "grad_norm": 2.053924560546875, "learning_rate": 3.894098781270045e-06, "loss": 0.8922, "step": 21345 }, { "epoch": 0.2601976771111355, "grad_norm": 1.8261617422103882, "learning_rate": 3.893778062860808e-06, "loss": 0.8911, "step": 21350 }, { "epoch": 0.2602586133352833, "grad_norm": 2.0253543853759766, "learning_rate": 3.893457344451571e-06, "loss": 0.8759, "step": 21355 }, { "epoch": 0.2603195495594311, "grad_norm": 2.000549077987671, "learning_rate": 3.893136626042335e-06, "loss": 0.8828, "step": 21360 }, { "epoch": 0.2603804857835789, "grad_norm": 2.2859108448028564, "learning_rate": 3.892815907633098e-06, "loss": 0.8617, "step": 21365 }, { "epoch": 0.2604414220077267, "grad_norm": 2.2698557376861572, "learning_rate": 3.892495189223861e-06, "loss": 0.8045, "step": 21370 }, { "epoch": 0.2605023582318745, "grad_norm": 1.8876579999923706, "learning_rate": 3.892174470814625e-06, "loss": 0.8535, "step": 21375 }, { "epoch": 0.26056329445602233, "grad_norm": 2.297499179840088, "learning_rate": 3.891853752405388e-06, "loss": 0.8451, "step": 21380 }, { "epoch": 0.26062423068017015, "grad_norm": 2.3030593395233154, "learning_rate": 3.891533033996151e-06, "loss": 0.938, "step": 21385 }, { "epoch": 0.2606851669043179, "grad_norm": 2.0858960151672363, "learning_rate": 3.891212315586915e-06, "loss": 0.9058, "step": 21390 }, { "epoch": 0.26074610312846574, "grad_norm": 1.8054107427597046, "learning_rate": 3.890891597177678e-06, "loss": 0.8963, "step": 21395 }, { "epoch": 0.26080703935261357, "grad_norm": 2.208709716796875, "learning_rate": 3.890570878768442e-06, "loss": 0.9265, "step": 21400 }, { "epoch": 0.26086797557676134, "grad_norm": 2.1665055751800537, "learning_rate": 3.890250160359205e-06, "loss": 0.9454, "step": 21405 }, { "epoch": 0.26092891180090916, "grad_norm": 1.966644048690796, "learning_rate": 3.889929441949968e-06, "loss": 0.9377, "step": 21410 }, { "epoch": 0.260989848025057, "grad_norm": 1.7047568559646606, "learning_rate": 3.889608723540732e-06, "loss": 0.8568, "step": 21415 }, { "epoch": 0.2610507842492048, "grad_norm": 1.727269172668457, "learning_rate": 3.889288005131495e-06, "loss": 0.8725, "step": 21420 }, { "epoch": 0.26111172047335257, "grad_norm": 2.054450750350952, "learning_rate": 3.888967286722259e-06, "loss": 0.8609, "step": 21425 }, { "epoch": 0.2611726566975004, "grad_norm": 1.8372048139572144, "learning_rate": 3.888646568313022e-06, "loss": 0.8268, "step": 21430 }, { "epoch": 0.2612335929216482, "grad_norm": 1.8132858276367188, "learning_rate": 3.888325849903785e-06, "loss": 0.8338, "step": 21435 }, { "epoch": 0.261294529145796, "grad_norm": 1.7586662769317627, "learning_rate": 3.888005131494549e-06, "loss": 0.8535, "step": 21440 }, { "epoch": 0.2613554653699438, "grad_norm": 1.8590219020843506, "learning_rate": 3.887684413085312e-06, "loss": 0.8477, "step": 21445 }, { "epoch": 0.26141640159409163, "grad_norm": 1.8787450790405273, "learning_rate": 3.887363694676075e-06, "loss": 0.8478, "step": 21450 }, { "epoch": 0.26147733781823945, "grad_norm": 2.1010091304779053, "learning_rate": 3.8870429762668385e-06, "loss": 0.9672, "step": 21455 }, { "epoch": 0.2615382740423872, "grad_norm": 1.9502464532852173, "learning_rate": 3.8867222578576015e-06, "loss": 0.9404, "step": 21460 }, { "epoch": 0.26159921026653504, "grad_norm": 1.7454179525375366, "learning_rate": 3.8864015394483645e-06, "loss": 0.7965, "step": 21465 }, { "epoch": 0.26166014649068287, "grad_norm": 2.2505252361297607, "learning_rate": 3.886080821039128e-06, "loss": 0.8214, "step": 21470 }, { "epoch": 0.26172108271483063, "grad_norm": 2.0230672359466553, "learning_rate": 3.8857601026298914e-06, "loss": 0.8694, "step": 21475 }, { "epoch": 0.26178201893897846, "grad_norm": 2.313774585723877, "learning_rate": 3.8854393842206544e-06, "loss": 0.8443, "step": 21480 }, { "epoch": 0.2618429551631263, "grad_norm": 1.8533148765563965, "learning_rate": 3.885118665811418e-06, "loss": 0.8586, "step": 21485 }, { "epoch": 0.2619038913872741, "grad_norm": 1.8797513246536255, "learning_rate": 3.884797947402181e-06, "loss": 0.8387, "step": 21490 }, { "epoch": 0.26196482761142187, "grad_norm": 1.9312751293182373, "learning_rate": 3.884477228992944e-06, "loss": 0.8387, "step": 21495 }, { "epoch": 0.2620257638355697, "grad_norm": 2.506558895111084, "learning_rate": 3.884156510583707e-06, "loss": 0.9153, "step": 21500 }, { "epoch": 0.2620867000597175, "grad_norm": 2.0712764263153076, "learning_rate": 3.883835792174471e-06, "loss": 0.886, "step": 21505 }, { "epoch": 0.2621476362838653, "grad_norm": 2.2753522396087646, "learning_rate": 3.883515073765234e-06, "loss": 0.8748, "step": 21510 }, { "epoch": 0.2622085725080131, "grad_norm": 2.3778462409973145, "learning_rate": 3.883194355355997e-06, "loss": 0.8978, "step": 21515 }, { "epoch": 0.2622695087321609, "grad_norm": 1.999204158782959, "learning_rate": 3.882873636946761e-06, "loss": 0.9257, "step": 21520 }, { "epoch": 0.26233044495630875, "grad_norm": 2.232180595397949, "learning_rate": 3.882552918537524e-06, "loss": 0.8593, "step": 21525 }, { "epoch": 0.2623913811804565, "grad_norm": 2.6412832736968994, "learning_rate": 3.882232200128287e-06, "loss": 0.9111, "step": 21530 }, { "epoch": 0.26245231740460434, "grad_norm": 1.9000846147537231, "learning_rate": 3.881911481719051e-06, "loss": 0.86, "step": 21535 }, { "epoch": 0.26251325362875216, "grad_norm": 1.7622432708740234, "learning_rate": 3.881590763309814e-06, "loss": 0.9063, "step": 21540 }, { "epoch": 0.26257418985289993, "grad_norm": 2.1818361282348633, "learning_rate": 3.881270044900577e-06, "loss": 0.8123, "step": 21545 }, { "epoch": 0.26263512607704775, "grad_norm": 1.9785009622573853, "learning_rate": 3.880949326491341e-06, "loss": 0.8774, "step": 21550 }, { "epoch": 0.2626960623011956, "grad_norm": 2.010233163833618, "learning_rate": 3.880628608082104e-06, "loss": 0.9205, "step": 21555 }, { "epoch": 0.2627569985253434, "grad_norm": 1.5845195055007935, "learning_rate": 3.880307889672868e-06, "loss": 0.8045, "step": 21560 }, { "epoch": 0.26281793474949117, "grad_norm": 1.871262788772583, "learning_rate": 3.879987171263631e-06, "loss": 0.8539, "step": 21565 }, { "epoch": 0.262878870973639, "grad_norm": 1.7739230394363403, "learning_rate": 3.879666452854394e-06, "loss": 0.838, "step": 21570 }, { "epoch": 0.2629398071977868, "grad_norm": 2.3318281173706055, "learning_rate": 3.879345734445158e-06, "loss": 0.8854, "step": 21575 }, { "epoch": 0.2630007434219346, "grad_norm": 2.2763874530792236, "learning_rate": 3.879025016035921e-06, "loss": 0.8618, "step": 21580 }, { "epoch": 0.2630616796460824, "grad_norm": 2.1556050777435303, "learning_rate": 3.8787042976266846e-06, "loss": 0.8716, "step": 21585 }, { "epoch": 0.2631226158702302, "grad_norm": 1.808402419090271, "learning_rate": 3.878383579217448e-06, "loss": 0.8854, "step": 21590 }, { "epoch": 0.26318355209437805, "grad_norm": 2.480670213699341, "learning_rate": 3.878062860808211e-06, "loss": 0.9393, "step": 21595 }, { "epoch": 0.2632444883185258, "grad_norm": 1.915195107460022, "learning_rate": 3.8777421423989745e-06, "loss": 0.9185, "step": 21600 }, { "epoch": 0.26330542454267364, "grad_norm": 2.0938074588775635, "learning_rate": 3.8774214239897375e-06, "loss": 0.9251, "step": 21605 }, { "epoch": 0.26336636076682146, "grad_norm": 2.061654806137085, "learning_rate": 3.8771007055805005e-06, "loss": 0.8705, "step": 21610 }, { "epoch": 0.26342729699096923, "grad_norm": 1.901198387145996, "learning_rate": 3.876779987171264e-06, "loss": 0.8875, "step": 21615 }, { "epoch": 0.26348823321511705, "grad_norm": 1.8654803037643433, "learning_rate": 3.876459268762027e-06, "loss": 0.8656, "step": 21620 }, { "epoch": 0.2635491694392649, "grad_norm": 2.356513738632202, "learning_rate": 3.87613855035279e-06, "loss": 0.8709, "step": 21625 }, { "epoch": 0.2636101056634127, "grad_norm": 2.061527729034424, "learning_rate": 3.875817831943554e-06, "loss": 0.9262, "step": 21630 }, { "epoch": 0.26367104188756046, "grad_norm": 2.230318307876587, "learning_rate": 3.875497113534317e-06, "loss": 0.9541, "step": 21635 }, { "epoch": 0.2637319781117083, "grad_norm": 1.8616585731506348, "learning_rate": 3.87517639512508e-06, "loss": 0.8963, "step": 21640 }, { "epoch": 0.2637929143358561, "grad_norm": 1.7296905517578125, "learning_rate": 3.874855676715844e-06, "loss": 0.9433, "step": 21645 }, { "epoch": 0.2638538505600039, "grad_norm": 1.8265832662582397, "learning_rate": 3.874534958306607e-06, "loss": 0.8048, "step": 21650 }, { "epoch": 0.2639147867841517, "grad_norm": 2.34468936920166, "learning_rate": 3.87421423989737e-06, "loss": 0.8744, "step": 21655 }, { "epoch": 0.2639757230082995, "grad_norm": 2.106045961380005, "learning_rate": 3.873893521488133e-06, "loss": 0.8475, "step": 21660 }, { "epoch": 0.26403665923244735, "grad_norm": 1.8546377420425415, "learning_rate": 3.873572803078897e-06, "loss": 0.9043, "step": 21665 }, { "epoch": 0.2640975954565951, "grad_norm": 1.9291560649871826, "learning_rate": 3.87325208466966e-06, "loss": 0.884, "step": 21670 }, { "epoch": 0.26415853168074294, "grad_norm": 1.8665558099746704, "learning_rate": 3.872931366260423e-06, "loss": 0.79, "step": 21675 }, { "epoch": 0.26421946790489076, "grad_norm": 2.188843011856079, "learning_rate": 3.872610647851187e-06, "loss": 0.8364, "step": 21680 }, { "epoch": 0.2642804041290385, "grad_norm": 1.6467827558517456, "learning_rate": 3.87228992944195e-06, "loss": 0.8401, "step": 21685 }, { "epoch": 0.26434134035318635, "grad_norm": 1.8770462274551392, "learning_rate": 3.871969211032713e-06, "loss": 0.8772, "step": 21690 }, { "epoch": 0.26440227657733417, "grad_norm": 1.8098417520523071, "learning_rate": 3.871648492623477e-06, "loss": 0.9247, "step": 21695 }, { "epoch": 0.264463212801482, "grad_norm": 1.59162437915802, "learning_rate": 3.87132777421424e-06, "loss": 0.8538, "step": 21700 }, { "epoch": 0.26452414902562976, "grad_norm": 1.9725528955459595, "learning_rate": 3.871007055805004e-06, "loss": 0.9029, "step": 21705 }, { "epoch": 0.2645850852497776, "grad_norm": 1.9610133171081543, "learning_rate": 3.870686337395767e-06, "loss": 0.9188, "step": 21710 }, { "epoch": 0.2646460214739254, "grad_norm": 2.2027719020843506, "learning_rate": 3.87036561898653e-06, "loss": 0.838, "step": 21715 }, { "epoch": 0.2647069576980732, "grad_norm": 1.9681150913238525, "learning_rate": 3.870044900577294e-06, "loss": 0.8523, "step": 21720 }, { "epoch": 0.264767893922221, "grad_norm": 2.257938861846924, "learning_rate": 3.869724182168057e-06, "loss": 0.8548, "step": 21725 }, { "epoch": 0.2648288301463688, "grad_norm": 2.2408711910247803, "learning_rate": 3.8694034637588205e-06, "loss": 0.9254, "step": 21730 }, { "epoch": 0.26488976637051664, "grad_norm": 1.9144749641418457, "learning_rate": 3.8690827453495835e-06, "loss": 0.8565, "step": 21735 }, { "epoch": 0.2649507025946644, "grad_norm": 1.916056513786316, "learning_rate": 3.8687620269403466e-06, "loss": 0.8545, "step": 21740 }, { "epoch": 0.26501163881881223, "grad_norm": 1.9034961462020874, "learning_rate": 3.8684413085311104e-06, "loss": 0.907, "step": 21745 }, { "epoch": 0.26507257504296006, "grad_norm": 1.758979082107544, "learning_rate": 3.8681205901218734e-06, "loss": 0.8405, "step": 21750 }, { "epoch": 0.2651335112671078, "grad_norm": 1.8804042339324951, "learning_rate": 3.8677998717126365e-06, "loss": 0.8496, "step": 21755 }, { "epoch": 0.26519444749125565, "grad_norm": 2.0374133586883545, "learning_rate": 3.8674791533034e-06, "loss": 0.8599, "step": 21760 }, { "epoch": 0.26525538371540347, "grad_norm": 2.154789447784424, "learning_rate": 3.867158434894163e-06, "loss": 0.9089, "step": 21765 }, { "epoch": 0.2653163199395513, "grad_norm": 1.9030601978302002, "learning_rate": 3.866837716484926e-06, "loss": 0.8957, "step": 21770 }, { "epoch": 0.26537725616369906, "grad_norm": 1.8778860569000244, "learning_rate": 3.86651699807569e-06, "loss": 0.8509, "step": 21775 }, { "epoch": 0.2654381923878469, "grad_norm": 1.8867557048797607, "learning_rate": 3.866196279666453e-06, "loss": 0.8604, "step": 21780 }, { "epoch": 0.2654991286119947, "grad_norm": 2.0626566410064697, "learning_rate": 3.865875561257216e-06, "loss": 0.9235, "step": 21785 }, { "epoch": 0.2655600648361425, "grad_norm": 1.9719375371932983, "learning_rate": 3.86555484284798e-06, "loss": 0.8958, "step": 21790 }, { "epoch": 0.2656210010602903, "grad_norm": 1.9179638624191284, "learning_rate": 3.865234124438743e-06, "loss": 0.9309, "step": 21795 }, { "epoch": 0.2656819372844381, "grad_norm": 1.9620311260223389, "learning_rate": 3.864913406029506e-06, "loss": 0.871, "step": 21800 }, { "epoch": 0.26574287350858594, "grad_norm": 1.8814647197723389, "learning_rate": 3.86459268762027e-06, "loss": 0.8776, "step": 21805 }, { "epoch": 0.2658038097327337, "grad_norm": 1.867824673652649, "learning_rate": 3.864271969211033e-06, "loss": 0.8297, "step": 21810 }, { "epoch": 0.26586474595688153, "grad_norm": 2.595336437225342, "learning_rate": 3.863951250801796e-06, "loss": 0.8647, "step": 21815 }, { "epoch": 0.26592568218102935, "grad_norm": 1.6803505420684814, "learning_rate": 3.86363053239256e-06, "loss": 0.8149, "step": 21820 }, { "epoch": 0.2659866184051771, "grad_norm": 1.7947202920913696, "learning_rate": 3.863309813983323e-06, "loss": 0.8647, "step": 21825 }, { "epoch": 0.26604755462932494, "grad_norm": 1.8314123153686523, "learning_rate": 3.862989095574086e-06, "loss": 0.8761, "step": 21830 }, { "epoch": 0.26610849085347277, "grad_norm": 1.7307662963867188, "learning_rate": 3.862668377164849e-06, "loss": 0.8282, "step": 21835 }, { "epoch": 0.26616942707762054, "grad_norm": 1.7616180181503296, "learning_rate": 3.862347658755613e-06, "loss": 0.8627, "step": 21840 }, { "epoch": 0.26623036330176836, "grad_norm": 1.7649625539779663, "learning_rate": 3.862026940346376e-06, "loss": 0.8737, "step": 21845 }, { "epoch": 0.2662912995259162, "grad_norm": 1.735832929611206, "learning_rate": 3.86170622193714e-06, "loss": 0.8747, "step": 21850 }, { "epoch": 0.266352235750064, "grad_norm": 1.9075387716293335, "learning_rate": 3.861385503527903e-06, "loss": 0.8067, "step": 21855 }, { "epoch": 0.26641317197421177, "grad_norm": 1.5101149082183838, "learning_rate": 3.861064785118666e-06, "loss": 0.9016, "step": 21860 }, { "epoch": 0.2664741081983596, "grad_norm": 1.823230266571045, "learning_rate": 3.86074406670943e-06, "loss": 0.8062, "step": 21865 }, { "epoch": 0.2665350444225074, "grad_norm": 1.9158889055252075, "learning_rate": 3.860423348300193e-06, "loss": 0.8822, "step": 21870 }, { "epoch": 0.2665959806466552, "grad_norm": 2.214285135269165, "learning_rate": 3.8601026298909565e-06, "loss": 0.8622, "step": 21875 }, { "epoch": 0.266656916870803, "grad_norm": 1.961940050125122, "learning_rate": 3.8597819114817195e-06, "loss": 0.9176, "step": 21880 }, { "epoch": 0.26671785309495083, "grad_norm": 2.435563325881958, "learning_rate": 3.8594611930724825e-06, "loss": 0.9106, "step": 21885 }, { "epoch": 0.26677878931909865, "grad_norm": 1.7952654361724854, "learning_rate": 3.859140474663246e-06, "loss": 0.8746, "step": 21890 }, { "epoch": 0.2668397255432464, "grad_norm": 1.9960452318191528, "learning_rate": 3.858819756254009e-06, "loss": 0.8339, "step": 21895 }, { "epoch": 0.26690066176739424, "grad_norm": 1.9827933311462402, "learning_rate": 3.858499037844773e-06, "loss": 0.872, "step": 21900 }, { "epoch": 0.26696159799154207, "grad_norm": 2.236254930496216, "learning_rate": 3.858178319435536e-06, "loss": 0.9099, "step": 21905 }, { "epoch": 0.26702253421568983, "grad_norm": 2.0427472591400146, "learning_rate": 3.857857601026299e-06, "loss": 0.8924, "step": 21910 }, { "epoch": 0.26708347043983766, "grad_norm": 1.9919941425323486, "learning_rate": 3.857536882617062e-06, "loss": 0.9449, "step": 21915 }, { "epoch": 0.2671444066639855, "grad_norm": 1.9600416421890259, "learning_rate": 3.857216164207826e-06, "loss": 0.8531, "step": 21920 }, { "epoch": 0.2672053428881333, "grad_norm": 1.9008315801620483, "learning_rate": 3.856895445798589e-06, "loss": 0.8744, "step": 21925 }, { "epoch": 0.26726627911228107, "grad_norm": 2.1330409049987793, "learning_rate": 3.856574727389352e-06, "loss": 0.8772, "step": 21930 }, { "epoch": 0.2673272153364289, "grad_norm": 2.524387836456299, "learning_rate": 3.856254008980116e-06, "loss": 0.8644, "step": 21935 }, { "epoch": 0.2673881515605767, "grad_norm": 1.9398655891418457, "learning_rate": 3.855933290570879e-06, "loss": 0.8887, "step": 21940 }, { "epoch": 0.2674490877847245, "grad_norm": 1.9251116514205933, "learning_rate": 3.855612572161642e-06, "loss": 0.9145, "step": 21945 }, { "epoch": 0.2675100240088723, "grad_norm": 2.57415509223938, "learning_rate": 3.855291853752406e-06, "loss": 0.8743, "step": 21950 }, { "epoch": 0.2675709602330201, "grad_norm": 1.787964940071106, "learning_rate": 3.854971135343169e-06, "loss": 0.926, "step": 21955 }, { "epoch": 0.26763189645716795, "grad_norm": 1.9668984413146973, "learning_rate": 3.854650416933932e-06, "loss": 0.8443, "step": 21960 }, { "epoch": 0.2676928326813157, "grad_norm": 2.224605083465576, "learning_rate": 3.854329698524696e-06, "loss": 0.8759, "step": 21965 }, { "epoch": 0.26775376890546354, "grad_norm": 1.7872549295425415, "learning_rate": 3.854008980115459e-06, "loss": 0.8814, "step": 21970 }, { "epoch": 0.26781470512961136, "grad_norm": 1.853757381439209, "learning_rate": 3.853688261706222e-06, "loss": 0.8809, "step": 21975 }, { "epoch": 0.26787564135375913, "grad_norm": 1.8367068767547607, "learning_rate": 3.853367543296986e-06, "loss": 0.8226, "step": 21980 }, { "epoch": 0.26793657757790695, "grad_norm": 2.075876474380493, "learning_rate": 3.853046824887749e-06, "loss": 0.9133, "step": 21985 }, { "epoch": 0.2679975138020548, "grad_norm": 2.1732025146484375, "learning_rate": 3.852726106478512e-06, "loss": 0.8375, "step": 21990 }, { "epoch": 0.2680584500262026, "grad_norm": 2.162642240524292, "learning_rate": 3.852405388069275e-06, "loss": 0.8398, "step": 21995 }, { "epoch": 0.26811938625035037, "grad_norm": 2.3064048290252686, "learning_rate": 3.852084669660039e-06, "loss": 0.9029, "step": 22000 }, { "epoch": 0.2681803224744982, "grad_norm": 1.969679355621338, "learning_rate": 3.851763951250802e-06, "loss": 0.8798, "step": 22005 }, { "epoch": 0.268241258698646, "grad_norm": 2.084744930267334, "learning_rate": 3.8514432328415656e-06, "loss": 0.8774, "step": 22010 }, { "epoch": 0.2683021949227938, "grad_norm": 1.9530880451202393, "learning_rate": 3.851122514432329e-06, "loss": 0.8216, "step": 22015 }, { "epoch": 0.2683631311469416, "grad_norm": 1.869128942489624, "learning_rate": 3.850801796023092e-06, "loss": 0.909, "step": 22020 }, { "epoch": 0.2684240673710894, "grad_norm": 2.1155059337615967, "learning_rate": 3.8504810776138555e-06, "loss": 0.8265, "step": 22025 }, { "epoch": 0.26848500359523725, "grad_norm": 1.8578675985336304, "learning_rate": 3.8501603592046185e-06, "loss": 0.905, "step": 22030 }, { "epoch": 0.268545939819385, "grad_norm": 1.8156101703643799, "learning_rate": 3.849839640795382e-06, "loss": 0.9066, "step": 22035 }, { "epoch": 0.26860687604353284, "grad_norm": 1.84060537815094, "learning_rate": 3.849518922386145e-06, "loss": 0.8195, "step": 22040 }, { "epoch": 0.26866781226768066, "grad_norm": 1.9354969263076782, "learning_rate": 3.849198203976909e-06, "loss": 0.8004, "step": 22045 }, { "epoch": 0.26872874849182843, "grad_norm": 1.8357981443405151, "learning_rate": 3.848877485567672e-06, "loss": 0.895, "step": 22050 }, { "epoch": 0.26878968471597625, "grad_norm": 1.8807264566421509, "learning_rate": 3.848556767158435e-06, "loss": 0.8798, "step": 22055 }, { "epoch": 0.2688506209401241, "grad_norm": 2.3020224571228027, "learning_rate": 3.848236048749199e-06, "loss": 0.8091, "step": 22060 }, { "epoch": 0.2689115571642719, "grad_norm": 1.866274118423462, "learning_rate": 3.847915330339962e-06, "loss": 0.8454, "step": 22065 }, { "epoch": 0.26897249338841966, "grad_norm": 2.1413145065307617, "learning_rate": 3.847594611930725e-06, "loss": 0.7945, "step": 22070 }, { "epoch": 0.2690334296125675, "grad_norm": 1.8709440231323242, "learning_rate": 3.847273893521489e-06, "loss": 0.8876, "step": 22075 }, { "epoch": 0.2690943658367153, "grad_norm": 2.049039363861084, "learning_rate": 3.846953175112252e-06, "loss": 0.9095, "step": 22080 }, { "epoch": 0.2691553020608631, "grad_norm": 2.156078338623047, "learning_rate": 3.846632456703015e-06, "loss": 0.9324, "step": 22085 }, { "epoch": 0.2692162382850109, "grad_norm": 2.1604418754577637, "learning_rate": 3.846311738293778e-06, "loss": 0.9149, "step": 22090 }, { "epoch": 0.2692771745091587, "grad_norm": 1.9077963829040527, "learning_rate": 3.845991019884542e-06, "loss": 0.8407, "step": 22095 }, { "epoch": 0.26933811073330655, "grad_norm": 1.959753155708313, "learning_rate": 3.845670301475305e-06, "loss": 0.9038, "step": 22100 }, { "epoch": 0.2693990469574543, "grad_norm": 2.051967144012451, "learning_rate": 3.845349583066068e-06, "loss": 0.8958, "step": 22105 }, { "epoch": 0.26945998318160214, "grad_norm": 2.2886786460876465, "learning_rate": 3.845028864656832e-06, "loss": 0.8595, "step": 22110 }, { "epoch": 0.26952091940574996, "grad_norm": 2.0960798263549805, "learning_rate": 3.844708146247595e-06, "loss": 0.8695, "step": 22115 }, { "epoch": 0.2695818556298977, "grad_norm": 2.2578468322753906, "learning_rate": 3.844387427838358e-06, "loss": 0.8684, "step": 22120 }, { "epoch": 0.26964279185404555, "grad_norm": 2.087275505065918, "learning_rate": 3.844066709429122e-06, "loss": 0.9064, "step": 22125 }, { "epoch": 0.26970372807819337, "grad_norm": 1.7357317209243774, "learning_rate": 3.843745991019885e-06, "loss": 0.9283, "step": 22130 }, { "epoch": 0.2697646643023412, "grad_norm": 1.9974030256271362, "learning_rate": 3.843425272610648e-06, "loss": 0.9729, "step": 22135 }, { "epoch": 0.26982560052648896, "grad_norm": 2.025723457336426, "learning_rate": 3.843104554201412e-06, "loss": 0.8685, "step": 22140 }, { "epoch": 0.2698865367506368, "grad_norm": 1.9524956941604614, "learning_rate": 3.842783835792175e-06, "loss": 0.8114, "step": 22145 }, { "epoch": 0.2699474729747846, "grad_norm": 2.440565586090088, "learning_rate": 3.842463117382938e-06, "loss": 0.8403, "step": 22150 }, { "epoch": 0.2700084091989324, "grad_norm": 2.0007033348083496, "learning_rate": 3.8421423989737015e-06, "loss": 0.8764, "step": 22155 }, { "epoch": 0.2700693454230802, "grad_norm": 1.763930320739746, "learning_rate": 3.8418216805644645e-06, "loss": 0.9417, "step": 22160 }, { "epoch": 0.270130281647228, "grad_norm": 1.8565434217453003, "learning_rate": 3.8415009621552276e-06, "loss": 0.8449, "step": 22165 }, { "epoch": 0.27019121787137584, "grad_norm": 1.882733941078186, "learning_rate": 3.841180243745991e-06, "loss": 0.8571, "step": 22170 }, { "epoch": 0.2702521540955236, "grad_norm": 2.0218687057495117, "learning_rate": 3.8408595253367544e-06, "loss": 0.8433, "step": 22175 }, { "epoch": 0.27031309031967143, "grad_norm": 1.9313859939575195, "learning_rate": 3.840538806927518e-06, "loss": 0.9758, "step": 22180 }, { "epoch": 0.27037402654381926, "grad_norm": 2.055421829223633, "learning_rate": 3.840218088518281e-06, "loss": 0.9134, "step": 22185 }, { "epoch": 0.270434962767967, "grad_norm": 1.8191591501235962, "learning_rate": 3.839897370109044e-06, "loss": 0.876, "step": 22190 }, { "epoch": 0.27049589899211485, "grad_norm": 1.7290277481079102, "learning_rate": 3.839576651699808e-06, "loss": 0.8889, "step": 22195 }, { "epoch": 0.27055683521626267, "grad_norm": 2.190650701522827, "learning_rate": 3.839255933290571e-06, "loss": 0.9024, "step": 22200 }, { "epoch": 0.2706177714404105, "grad_norm": 1.7764970064163208, "learning_rate": 3.838935214881335e-06, "loss": 0.8703, "step": 22205 }, { "epoch": 0.27067870766455826, "grad_norm": 1.9921249151229858, "learning_rate": 3.838614496472098e-06, "loss": 0.8584, "step": 22210 }, { "epoch": 0.2707396438887061, "grad_norm": 2.0684969425201416, "learning_rate": 3.838293778062861e-06, "loss": 0.8676, "step": 22215 }, { "epoch": 0.2708005801128539, "grad_norm": 2.122849225997925, "learning_rate": 3.837973059653625e-06, "loss": 0.9247, "step": 22220 }, { "epoch": 0.2708615163370017, "grad_norm": 2.2330479621887207, "learning_rate": 3.837652341244388e-06, "loss": 0.8727, "step": 22225 }, { "epoch": 0.2709224525611495, "grad_norm": 2.2572953701019287, "learning_rate": 3.837331622835151e-06, "loss": 0.8804, "step": 22230 }, { "epoch": 0.2709833887852973, "grad_norm": 1.967348337173462, "learning_rate": 3.837010904425915e-06, "loss": 0.8819, "step": 22235 }, { "epoch": 0.27104432500944514, "grad_norm": 2.1188085079193115, "learning_rate": 3.836690186016678e-06, "loss": 0.8491, "step": 22240 }, { "epoch": 0.2711052612335929, "grad_norm": 1.956618309020996, "learning_rate": 3.836369467607441e-06, "loss": 0.9118, "step": 22245 }, { "epoch": 0.27116619745774073, "grad_norm": 2.2325820922851562, "learning_rate": 3.836048749198204e-06, "loss": 0.849, "step": 22250 }, { "epoch": 0.27122713368188855, "grad_norm": 1.7347931861877441, "learning_rate": 3.835728030788968e-06, "loss": 0.8611, "step": 22255 }, { "epoch": 0.2712880699060363, "grad_norm": 1.6271198987960815, "learning_rate": 3.835407312379731e-06, "loss": 0.8388, "step": 22260 }, { "epoch": 0.27134900613018414, "grad_norm": 1.7572566270828247, "learning_rate": 3.835086593970494e-06, "loss": 0.7737, "step": 22265 }, { "epoch": 0.27140994235433197, "grad_norm": 2.0757672786712646, "learning_rate": 3.834765875561258e-06, "loss": 0.9108, "step": 22270 }, { "epoch": 0.2714708785784798, "grad_norm": 2.328341484069824, "learning_rate": 3.834445157152021e-06, "loss": 0.9443, "step": 22275 }, { "epoch": 0.27153181480262756, "grad_norm": 1.7660337686538696, "learning_rate": 3.834124438742784e-06, "loss": 0.8362, "step": 22280 }, { "epoch": 0.2715927510267754, "grad_norm": 2.132768392562866, "learning_rate": 3.833803720333548e-06, "loss": 0.9251, "step": 22285 }, { "epoch": 0.2716536872509232, "grad_norm": 2.0621187686920166, "learning_rate": 3.833483001924311e-06, "loss": 0.8807, "step": 22290 }, { "epoch": 0.27171462347507097, "grad_norm": 1.9333354234695435, "learning_rate": 3.833162283515074e-06, "loss": 0.9073, "step": 22295 }, { "epoch": 0.2717755596992188, "grad_norm": 2.002763509750366, "learning_rate": 3.8328415651058375e-06, "loss": 0.9163, "step": 22300 }, { "epoch": 0.2718364959233666, "grad_norm": 2.0232584476470947, "learning_rate": 3.8325208466966005e-06, "loss": 0.9263, "step": 22305 }, { "epoch": 0.2718974321475144, "grad_norm": 1.9618580341339111, "learning_rate": 3.8322001282873635e-06, "loss": 0.792, "step": 22310 }, { "epoch": 0.2719583683716622, "grad_norm": 1.900873064994812, "learning_rate": 3.831879409878127e-06, "loss": 0.8544, "step": 22315 }, { "epoch": 0.27201930459581003, "grad_norm": 1.9980098009109497, "learning_rate": 3.83155869146889e-06, "loss": 0.9519, "step": 22320 }, { "epoch": 0.27208024081995785, "grad_norm": 1.790958285331726, "learning_rate": 3.831237973059654e-06, "loss": 0.8802, "step": 22325 }, { "epoch": 0.2721411770441056, "grad_norm": 1.8736774921417236, "learning_rate": 3.830917254650417e-06, "loss": 0.9395, "step": 22330 }, { "epoch": 0.27220211326825344, "grad_norm": 1.511267900466919, "learning_rate": 3.83059653624118e-06, "loss": 0.8118, "step": 22335 }, { "epoch": 0.27226304949240127, "grad_norm": 2.1433238983154297, "learning_rate": 3.830275817831944e-06, "loss": 0.9894, "step": 22340 }, { "epoch": 0.27232398571654903, "grad_norm": 2.037708282470703, "learning_rate": 3.829955099422707e-06, "loss": 0.8358, "step": 22345 }, { "epoch": 0.27238492194069686, "grad_norm": 2.06121826171875, "learning_rate": 3.829634381013471e-06, "loss": 0.8903, "step": 22350 }, { "epoch": 0.2724458581648447, "grad_norm": 2.2662158012390137, "learning_rate": 3.829313662604234e-06, "loss": 0.8047, "step": 22355 }, { "epoch": 0.2725067943889925, "grad_norm": 2.081822156906128, "learning_rate": 3.828992944194997e-06, "loss": 0.9005, "step": 22360 }, { "epoch": 0.27256773061314027, "grad_norm": 1.7978228330612183, "learning_rate": 3.828672225785761e-06, "loss": 0.8785, "step": 22365 }, { "epoch": 0.2726286668372881, "grad_norm": 1.671509861946106, "learning_rate": 3.828351507376524e-06, "loss": 0.9135, "step": 22370 }, { "epoch": 0.2726896030614359, "grad_norm": 2.168203115463257, "learning_rate": 3.828030788967287e-06, "loss": 0.9164, "step": 22375 }, { "epoch": 0.2727505392855837, "grad_norm": 1.9096161127090454, "learning_rate": 3.827710070558051e-06, "loss": 0.8913, "step": 22380 }, { "epoch": 0.2728114755097315, "grad_norm": 2.274489641189575, "learning_rate": 3.827389352148814e-06, "loss": 0.8918, "step": 22385 }, { "epoch": 0.2728724117338793, "grad_norm": 2.07700777053833, "learning_rate": 3.827068633739577e-06, "loss": 0.9708, "step": 22390 }, { "epoch": 0.27293334795802715, "grad_norm": 2.1852145195007324, "learning_rate": 3.826747915330341e-06, "loss": 0.848, "step": 22395 }, { "epoch": 0.2729942841821749, "grad_norm": 1.8657892942428589, "learning_rate": 3.826427196921104e-06, "loss": 0.9227, "step": 22400 }, { "epoch": 0.27305522040632274, "grad_norm": 2.1088671684265137, "learning_rate": 3.826106478511867e-06, "loss": 0.9415, "step": 22405 }, { "epoch": 0.27311615663047056, "grad_norm": 2.0519256591796875, "learning_rate": 3.825785760102631e-06, "loss": 0.8219, "step": 22410 }, { "epoch": 0.27317709285461833, "grad_norm": 2.198899984359741, "learning_rate": 3.825465041693394e-06, "loss": 0.834, "step": 22415 }, { "epoch": 0.27323802907876615, "grad_norm": 2.102717876434326, "learning_rate": 3.825144323284157e-06, "loss": 0.8697, "step": 22420 }, { "epoch": 0.273298965302914, "grad_norm": 1.8873497247695923, "learning_rate": 3.82482360487492e-06, "loss": 0.8198, "step": 22425 }, { "epoch": 0.2733599015270618, "grad_norm": 1.9442020654678345, "learning_rate": 3.8245028864656835e-06, "loss": 0.9618, "step": 22430 }, { "epoch": 0.27342083775120957, "grad_norm": 2.138763427734375, "learning_rate": 3.8241821680564466e-06, "loss": 0.8497, "step": 22435 }, { "epoch": 0.2734817739753574, "grad_norm": 1.9131309986114502, "learning_rate": 3.8238614496472096e-06, "loss": 0.9205, "step": 22440 }, { "epoch": 0.2735427101995052, "grad_norm": 1.924826741218567, "learning_rate": 3.8235407312379734e-06, "loss": 0.8551, "step": 22445 }, { "epoch": 0.273603646423653, "grad_norm": 1.5864022970199585, "learning_rate": 3.8232200128287364e-06, "loss": 0.8961, "step": 22450 }, { "epoch": 0.2736645826478008, "grad_norm": 2.075946092605591, "learning_rate": 3.8228992944194995e-06, "loss": 0.8279, "step": 22455 }, { "epoch": 0.2737255188719486, "grad_norm": 1.8858088254928589, "learning_rate": 3.822578576010263e-06, "loss": 0.861, "step": 22460 }, { "epoch": 0.27378645509609645, "grad_norm": 2.012157440185547, "learning_rate": 3.822257857601026e-06, "loss": 0.8929, "step": 22465 }, { "epoch": 0.2738473913202442, "grad_norm": 1.9524897336959839, "learning_rate": 3.821937139191789e-06, "loss": 0.9093, "step": 22470 }, { "epoch": 0.27390832754439204, "grad_norm": 1.602781057357788, "learning_rate": 3.821616420782553e-06, "loss": 0.8954, "step": 22475 }, { "epoch": 0.27396926376853986, "grad_norm": 1.7824257612228394, "learning_rate": 3.821295702373316e-06, "loss": 0.8462, "step": 22480 }, { "epoch": 0.27403019999268763, "grad_norm": 1.5873188972473145, "learning_rate": 3.82097498396408e-06, "loss": 0.9548, "step": 22485 }, { "epoch": 0.27409113621683545, "grad_norm": 1.9011361598968506, "learning_rate": 3.820654265554843e-06, "loss": 0.9367, "step": 22490 }, { "epoch": 0.2741520724409833, "grad_norm": 1.837632417678833, "learning_rate": 3.820333547145606e-06, "loss": 0.9318, "step": 22495 }, { "epoch": 0.2742130086651311, "grad_norm": 1.8398422002792358, "learning_rate": 3.82001282873637e-06, "loss": 0.7772, "step": 22500 }, { "epoch": 0.27427394488927886, "grad_norm": 2.1362709999084473, "learning_rate": 3.819692110327133e-06, "loss": 0.8967, "step": 22505 }, { "epoch": 0.2743348811134267, "grad_norm": 1.830372929573059, "learning_rate": 3.819371391917897e-06, "loss": 0.9096, "step": 22510 }, { "epoch": 0.2743958173375745, "grad_norm": 2.0496127605438232, "learning_rate": 3.81905067350866e-06, "loss": 0.8458, "step": 22515 }, { "epoch": 0.2744567535617223, "grad_norm": 2.1269381046295166, "learning_rate": 3.818729955099423e-06, "loss": 0.9313, "step": 22520 }, { "epoch": 0.2745176897858701, "grad_norm": 1.8134000301361084, "learning_rate": 3.818409236690187e-06, "loss": 0.8732, "step": 22525 }, { "epoch": 0.2745786260100179, "grad_norm": 1.6247549057006836, "learning_rate": 3.81808851828095e-06, "loss": 0.8257, "step": 22530 }, { "epoch": 0.27463956223416575, "grad_norm": 2.032025098800659, "learning_rate": 3.817767799871713e-06, "loss": 0.7819, "step": 22535 }, { "epoch": 0.2747004984583135, "grad_norm": 1.6047428846359253, "learning_rate": 3.817447081462477e-06, "loss": 0.8569, "step": 22540 }, { "epoch": 0.27476143468246134, "grad_norm": 2.896078109741211, "learning_rate": 3.81712636305324e-06, "loss": 0.948, "step": 22545 }, { "epoch": 0.27482237090660916, "grad_norm": 2.2014739513397217, "learning_rate": 3.816805644644003e-06, "loss": 0.9378, "step": 22550 }, { "epoch": 0.2748833071307569, "grad_norm": 1.8397897481918335, "learning_rate": 3.816484926234767e-06, "loss": 0.9224, "step": 22555 }, { "epoch": 0.27494424335490475, "grad_norm": 2.204094886779785, "learning_rate": 3.81616420782553e-06, "loss": 0.8811, "step": 22560 }, { "epoch": 0.27500517957905257, "grad_norm": 1.8462958335876465, "learning_rate": 3.815843489416293e-06, "loss": 0.8755, "step": 22565 }, { "epoch": 0.2750661158032004, "grad_norm": 1.9093769788742065, "learning_rate": 3.8155227710070565e-06, "loss": 0.864, "step": 22570 }, { "epoch": 0.27512705202734816, "grad_norm": 1.7846001386642456, "learning_rate": 3.8152020525978195e-06, "loss": 0.884, "step": 22575 }, { "epoch": 0.275187988251496, "grad_norm": 2.017510414123535, "learning_rate": 3.8148813341885825e-06, "loss": 0.8464, "step": 22580 }, { "epoch": 0.2752489244756438, "grad_norm": 2.1280765533447266, "learning_rate": 3.814560615779346e-06, "loss": 0.8426, "step": 22585 }, { "epoch": 0.2753098606997916, "grad_norm": 1.7564913034439087, "learning_rate": 3.8142398973701094e-06, "loss": 0.8717, "step": 22590 }, { "epoch": 0.2753707969239394, "grad_norm": 2.0221173763275146, "learning_rate": 3.8139191789608724e-06, "loss": 0.8718, "step": 22595 }, { "epoch": 0.2754317331480872, "grad_norm": 2.1096997261047363, "learning_rate": 3.813598460551636e-06, "loss": 0.8708, "step": 22600 }, { "epoch": 0.27549266937223504, "grad_norm": 1.88653564453125, "learning_rate": 3.8132777421423993e-06, "loss": 0.8548, "step": 22605 }, { "epoch": 0.2755536055963828, "grad_norm": 1.7737561464309692, "learning_rate": 3.8129570237331627e-06, "loss": 0.8541, "step": 22610 }, { "epoch": 0.27561454182053063, "grad_norm": 1.7407350540161133, "learning_rate": 3.8126363053239257e-06, "loss": 0.8304, "step": 22615 }, { "epoch": 0.27567547804467846, "grad_norm": 1.8042768239974976, "learning_rate": 3.812315586914689e-06, "loss": 0.798, "step": 22620 }, { "epoch": 0.2757364142688262, "grad_norm": 1.9471873044967651, "learning_rate": 3.8119948685054526e-06, "loss": 0.9354, "step": 22625 }, { "epoch": 0.27579735049297405, "grad_norm": 1.5744975805282593, "learning_rate": 3.8116741500962156e-06, "loss": 0.7871, "step": 22630 }, { "epoch": 0.27585828671712187, "grad_norm": 1.8895827531814575, "learning_rate": 3.8113534316869795e-06, "loss": 0.829, "step": 22635 }, { "epoch": 0.2759192229412697, "grad_norm": 2.087979555130005, "learning_rate": 3.8110327132777425e-06, "loss": 0.8406, "step": 22640 }, { "epoch": 0.27598015916541746, "grad_norm": 1.806153655052185, "learning_rate": 3.8107119948685055e-06, "loss": 0.8808, "step": 22645 }, { "epoch": 0.2760410953895653, "grad_norm": 2.0286924839019775, "learning_rate": 3.8103912764592694e-06, "loss": 0.9443, "step": 22650 }, { "epoch": 0.2761020316137131, "grad_norm": 1.9503554105758667, "learning_rate": 3.8100705580500324e-06, "loss": 0.8148, "step": 22655 }, { "epoch": 0.2761629678378609, "grad_norm": 2.022031784057617, "learning_rate": 3.8097498396407954e-06, "loss": 0.8626, "step": 22660 }, { "epoch": 0.2762239040620087, "grad_norm": 2.4256234169006348, "learning_rate": 3.8094291212315593e-06, "loss": 0.8823, "step": 22665 }, { "epoch": 0.2762848402861565, "grad_norm": 1.7867249250411987, "learning_rate": 3.8091084028223223e-06, "loss": 0.8865, "step": 22670 }, { "epoch": 0.27634577651030434, "grad_norm": 1.7800132036209106, "learning_rate": 3.8087876844130858e-06, "loss": 0.862, "step": 22675 }, { "epoch": 0.2764067127344521, "grad_norm": 2.121394634246826, "learning_rate": 3.8084669660038488e-06, "loss": 0.8893, "step": 22680 }, { "epoch": 0.27646764895859993, "grad_norm": 2.6633710861206055, "learning_rate": 3.8081462475946122e-06, "loss": 0.7823, "step": 22685 }, { "epoch": 0.27652858518274775, "grad_norm": 1.6731208562850952, "learning_rate": 3.8078255291853757e-06, "loss": 0.9629, "step": 22690 }, { "epoch": 0.2765895214068955, "grad_norm": 1.9870141744613647, "learning_rate": 3.8075048107761387e-06, "loss": 0.866, "step": 22695 }, { "epoch": 0.27665045763104334, "grad_norm": 1.9268893003463745, "learning_rate": 3.8071840923669025e-06, "loss": 0.8649, "step": 22700 }, { "epoch": 0.27671139385519117, "grad_norm": 1.9957650899887085, "learning_rate": 3.8068633739576656e-06, "loss": 0.8126, "step": 22705 }, { "epoch": 0.276772330079339, "grad_norm": 2.3358120918273926, "learning_rate": 3.8065426555484286e-06, "loss": 0.8456, "step": 22710 }, { "epoch": 0.27683326630348676, "grad_norm": 2.0705738067626953, "learning_rate": 3.8062219371391924e-06, "loss": 0.8682, "step": 22715 }, { "epoch": 0.2768942025276346, "grad_norm": 2.121063470840454, "learning_rate": 3.8059012187299554e-06, "loss": 0.7566, "step": 22720 }, { "epoch": 0.2769551387517824, "grad_norm": 2.1446380615234375, "learning_rate": 3.8055805003207185e-06, "loss": 0.8484, "step": 22725 }, { "epoch": 0.27701607497593017, "grad_norm": 2.1043126583099365, "learning_rate": 3.8052597819114823e-06, "loss": 0.8743, "step": 22730 }, { "epoch": 0.277077011200078, "grad_norm": 1.6702736616134644, "learning_rate": 3.8049390635022453e-06, "loss": 0.8575, "step": 22735 }, { "epoch": 0.2771379474242258, "grad_norm": 1.9452879428863525, "learning_rate": 3.8046183450930084e-06, "loss": 0.8194, "step": 22740 }, { "epoch": 0.27719888364837364, "grad_norm": 1.9399573802947998, "learning_rate": 3.8042976266837722e-06, "loss": 0.897, "step": 22745 }, { "epoch": 0.2772598198725214, "grad_norm": 2.085798740386963, "learning_rate": 3.8039769082745352e-06, "loss": 0.9114, "step": 22750 }, { "epoch": 0.27732075609666923, "grad_norm": 1.8447554111480713, "learning_rate": 3.8036561898652987e-06, "loss": 0.9035, "step": 22755 }, { "epoch": 0.27738169232081705, "grad_norm": 2.5782268047332764, "learning_rate": 3.8033354714560617e-06, "loss": 0.835, "step": 22760 }, { "epoch": 0.2774426285449648, "grad_norm": 1.9005478620529175, "learning_rate": 3.803014753046825e-06, "loss": 0.8664, "step": 22765 }, { "epoch": 0.27750356476911264, "grad_norm": 2.1722769737243652, "learning_rate": 3.8026940346375886e-06, "loss": 0.8544, "step": 22770 }, { "epoch": 0.27756450099326047, "grad_norm": 1.810164451599121, "learning_rate": 3.8023733162283516e-06, "loss": 0.8334, "step": 22775 }, { "epoch": 0.27762543721740823, "grad_norm": 1.9333233833312988, "learning_rate": 3.8020525978191155e-06, "loss": 0.8784, "step": 22780 }, { "epoch": 0.27768637344155606, "grad_norm": 2.0131516456604004, "learning_rate": 3.8017318794098785e-06, "loss": 0.9242, "step": 22785 }, { "epoch": 0.2777473096657039, "grad_norm": 2.000941038131714, "learning_rate": 3.8014111610006415e-06, "loss": 0.8942, "step": 22790 }, { "epoch": 0.2778082458898517, "grad_norm": 2.1047327518463135, "learning_rate": 3.8010904425914054e-06, "loss": 0.9257, "step": 22795 }, { "epoch": 0.27786918211399947, "grad_norm": 2.149930238723755, "learning_rate": 3.8007697241821684e-06, "loss": 0.8653, "step": 22800 }, { "epoch": 0.2779301183381473, "grad_norm": 2.0308218002319336, "learning_rate": 3.8004490057729314e-06, "loss": 0.9012, "step": 22805 }, { "epoch": 0.2779910545622951, "grad_norm": 2.022562265396118, "learning_rate": 3.8001282873636953e-06, "loss": 0.8549, "step": 22810 }, { "epoch": 0.2780519907864429, "grad_norm": 2.048259735107422, "learning_rate": 3.7998075689544583e-06, "loss": 0.9093, "step": 22815 }, { "epoch": 0.2781129270105907, "grad_norm": 2.1169488430023193, "learning_rate": 3.7994868505452213e-06, "loss": 0.8899, "step": 22820 }, { "epoch": 0.2781738632347385, "grad_norm": 2.1668827533721924, "learning_rate": 3.799166132135985e-06, "loss": 0.858, "step": 22825 }, { "epoch": 0.27823479945888635, "grad_norm": 1.8206050395965576, "learning_rate": 3.798845413726748e-06, "loss": 0.7742, "step": 22830 }, { "epoch": 0.2782957356830341, "grad_norm": 2.0696628093719482, "learning_rate": 3.7985246953175116e-06, "loss": 0.884, "step": 22835 }, { "epoch": 0.27835667190718194, "grad_norm": 2.154928207397461, "learning_rate": 3.7982039769082746e-06, "loss": 0.943, "step": 22840 }, { "epoch": 0.27841760813132976, "grad_norm": 2.2313425540924072, "learning_rate": 3.797883258499038e-06, "loss": 0.8182, "step": 22845 }, { "epoch": 0.27847854435547753, "grad_norm": 1.8322629928588867, "learning_rate": 3.7975625400898015e-06, "loss": 0.8739, "step": 22850 }, { "epoch": 0.27853948057962535, "grad_norm": 1.947848916053772, "learning_rate": 3.7972418216805645e-06, "loss": 0.8107, "step": 22855 }, { "epoch": 0.2786004168037732, "grad_norm": 1.9502283334732056, "learning_rate": 3.7969211032713284e-06, "loss": 0.8735, "step": 22860 }, { "epoch": 0.278661353027921, "grad_norm": 2.3363804817199707, "learning_rate": 3.7966003848620914e-06, "loss": 0.8204, "step": 22865 }, { "epoch": 0.27872228925206877, "grad_norm": 1.8628123998641968, "learning_rate": 3.7962796664528544e-06, "loss": 0.8262, "step": 22870 }, { "epoch": 0.2787832254762166, "grad_norm": 1.8215060234069824, "learning_rate": 3.7959589480436183e-06, "loss": 0.8958, "step": 22875 }, { "epoch": 0.2788441617003644, "grad_norm": 1.8726441860198975, "learning_rate": 3.7956382296343813e-06, "loss": 0.8503, "step": 22880 }, { "epoch": 0.2789050979245122, "grad_norm": 2.1457080841064453, "learning_rate": 3.7953175112251443e-06, "loss": 0.8837, "step": 22885 }, { "epoch": 0.27896603414866, "grad_norm": 1.8135900497436523, "learning_rate": 3.794996792815908e-06, "loss": 0.838, "step": 22890 }, { "epoch": 0.2790269703728078, "grad_norm": 2.2218692302703857, "learning_rate": 3.794676074406671e-06, "loss": 0.9243, "step": 22895 }, { "epoch": 0.27908790659695565, "grad_norm": 2.043919086456299, "learning_rate": 3.7943553559974346e-06, "loss": 0.8369, "step": 22900 }, { "epoch": 0.2791488428211034, "grad_norm": 1.9777882099151611, "learning_rate": 3.794034637588198e-06, "loss": 0.9142, "step": 22905 }, { "epoch": 0.27920977904525124, "grad_norm": 1.8181885480880737, "learning_rate": 3.793713919178961e-06, "loss": 0.8383, "step": 22910 }, { "epoch": 0.27927071526939906, "grad_norm": 1.9702949523925781, "learning_rate": 3.7933932007697245e-06, "loss": 0.8791, "step": 22915 }, { "epoch": 0.27933165149354683, "grad_norm": 1.7905645370483398, "learning_rate": 3.7930724823604876e-06, "loss": 0.8146, "step": 22920 }, { "epoch": 0.27939258771769465, "grad_norm": 1.9851433038711548, "learning_rate": 3.7927517639512514e-06, "loss": 0.934, "step": 22925 }, { "epoch": 0.2794535239418425, "grad_norm": 2.0505878925323486, "learning_rate": 3.7924310455420144e-06, "loss": 0.8402, "step": 22930 }, { "epoch": 0.2795144601659903, "grad_norm": 2.0483505725860596, "learning_rate": 3.7921103271327775e-06, "loss": 0.852, "step": 22935 }, { "epoch": 0.27957539639013806, "grad_norm": 2.3266639709472656, "learning_rate": 3.7917896087235413e-06, "loss": 0.9215, "step": 22940 }, { "epoch": 0.2796363326142859, "grad_norm": 2.109605550765991, "learning_rate": 3.7914688903143043e-06, "loss": 0.8519, "step": 22945 }, { "epoch": 0.2796972688384337, "grad_norm": 3.0600457191467285, "learning_rate": 3.7911481719050673e-06, "loss": 0.8505, "step": 22950 }, { "epoch": 0.2797582050625815, "grad_norm": 1.9974982738494873, "learning_rate": 3.7908274534958312e-06, "loss": 0.907, "step": 22955 }, { "epoch": 0.2798191412867293, "grad_norm": 2.1885147094726562, "learning_rate": 3.7905067350865942e-06, "loss": 0.8835, "step": 22960 }, { "epoch": 0.2798800775108771, "grad_norm": 1.9399237632751465, "learning_rate": 3.7901860166773572e-06, "loss": 0.8796, "step": 22965 }, { "epoch": 0.27994101373502495, "grad_norm": 2.173985719680786, "learning_rate": 3.789865298268121e-06, "loss": 0.9021, "step": 22970 }, { "epoch": 0.2800019499591727, "grad_norm": 1.68014657497406, "learning_rate": 3.789544579858884e-06, "loss": 0.8936, "step": 22975 }, { "epoch": 0.28006288618332054, "grad_norm": 1.731215476989746, "learning_rate": 3.7892238614496476e-06, "loss": 0.8561, "step": 22980 }, { "epoch": 0.28012382240746836, "grad_norm": 1.8045769929885864, "learning_rate": 3.788903143040411e-06, "loss": 0.9024, "step": 22985 }, { "epoch": 0.2801847586316161, "grad_norm": 2.4344100952148438, "learning_rate": 3.788582424631174e-06, "loss": 0.9218, "step": 22990 }, { "epoch": 0.28024569485576395, "grad_norm": 1.9823901653289795, "learning_rate": 3.7882617062219375e-06, "loss": 0.9018, "step": 22995 }, { "epoch": 0.28030663107991177, "grad_norm": 1.9805597066879272, "learning_rate": 3.787940987812701e-06, "loss": 0.8596, "step": 23000 }, { "epoch": 0.2803675673040596, "grad_norm": 2.046083450317383, "learning_rate": 3.7876202694034643e-06, "loss": 1.0061, "step": 23005 }, { "epoch": 0.28042850352820736, "grad_norm": 2.432448148727417, "learning_rate": 3.7872995509942274e-06, "loss": 0.8616, "step": 23010 }, { "epoch": 0.2804894397523552, "grad_norm": 1.9100431203842163, "learning_rate": 3.7869788325849904e-06, "loss": 0.8388, "step": 23015 }, { "epoch": 0.280550375976503, "grad_norm": 2.003288984298706, "learning_rate": 3.7866581141757542e-06, "loss": 0.8287, "step": 23020 }, { "epoch": 0.2806113122006508, "grad_norm": 2.2084262371063232, "learning_rate": 3.7863373957665173e-06, "loss": 0.8413, "step": 23025 }, { "epoch": 0.2806722484247986, "grad_norm": 2.1319711208343506, "learning_rate": 3.7860166773572803e-06, "loss": 0.8526, "step": 23030 }, { "epoch": 0.2807331846489464, "grad_norm": 2.219055652618408, "learning_rate": 3.785695958948044e-06, "loss": 0.9348, "step": 23035 }, { "epoch": 0.28079412087309424, "grad_norm": 2.2670645713806152, "learning_rate": 3.785375240538807e-06, "loss": 0.9032, "step": 23040 }, { "epoch": 0.280855057097242, "grad_norm": 1.9241989850997925, "learning_rate": 3.78505452212957e-06, "loss": 0.9578, "step": 23045 }, { "epoch": 0.28091599332138983, "grad_norm": 2.124614953994751, "learning_rate": 3.784733803720334e-06, "loss": 0.8425, "step": 23050 }, { "epoch": 0.28097692954553766, "grad_norm": 1.8321105241775513, "learning_rate": 3.784413085311097e-06, "loss": 0.859, "step": 23055 }, { "epoch": 0.2810378657696854, "grad_norm": 2.0811662673950195, "learning_rate": 3.7840923669018605e-06, "loss": 0.9066, "step": 23060 }, { "epoch": 0.28109880199383325, "grad_norm": 1.6915942430496216, "learning_rate": 3.783771648492624e-06, "loss": 0.8267, "step": 23065 }, { "epoch": 0.28115973821798107, "grad_norm": 1.749672532081604, "learning_rate": 3.783450930083387e-06, "loss": 0.9198, "step": 23070 }, { "epoch": 0.2812206744421289, "grad_norm": 2.2750885486602783, "learning_rate": 3.7831302116741504e-06, "loss": 0.8795, "step": 23075 }, { "epoch": 0.28128161066627666, "grad_norm": 1.9092859029769897, "learning_rate": 3.782809493264914e-06, "loss": 0.8669, "step": 23080 }, { "epoch": 0.2813425468904245, "grad_norm": 1.873694896697998, "learning_rate": 3.7824887748556773e-06, "loss": 0.8217, "step": 23085 }, { "epoch": 0.2814034831145723, "grad_norm": 1.802986741065979, "learning_rate": 3.7821680564464403e-06, "loss": 0.7885, "step": 23090 }, { "epoch": 0.2814644193387201, "grad_norm": 1.8062068223953247, "learning_rate": 3.7818473380372033e-06, "loss": 0.8896, "step": 23095 }, { "epoch": 0.2815253555628679, "grad_norm": 1.9291640520095825, "learning_rate": 3.781526619627967e-06, "loss": 0.9067, "step": 23100 }, { "epoch": 0.2815862917870157, "grad_norm": 1.8302890062332153, "learning_rate": 3.78120590121873e-06, "loss": 0.8988, "step": 23105 }, { "epoch": 0.28164722801116354, "grad_norm": 1.9129784107208252, "learning_rate": 3.780885182809493e-06, "loss": 0.7774, "step": 23110 }, { "epoch": 0.2817081642353113, "grad_norm": 2.050243854522705, "learning_rate": 3.780564464400257e-06, "loss": 0.9423, "step": 23115 }, { "epoch": 0.28176910045945913, "grad_norm": 2.0327460765838623, "learning_rate": 3.78024374599102e-06, "loss": 0.9347, "step": 23120 }, { "epoch": 0.28183003668360695, "grad_norm": 1.934704303741455, "learning_rate": 3.779923027581783e-06, "loss": 0.7433, "step": 23125 }, { "epoch": 0.2818909729077547, "grad_norm": 2.2361626625061035, "learning_rate": 3.779602309172547e-06, "loss": 0.8738, "step": 23130 }, { "epoch": 0.28195190913190255, "grad_norm": 1.7090868949890137, "learning_rate": 3.77928159076331e-06, "loss": 0.8674, "step": 23135 }, { "epoch": 0.28201284535605037, "grad_norm": 2.159484624862671, "learning_rate": 3.7789608723540734e-06, "loss": 0.8162, "step": 23140 }, { "epoch": 0.2820737815801982, "grad_norm": 1.8803900480270386, "learning_rate": 3.778640153944837e-06, "loss": 0.891, "step": 23145 }, { "epoch": 0.28213471780434596, "grad_norm": 2.021907091140747, "learning_rate": 3.7783194355356003e-06, "loss": 0.8646, "step": 23150 }, { "epoch": 0.2821956540284938, "grad_norm": 1.9463156461715698, "learning_rate": 3.7779987171263633e-06, "loss": 0.8311, "step": 23155 }, { "epoch": 0.2822565902526416, "grad_norm": 2.1713521480560303, "learning_rate": 3.7776779987171268e-06, "loss": 0.8705, "step": 23160 }, { "epoch": 0.28231752647678937, "grad_norm": 1.8971922397613525, "learning_rate": 3.77735728030789e-06, "loss": 0.8429, "step": 23165 }, { "epoch": 0.2823784627009372, "grad_norm": 2.1248831748962402, "learning_rate": 3.7770365618986532e-06, "loss": 0.8416, "step": 23170 }, { "epoch": 0.282439398925085, "grad_norm": 2.082288980484009, "learning_rate": 3.7767158434894162e-06, "loss": 0.8441, "step": 23175 }, { "epoch": 0.28250033514923284, "grad_norm": 2.048569679260254, "learning_rate": 3.77639512508018e-06, "loss": 0.8896, "step": 23180 }, { "epoch": 0.2825612713733806, "grad_norm": 2.276498317718506, "learning_rate": 3.776074406670943e-06, "loss": 0.9632, "step": 23185 }, { "epoch": 0.28262220759752843, "grad_norm": 1.8699157238006592, "learning_rate": 3.775753688261706e-06, "loss": 0.8716, "step": 23190 }, { "epoch": 0.28268314382167625, "grad_norm": 1.78324294090271, "learning_rate": 3.77543296985247e-06, "loss": 0.8728, "step": 23195 }, { "epoch": 0.282744080045824, "grad_norm": 2.4743893146514893, "learning_rate": 3.775112251443233e-06, "loss": 0.9379, "step": 23200 }, { "epoch": 0.28280501626997184, "grad_norm": 1.888056755065918, "learning_rate": 3.7747915330339965e-06, "loss": 0.894, "step": 23205 }, { "epoch": 0.28286595249411967, "grad_norm": 2.1524205207824707, "learning_rate": 3.77447081462476e-06, "loss": 0.9539, "step": 23210 }, { "epoch": 0.2829268887182675, "grad_norm": 1.998055100440979, "learning_rate": 3.774150096215523e-06, "loss": 0.8877, "step": 23215 }, { "epoch": 0.28298782494241526, "grad_norm": 1.7937155961990356, "learning_rate": 3.7738293778062863e-06, "loss": 0.8891, "step": 23220 }, { "epoch": 0.2830487611665631, "grad_norm": 1.7108120918273926, "learning_rate": 3.77350865939705e-06, "loss": 0.8826, "step": 23225 }, { "epoch": 0.2831096973907109, "grad_norm": 2.3464269638061523, "learning_rate": 3.7731879409878132e-06, "loss": 0.9527, "step": 23230 }, { "epoch": 0.28317063361485867, "grad_norm": 1.9230812788009644, "learning_rate": 3.7728672225785762e-06, "loss": 0.8755, "step": 23235 }, { "epoch": 0.2832315698390065, "grad_norm": 1.7174983024597168, "learning_rate": 3.7725465041693397e-06, "loss": 0.811, "step": 23240 }, { "epoch": 0.2832925060631543, "grad_norm": 1.9379056692123413, "learning_rate": 3.772225785760103e-06, "loss": 0.8796, "step": 23245 }, { "epoch": 0.2833534422873021, "grad_norm": 1.8550212383270264, "learning_rate": 3.771905067350866e-06, "loss": 0.8548, "step": 23250 }, { "epoch": 0.2834143785114499, "grad_norm": 1.960894227027893, "learning_rate": 3.771584348941629e-06, "loss": 0.8903, "step": 23255 }, { "epoch": 0.2834753147355977, "grad_norm": 2.142106771469116, "learning_rate": 3.771263630532393e-06, "loss": 0.8711, "step": 23260 }, { "epoch": 0.28353625095974555, "grad_norm": 1.839383840560913, "learning_rate": 3.770942912123156e-06, "loss": 0.8755, "step": 23265 }, { "epoch": 0.2835971871838933, "grad_norm": 2.065288543701172, "learning_rate": 3.770622193713919e-06, "loss": 0.904, "step": 23270 }, { "epoch": 0.28365812340804114, "grad_norm": 1.8576412200927734, "learning_rate": 3.770301475304683e-06, "loss": 0.8836, "step": 23275 }, { "epoch": 0.28371905963218896, "grad_norm": 1.9336040019989014, "learning_rate": 3.769980756895446e-06, "loss": 0.9032, "step": 23280 }, { "epoch": 0.28377999585633673, "grad_norm": 1.9100536108016968, "learning_rate": 3.7696600384862094e-06, "loss": 0.8348, "step": 23285 }, { "epoch": 0.28384093208048455, "grad_norm": 2.099891424179077, "learning_rate": 3.769339320076973e-06, "loss": 0.8709, "step": 23290 }, { "epoch": 0.2839018683046324, "grad_norm": 1.768758773803711, "learning_rate": 3.769018601667736e-06, "loss": 0.8108, "step": 23295 }, { "epoch": 0.2839628045287802, "grad_norm": 2.215243339538574, "learning_rate": 3.7686978832584993e-06, "loss": 0.8612, "step": 23300 }, { "epoch": 0.28402374075292797, "grad_norm": 1.7450817823410034, "learning_rate": 3.7683771648492627e-06, "loss": 0.9376, "step": 23305 }, { "epoch": 0.2840846769770758, "grad_norm": 1.8735167980194092, "learning_rate": 3.768056446440026e-06, "loss": 0.9112, "step": 23310 }, { "epoch": 0.2841456132012236, "grad_norm": 2.0298075675964355, "learning_rate": 3.767735728030789e-06, "loss": 0.8279, "step": 23315 }, { "epoch": 0.2842065494253714, "grad_norm": 2.05245041847229, "learning_rate": 3.7674150096215526e-06, "loss": 0.8933, "step": 23320 }, { "epoch": 0.2842674856495192, "grad_norm": 1.9891263246536255, "learning_rate": 3.767094291212316e-06, "loss": 0.9575, "step": 23325 }, { "epoch": 0.284328421873667, "grad_norm": 1.7113076448440552, "learning_rate": 3.766773572803079e-06, "loss": 0.8582, "step": 23330 }, { "epoch": 0.28438935809781485, "grad_norm": 2.0194602012634277, "learning_rate": 3.766452854393843e-06, "loss": 0.8471, "step": 23335 }, { "epoch": 0.2844502943219626, "grad_norm": 1.9493980407714844, "learning_rate": 3.766132135984606e-06, "loss": 0.8718, "step": 23340 }, { "epoch": 0.28451123054611044, "grad_norm": 1.7496161460876465, "learning_rate": 3.765811417575369e-06, "loss": 0.8133, "step": 23345 }, { "epoch": 0.28457216677025826, "grad_norm": 1.876419186592102, "learning_rate": 3.765490699166132e-06, "loss": 0.9082, "step": 23350 }, { "epoch": 0.28463310299440603, "grad_norm": 1.7759650945663452, "learning_rate": 3.765169980756896e-06, "loss": 0.8508, "step": 23355 }, { "epoch": 0.28469403921855385, "grad_norm": 1.8218427896499634, "learning_rate": 3.764849262347659e-06, "loss": 0.8947, "step": 23360 }, { "epoch": 0.2847549754427017, "grad_norm": 1.816251516342163, "learning_rate": 3.7645285439384223e-06, "loss": 0.9363, "step": 23365 }, { "epoch": 0.2848159116668495, "grad_norm": 2.136603832244873, "learning_rate": 3.7642078255291857e-06, "loss": 0.8166, "step": 23370 }, { "epoch": 0.28487684789099726, "grad_norm": 2.2878060340881348, "learning_rate": 3.763887107119949e-06, "loss": 0.8557, "step": 23375 }, { "epoch": 0.2849377841151451, "grad_norm": 1.7006958723068237, "learning_rate": 3.763566388710712e-06, "loss": 0.8811, "step": 23380 }, { "epoch": 0.2849987203392929, "grad_norm": 1.9346401691436768, "learning_rate": 3.7632456703014756e-06, "loss": 0.8875, "step": 23385 }, { "epoch": 0.2850596565634407, "grad_norm": 1.9578078985214233, "learning_rate": 3.762924951892239e-06, "loss": 0.8758, "step": 23390 }, { "epoch": 0.2851205927875885, "grad_norm": 2.520540237426758, "learning_rate": 3.762604233483002e-06, "loss": 0.9051, "step": 23395 }, { "epoch": 0.2851815290117363, "grad_norm": 2.06024432182312, "learning_rate": 3.762283515073766e-06, "loss": 0.8444, "step": 23400 }, { "epoch": 0.28524246523588415, "grad_norm": 1.861111044883728, "learning_rate": 3.761962796664529e-06, "loss": 0.8057, "step": 23405 }, { "epoch": 0.2853034014600319, "grad_norm": 1.808388113975525, "learning_rate": 3.761642078255292e-06, "loss": 0.8215, "step": 23410 }, { "epoch": 0.28536433768417974, "grad_norm": 1.7830890417099, "learning_rate": 3.761321359846056e-06, "loss": 0.8558, "step": 23415 }, { "epoch": 0.28542527390832756, "grad_norm": 2.0376062393188477, "learning_rate": 3.761000641436819e-06, "loss": 0.8217, "step": 23420 }, { "epoch": 0.2854862101324753, "grad_norm": 2.4258930683135986, "learning_rate": 3.760679923027582e-06, "loss": 0.8333, "step": 23425 }, { "epoch": 0.28554714635662315, "grad_norm": 2.063598871231079, "learning_rate": 3.7603592046183453e-06, "loss": 0.8362, "step": 23430 }, { "epoch": 0.28560808258077097, "grad_norm": 1.9858990907669067, "learning_rate": 3.7600384862091088e-06, "loss": 0.8293, "step": 23435 }, { "epoch": 0.2856690188049188, "grad_norm": 1.869364619255066, "learning_rate": 3.759717767799872e-06, "loss": 0.8262, "step": 23440 }, { "epoch": 0.28572995502906656, "grad_norm": 2.170456647872925, "learning_rate": 3.7593970493906352e-06, "loss": 0.8954, "step": 23445 }, { "epoch": 0.2857908912532144, "grad_norm": 1.8869285583496094, "learning_rate": 3.7590763309813987e-06, "loss": 0.8565, "step": 23450 }, { "epoch": 0.2858518274773622, "grad_norm": 1.7128525972366333, "learning_rate": 3.758755612572162e-06, "loss": 0.867, "step": 23455 }, { "epoch": 0.28591276370151, "grad_norm": 1.948769211769104, "learning_rate": 3.758434894162925e-06, "loss": 0.8692, "step": 23460 }, { "epoch": 0.2859736999256578, "grad_norm": 1.6814748048782349, "learning_rate": 3.7581141757536886e-06, "loss": 0.8466, "step": 23465 }, { "epoch": 0.2860346361498056, "grad_norm": 1.838809609413147, "learning_rate": 3.757793457344452e-06, "loss": 0.8208, "step": 23470 }, { "epoch": 0.28609557237395344, "grad_norm": 1.8869938850402832, "learning_rate": 3.757472738935215e-06, "loss": 0.8701, "step": 23475 }, { "epoch": 0.2861565085981012, "grad_norm": 1.8677295446395874, "learning_rate": 3.757152020525979e-06, "loss": 0.8729, "step": 23480 }, { "epoch": 0.28621744482224903, "grad_norm": 2.0072925090789795, "learning_rate": 3.756831302116742e-06, "loss": 0.9261, "step": 23485 }, { "epoch": 0.28627838104639686, "grad_norm": 1.9169068336486816, "learning_rate": 3.756510583707505e-06, "loss": 0.8614, "step": 23490 }, { "epoch": 0.2863393172705446, "grad_norm": 2.2409417629241943, "learning_rate": 3.756189865298269e-06, "loss": 0.8678, "step": 23495 }, { "epoch": 0.28640025349469245, "grad_norm": 2.0401084423065186, "learning_rate": 3.755869146889032e-06, "loss": 0.9417, "step": 23500 }, { "epoch": 0.28646118971884027, "grad_norm": 2.0538899898529053, "learning_rate": 3.755548428479795e-06, "loss": 0.8953, "step": 23505 }, { "epoch": 0.2865221259429881, "grad_norm": 1.6937835216522217, "learning_rate": 3.7552277100705583e-06, "loss": 0.8929, "step": 23510 }, { "epoch": 0.28658306216713586, "grad_norm": 2.0087335109710693, "learning_rate": 3.7549069916613217e-06, "loss": 0.8518, "step": 23515 }, { "epoch": 0.2866439983912837, "grad_norm": 1.7515109777450562, "learning_rate": 3.7545862732520847e-06, "loss": 0.9241, "step": 23520 }, { "epoch": 0.2867049346154315, "grad_norm": 1.9971482753753662, "learning_rate": 3.754265554842848e-06, "loss": 0.8475, "step": 23525 }, { "epoch": 0.2867658708395793, "grad_norm": 1.8399931192398071, "learning_rate": 3.7539448364336116e-06, "loss": 0.8896, "step": 23530 }, { "epoch": 0.2868268070637271, "grad_norm": 1.8009968996047974, "learning_rate": 3.753624118024375e-06, "loss": 0.9053, "step": 23535 }, { "epoch": 0.2868877432878749, "grad_norm": 1.755307674407959, "learning_rate": 3.753303399615138e-06, "loss": 0.8327, "step": 23540 }, { "epoch": 0.28694867951202274, "grad_norm": 2.34873104095459, "learning_rate": 3.7529826812059015e-06, "loss": 0.9204, "step": 23545 }, { "epoch": 0.2870096157361705, "grad_norm": 1.621130347251892, "learning_rate": 3.752661962796665e-06, "loss": 0.8363, "step": 23550 }, { "epoch": 0.28707055196031833, "grad_norm": 1.8927240371704102, "learning_rate": 3.752341244387428e-06, "loss": 0.8644, "step": 23555 }, { "epoch": 0.28713148818446615, "grad_norm": 2.0816409587860107, "learning_rate": 3.752020525978192e-06, "loss": 0.9161, "step": 23560 }, { "epoch": 0.2871924244086139, "grad_norm": 2.3864119052886963, "learning_rate": 3.751699807568955e-06, "loss": 0.8808, "step": 23565 }, { "epoch": 0.28725336063276175, "grad_norm": 2.041292905807495, "learning_rate": 3.751379089159718e-06, "loss": 0.9245, "step": 23570 }, { "epoch": 0.28731429685690957, "grad_norm": 1.9789210557937622, "learning_rate": 3.7510583707504817e-06, "loss": 0.7933, "step": 23575 }, { "epoch": 0.2873752330810574, "grad_norm": 1.7344470024108887, "learning_rate": 3.7507376523412447e-06, "loss": 0.912, "step": 23580 }, { "epoch": 0.28743616930520516, "grad_norm": 1.782644271850586, "learning_rate": 3.7504169339320077e-06, "loss": 0.9287, "step": 23585 }, { "epoch": 0.287497105529353, "grad_norm": 1.647103190422058, "learning_rate": 3.7500962155227716e-06, "loss": 0.8807, "step": 23590 }, { "epoch": 0.2875580417535008, "grad_norm": 2.670868158340454, "learning_rate": 3.7497754971135346e-06, "loss": 0.8819, "step": 23595 }, { "epoch": 0.28761897797764857, "grad_norm": 1.881650686264038, "learning_rate": 3.749454778704298e-06, "loss": 0.8405, "step": 23600 }, { "epoch": 0.2876799142017964, "grad_norm": 1.7685080766677856, "learning_rate": 3.749134060295061e-06, "loss": 0.8975, "step": 23605 }, { "epoch": 0.2877408504259442, "grad_norm": 2.086850881576538, "learning_rate": 3.7488133418858245e-06, "loss": 0.912, "step": 23610 }, { "epoch": 0.28780178665009204, "grad_norm": 1.8839994668960571, "learning_rate": 3.748492623476588e-06, "loss": 0.9456, "step": 23615 }, { "epoch": 0.2878627228742398, "grad_norm": 2.0411300659179688, "learning_rate": 3.748171905067351e-06, "loss": 0.8914, "step": 23620 }, { "epoch": 0.28792365909838763, "grad_norm": 1.8772751092910767, "learning_rate": 3.747851186658115e-06, "loss": 0.8661, "step": 23625 }, { "epoch": 0.28798459532253545, "grad_norm": 2.074002504348755, "learning_rate": 3.747530468248878e-06, "loss": 0.8405, "step": 23630 }, { "epoch": 0.2880455315466832, "grad_norm": 1.9755195379257202, "learning_rate": 3.747209749839641e-06, "loss": 0.8088, "step": 23635 }, { "epoch": 0.28810646777083104, "grad_norm": 1.96689772605896, "learning_rate": 3.7468890314304047e-06, "loss": 0.9122, "step": 23640 }, { "epoch": 0.28816740399497887, "grad_norm": 1.806731104850769, "learning_rate": 3.7465683130211678e-06, "loss": 0.8628, "step": 23645 }, { "epoch": 0.2882283402191267, "grad_norm": 1.8571940660476685, "learning_rate": 3.7462475946119308e-06, "loss": 0.8525, "step": 23650 }, { "epoch": 0.28828927644327446, "grad_norm": 2.0232999324798584, "learning_rate": 3.7459268762026946e-06, "loss": 0.9144, "step": 23655 }, { "epoch": 0.2883502126674223, "grad_norm": 2.0831563472747803, "learning_rate": 3.7456061577934577e-06, "loss": 0.8496, "step": 23660 }, { "epoch": 0.2884111488915701, "grad_norm": 1.9007498025894165, "learning_rate": 3.7452854393842207e-06, "loss": 0.8769, "step": 23665 }, { "epoch": 0.28847208511571787, "grad_norm": 1.7399755716323853, "learning_rate": 3.7449647209749845e-06, "loss": 0.8372, "step": 23670 }, { "epoch": 0.2885330213398657, "grad_norm": 1.9477726221084595, "learning_rate": 3.7446440025657476e-06, "loss": 0.9164, "step": 23675 }, { "epoch": 0.2885939575640135, "grad_norm": 1.9545371532440186, "learning_rate": 3.744323284156511e-06, "loss": 0.903, "step": 23680 }, { "epoch": 0.28865489378816134, "grad_norm": 1.9360884428024292, "learning_rate": 3.744002565747274e-06, "loss": 0.8581, "step": 23685 }, { "epoch": 0.2887158300123091, "grad_norm": 2.0185070037841797, "learning_rate": 3.7436818473380375e-06, "loss": 0.9383, "step": 23690 }, { "epoch": 0.2887767662364569, "grad_norm": 1.9086709022521973, "learning_rate": 3.743361128928801e-06, "loss": 0.7795, "step": 23695 }, { "epoch": 0.28883770246060475, "grad_norm": 2.0998048782348633, "learning_rate": 3.743040410519564e-06, "loss": 0.9499, "step": 23700 }, { "epoch": 0.2888986386847525, "grad_norm": 1.7128019332885742, "learning_rate": 3.7427196921103278e-06, "loss": 0.8955, "step": 23705 }, { "epoch": 0.28895957490890034, "grad_norm": 2.1789515018463135, "learning_rate": 3.742398973701091e-06, "loss": 0.9317, "step": 23710 }, { "epoch": 0.28902051113304816, "grad_norm": 1.9184530973434448, "learning_rate": 3.742078255291854e-06, "loss": 0.8289, "step": 23715 }, { "epoch": 0.289081447357196, "grad_norm": 2.1342945098876953, "learning_rate": 3.7417575368826177e-06, "loss": 0.8899, "step": 23720 }, { "epoch": 0.28914238358134375, "grad_norm": 1.855316400527954, "learning_rate": 3.7414368184733807e-06, "loss": 0.871, "step": 23725 }, { "epoch": 0.2892033198054916, "grad_norm": 2.167605400085449, "learning_rate": 3.7411161000641437e-06, "loss": 0.8436, "step": 23730 }, { "epoch": 0.2892642560296394, "grad_norm": 2.0404160022735596, "learning_rate": 3.7407953816549076e-06, "loss": 0.8341, "step": 23735 }, { "epoch": 0.28932519225378717, "grad_norm": 2.0160183906555176, "learning_rate": 3.7404746632456706e-06, "loss": 0.8703, "step": 23740 }, { "epoch": 0.289386128477935, "grad_norm": 1.8718327283859253, "learning_rate": 3.7401539448364336e-06, "loss": 0.8776, "step": 23745 }, { "epoch": 0.2894470647020828, "grad_norm": 1.6554638147354126, "learning_rate": 3.7398332264271975e-06, "loss": 0.9197, "step": 23750 }, { "epoch": 0.2895080009262306, "grad_norm": 2.2100727558135986, "learning_rate": 3.7395125080179605e-06, "loss": 0.8256, "step": 23755 }, { "epoch": 0.2895689371503784, "grad_norm": 1.5981903076171875, "learning_rate": 3.739191789608724e-06, "loss": 0.8353, "step": 23760 }, { "epoch": 0.2896298733745262, "grad_norm": 1.85274076461792, "learning_rate": 3.738871071199487e-06, "loss": 0.8178, "step": 23765 }, { "epoch": 0.28969080959867405, "grad_norm": 2.4368033409118652, "learning_rate": 3.7385503527902504e-06, "loss": 0.8469, "step": 23770 }, { "epoch": 0.2897517458228218, "grad_norm": 1.9342700242996216, "learning_rate": 3.738229634381014e-06, "loss": 0.9347, "step": 23775 }, { "epoch": 0.28981268204696964, "grad_norm": 1.8306779861450195, "learning_rate": 3.737908915971777e-06, "loss": 0.8278, "step": 23780 }, { "epoch": 0.28987361827111746, "grad_norm": 2.1056201457977295, "learning_rate": 3.7375881975625407e-06, "loss": 0.8751, "step": 23785 }, { "epoch": 0.28993455449526523, "grad_norm": 2.1076300144195557, "learning_rate": 3.7372674791533037e-06, "loss": 0.9147, "step": 23790 }, { "epoch": 0.28999549071941305, "grad_norm": 2.0555942058563232, "learning_rate": 3.7369467607440667e-06, "loss": 0.9755, "step": 23795 }, { "epoch": 0.2900564269435609, "grad_norm": 2.031036615371704, "learning_rate": 3.7366260423348306e-06, "loss": 0.9086, "step": 23800 }, { "epoch": 0.2901173631677087, "grad_norm": 1.8519785404205322, "learning_rate": 3.7363053239255936e-06, "loss": 0.8592, "step": 23805 }, { "epoch": 0.29017829939185646, "grad_norm": 1.8070833683013916, "learning_rate": 3.7359846055163566e-06, "loss": 0.8766, "step": 23810 }, { "epoch": 0.2902392356160043, "grad_norm": 2.1392571926116943, "learning_rate": 3.7356638871071205e-06, "loss": 0.8809, "step": 23815 }, { "epoch": 0.2903001718401521, "grad_norm": 1.9215087890625, "learning_rate": 3.7353431686978835e-06, "loss": 0.8445, "step": 23820 }, { "epoch": 0.2903611080642999, "grad_norm": 1.8264415264129639, "learning_rate": 3.7350224502886465e-06, "loss": 0.8708, "step": 23825 }, { "epoch": 0.2904220442884477, "grad_norm": 1.877798080444336, "learning_rate": 3.7347017318794104e-06, "loss": 0.9127, "step": 23830 }, { "epoch": 0.2904829805125955, "grad_norm": 2.0072550773620605, "learning_rate": 3.7343810134701734e-06, "loss": 0.8408, "step": 23835 }, { "epoch": 0.29054391673674335, "grad_norm": 1.9116266965866089, "learning_rate": 3.734060295060937e-06, "loss": 0.7749, "step": 23840 }, { "epoch": 0.2906048529608911, "grad_norm": 1.654213547706604, "learning_rate": 3.7337395766517e-06, "loss": 0.8935, "step": 23845 }, { "epoch": 0.29066578918503894, "grad_norm": 1.7814511060714722, "learning_rate": 3.7334188582424637e-06, "loss": 0.7763, "step": 23850 }, { "epoch": 0.29072672540918676, "grad_norm": 2.1272263526916504, "learning_rate": 3.7330981398332267e-06, "loss": 0.8831, "step": 23855 }, { "epoch": 0.2907876616333345, "grad_norm": 1.5814588069915771, "learning_rate": 3.7327774214239898e-06, "loss": 0.845, "step": 23860 }, { "epoch": 0.29084859785748235, "grad_norm": 1.6525483131408691, "learning_rate": 3.7324567030147536e-06, "loss": 0.8914, "step": 23865 }, { "epoch": 0.29090953408163017, "grad_norm": 1.9072576761245728, "learning_rate": 3.7321359846055166e-06, "loss": 0.8459, "step": 23870 }, { "epoch": 0.290970470305778, "grad_norm": 2.2148425579071045, "learning_rate": 3.7318152661962797e-06, "loss": 0.8683, "step": 23875 }, { "epoch": 0.29103140652992576, "grad_norm": 1.9408347606658936, "learning_rate": 3.7314945477870435e-06, "loss": 0.8543, "step": 23880 }, { "epoch": 0.2910923427540736, "grad_norm": 1.6281036138534546, "learning_rate": 3.7311738293778065e-06, "loss": 0.902, "step": 23885 }, { "epoch": 0.2911532789782214, "grad_norm": 2.0305302143096924, "learning_rate": 3.7308531109685696e-06, "loss": 0.9092, "step": 23890 }, { "epoch": 0.2912142152023692, "grad_norm": 1.7619951963424683, "learning_rate": 3.7305323925593334e-06, "loss": 0.9099, "step": 23895 }, { "epoch": 0.291275151426517, "grad_norm": 1.9560236930847168, "learning_rate": 3.7302116741500964e-06, "loss": 0.8268, "step": 23900 }, { "epoch": 0.2913360876506648, "grad_norm": 1.8983116149902344, "learning_rate": 3.72989095574086e-06, "loss": 0.8516, "step": 23905 }, { "epoch": 0.29139702387481264, "grad_norm": 2.0367462635040283, "learning_rate": 3.7295702373316233e-06, "loss": 0.8879, "step": 23910 }, { "epoch": 0.2914579600989604, "grad_norm": 1.9438921213150024, "learning_rate": 3.7292495189223863e-06, "loss": 0.8977, "step": 23915 }, { "epoch": 0.29151889632310823, "grad_norm": 1.990500807762146, "learning_rate": 3.7289288005131498e-06, "loss": 0.8648, "step": 23920 }, { "epoch": 0.29157983254725606, "grad_norm": 1.829709768295288, "learning_rate": 3.7286080821039132e-06, "loss": 0.9258, "step": 23925 }, { "epoch": 0.2916407687714038, "grad_norm": 2.028078079223633, "learning_rate": 3.7282873636946767e-06, "loss": 0.874, "step": 23930 }, { "epoch": 0.29170170499555165, "grad_norm": 2.3410685062408447, "learning_rate": 3.7279666452854397e-06, "loss": 0.8639, "step": 23935 }, { "epoch": 0.29176264121969947, "grad_norm": 1.8219773769378662, "learning_rate": 3.7276459268762027e-06, "loss": 0.8357, "step": 23940 }, { "epoch": 0.2918235774438473, "grad_norm": 1.998922348022461, "learning_rate": 3.7273252084669666e-06, "loss": 0.8696, "step": 23945 }, { "epoch": 0.29188451366799506, "grad_norm": 2.246788740158081, "learning_rate": 3.7270044900577296e-06, "loss": 0.9272, "step": 23950 }, { "epoch": 0.2919454498921429, "grad_norm": 2.348254442214966, "learning_rate": 3.7266837716484926e-06, "loss": 0.8413, "step": 23955 }, { "epoch": 0.2920063861162907, "grad_norm": 1.91172194480896, "learning_rate": 3.7263630532392565e-06, "loss": 0.8947, "step": 23960 }, { "epoch": 0.2920673223404385, "grad_norm": 1.9519436359405518, "learning_rate": 3.7260423348300195e-06, "loss": 0.8508, "step": 23965 }, { "epoch": 0.2921282585645863, "grad_norm": 2.7209179401397705, "learning_rate": 3.7257216164207825e-06, "loss": 0.8274, "step": 23970 }, { "epoch": 0.2921891947887341, "grad_norm": 2.0255637168884277, "learning_rate": 3.7254008980115464e-06, "loss": 0.8191, "step": 23975 }, { "epoch": 0.29225013101288194, "grad_norm": 1.9203038215637207, "learning_rate": 3.7250801796023094e-06, "loss": 0.8745, "step": 23980 }, { "epoch": 0.2923110672370297, "grad_norm": 2.4781107902526855, "learning_rate": 3.724759461193073e-06, "loss": 0.8466, "step": 23985 }, { "epoch": 0.29237200346117753, "grad_norm": 2.216747522354126, "learning_rate": 3.7244387427838362e-06, "loss": 0.8219, "step": 23990 }, { "epoch": 0.29243293968532535, "grad_norm": 1.8970342874526978, "learning_rate": 3.7241180243745993e-06, "loss": 0.9127, "step": 23995 }, { "epoch": 0.2924938759094731, "grad_norm": 2.296504020690918, "learning_rate": 3.7237973059653627e-06, "loss": 0.8773, "step": 24000 }, { "epoch": 0.29255481213362095, "grad_norm": 2.0149877071380615, "learning_rate": 3.723476587556126e-06, "loss": 0.8365, "step": 24005 }, { "epoch": 0.29261574835776877, "grad_norm": 2.2523341178894043, "learning_rate": 3.7231558691468896e-06, "loss": 0.8095, "step": 24010 }, { "epoch": 0.2926766845819166, "grad_norm": 2.2992422580718994, "learning_rate": 3.7228351507376526e-06, "loss": 0.8382, "step": 24015 }, { "epoch": 0.29273762080606436, "grad_norm": 1.903193473815918, "learning_rate": 3.7225144323284156e-06, "loss": 0.8858, "step": 24020 }, { "epoch": 0.2927985570302122, "grad_norm": 1.7657471895217896, "learning_rate": 3.7221937139191795e-06, "loss": 0.7771, "step": 24025 }, { "epoch": 0.29285949325436, "grad_norm": 1.902084231376648, "learning_rate": 3.7218729955099425e-06, "loss": 0.8583, "step": 24030 }, { "epoch": 0.29292042947850777, "grad_norm": 2.246018886566162, "learning_rate": 3.7215522771007055e-06, "loss": 0.8736, "step": 24035 }, { "epoch": 0.2929813657026556, "grad_norm": 1.677645206451416, "learning_rate": 3.7212315586914694e-06, "loss": 0.8649, "step": 24040 }, { "epoch": 0.2930423019268034, "grad_norm": 2.5654537677764893, "learning_rate": 3.7209108402822324e-06, "loss": 0.9285, "step": 24045 }, { "epoch": 0.29310323815095124, "grad_norm": 2.1786065101623535, "learning_rate": 3.7205901218729954e-06, "loss": 0.8409, "step": 24050 }, { "epoch": 0.293164174375099, "grad_norm": 2.2884862422943115, "learning_rate": 3.7202694034637593e-06, "loss": 0.9132, "step": 24055 }, { "epoch": 0.29322511059924683, "grad_norm": 1.8632811307907104, "learning_rate": 3.7199486850545223e-06, "loss": 0.8335, "step": 24060 }, { "epoch": 0.29328604682339465, "grad_norm": 1.8399640321731567, "learning_rate": 3.7196279666452857e-06, "loss": 0.801, "step": 24065 }, { "epoch": 0.2933469830475424, "grad_norm": 2.1152892112731934, "learning_rate": 3.719307248236049e-06, "loss": 0.8638, "step": 24070 }, { "epoch": 0.29340791927169024, "grad_norm": 1.9860060214996338, "learning_rate": 3.7189865298268126e-06, "loss": 0.9152, "step": 24075 }, { "epoch": 0.29346885549583807, "grad_norm": 2.270815134048462, "learning_rate": 3.7186658114175756e-06, "loss": 0.898, "step": 24080 }, { "epoch": 0.2935297917199859, "grad_norm": 1.9187593460083008, "learning_rate": 3.718345093008339e-06, "loss": 0.8483, "step": 24085 }, { "epoch": 0.29359072794413366, "grad_norm": 1.7642766237258911, "learning_rate": 3.7180243745991025e-06, "loss": 0.8813, "step": 24090 }, { "epoch": 0.2936516641682815, "grad_norm": 1.782196283340454, "learning_rate": 3.7177036561898655e-06, "loss": 0.8607, "step": 24095 }, { "epoch": 0.2937126003924293, "grad_norm": 1.8898533582687378, "learning_rate": 3.7173829377806285e-06, "loss": 0.8652, "step": 24100 }, { "epoch": 0.29377353661657707, "grad_norm": 1.7552568912506104, "learning_rate": 3.7170622193713924e-06, "loss": 0.8842, "step": 24105 }, { "epoch": 0.2938344728407249, "grad_norm": 1.8708094358444214, "learning_rate": 3.7167415009621554e-06, "loss": 0.8601, "step": 24110 }, { "epoch": 0.2938954090648727, "grad_norm": 2.0167431831359863, "learning_rate": 3.7164207825529184e-06, "loss": 0.8945, "step": 24115 }, { "epoch": 0.29395634528902054, "grad_norm": 2.3615317344665527, "learning_rate": 3.7161000641436823e-06, "loss": 0.9277, "step": 24120 }, { "epoch": 0.2940172815131683, "grad_norm": 2.063323497772217, "learning_rate": 3.7157793457344453e-06, "loss": 0.8216, "step": 24125 }, { "epoch": 0.2940782177373161, "grad_norm": 1.8995171785354614, "learning_rate": 3.7154586273252088e-06, "loss": 0.8083, "step": 24130 }, { "epoch": 0.29413915396146395, "grad_norm": 1.742525577545166, "learning_rate": 3.715137908915972e-06, "loss": 0.8655, "step": 24135 }, { "epoch": 0.2942000901856117, "grad_norm": 1.915773868560791, "learning_rate": 3.7148171905067352e-06, "loss": 0.8026, "step": 24140 }, { "epoch": 0.29426102640975954, "grad_norm": 1.8858970403671265, "learning_rate": 3.7144964720974987e-06, "loss": 0.8925, "step": 24145 }, { "epoch": 0.29432196263390736, "grad_norm": 1.9184173345565796, "learning_rate": 3.714175753688262e-06, "loss": 0.8201, "step": 24150 }, { "epoch": 0.2943828988580552, "grad_norm": 2.0030901432037354, "learning_rate": 3.7138550352790255e-06, "loss": 0.8466, "step": 24155 }, { "epoch": 0.29444383508220295, "grad_norm": 2.2004055976867676, "learning_rate": 3.7135343168697886e-06, "loss": 0.9025, "step": 24160 }, { "epoch": 0.2945047713063508, "grad_norm": 2.076866388320923, "learning_rate": 3.713213598460552e-06, "loss": 0.8371, "step": 24165 }, { "epoch": 0.2945657075304986, "grad_norm": 1.8412386178970337, "learning_rate": 3.7128928800513154e-06, "loss": 0.9488, "step": 24170 }, { "epoch": 0.29462664375464637, "grad_norm": 1.8391180038452148, "learning_rate": 3.7125721616420785e-06, "loss": 0.8548, "step": 24175 }, { "epoch": 0.2946875799787942, "grad_norm": 1.8545855283737183, "learning_rate": 3.7122514432328423e-06, "loss": 0.8427, "step": 24180 }, { "epoch": 0.294748516202942, "grad_norm": 2.5690839290618896, "learning_rate": 3.7119307248236053e-06, "loss": 0.8439, "step": 24185 }, { "epoch": 0.29480945242708984, "grad_norm": 2.028217077255249, "learning_rate": 3.7116100064143684e-06, "loss": 0.8361, "step": 24190 }, { "epoch": 0.2948703886512376, "grad_norm": 2.0921943187713623, "learning_rate": 3.7112892880051314e-06, "loss": 0.9365, "step": 24195 }, { "epoch": 0.2949313248753854, "grad_norm": 2.023336410522461, "learning_rate": 3.7109685695958952e-06, "loss": 0.8104, "step": 24200 }, { "epoch": 0.29499226109953325, "grad_norm": 2.0898501873016357, "learning_rate": 3.7106478511866583e-06, "loss": 0.9505, "step": 24205 }, { "epoch": 0.295053197323681, "grad_norm": 1.8070785999298096, "learning_rate": 3.7103271327774217e-06, "loss": 0.8287, "step": 24210 }, { "epoch": 0.29511413354782884, "grad_norm": 2.2118799686431885, "learning_rate": 3.710006414368185e-06, "loss": 0.874, "step": 24215 }, { "epoch": 0.29517506977197666, "grad_norm": 2.1052134037017822, "learning_rate": 3.709685695958948e-06, "loss": 0.837, "step": 24220 }, { "epoch": 0.29523600599612443, "grad_norm": 1.8779630661010742, "learning_rate": 3.7093649775497116e-06, "loss": 0.8391, "step": 24225 }, { "epoch": 0.29529694222027225, "grad_norm": 1.8587168455123901, "learning_rate": 3.709044259140475e-06, "loss": 0.871, "step": 24230 }, { "epoch": 0.2953578784444201, "grad_norm": 1.9119724035263062, "learning_rate": 3.7087235407312385e-06, "loss": 0.867, "step": 24235 }, { "epoch": 0.2954188146685679, "grad_norm": 2.2185075283050537, "learning_rate": 3.7084028223220015e-06, "loss": 0.8703, "step": 24240 }, { "epoch": 0.29547975089271566, "grad_norm": 1.9357861280441284, "learning_rate": 3.708082103912765e-06, "loss": 0.9, "step": 24245 }, { "epoch": 0.2955406871168635, "grad_norm": 1.8974497318267822, "learning_rate": 3.7077613855035284e-06, "loss": 0.8632, "step": 24250 }, { "epoch": 0.2956016233410113, "grad_norm": 1.7367795705795288, "learning_rate": 3.7074406670942914e-06, "loss": 0.8363, "step": 24255 }, { "epoch": 0.2956625595651591, "grad_norm": 2.3174800872802734, "learning_rate": 3.7071199486850552e-06, "loss": 0.9087, "step": 24260 }, { "epoch": 0.2957234957893069, "grad_norm": 1.919482946395874, "learning_rate": 3.7067992302758183e-06, "loss": 0.8754, "step": 24265 }, { "epoch": 0.2957844320134547, "grad_norm": 1.6827903985977173, "learning_rate": 3.7064785118665813e-06, "loss": 0.8569, "step": 24270 }, { "epoch": 0.29584536823760255, "grad_norm": 1.9963982105255127, "learning_rate": 3.7061577934573443e-06, "loss": 0.9045, "step": 24275 }, { "epoch": 0.2959063044617503, "grad_norm": 2.3119328022003174, "learning_rate": 3.705837075048108e-06, "loss": 0.8762, "step": 24280 }, { "epoch": 0.29596724068589814, "grad_norm": 2.255119800567627, "learning_rate": 3.705516356638871e-06, "loss": 0.9158, "step": 24285 }, { "epoch": 0.29602817691004596, "grad_norm": 1.9392675161361694, "learning_rate": 3.7051956382296346e-06, "loss": 0.8502, "step": 24290 }, { "epoch": 0.2960891131341937, "grad_norm": 1.859373927116394, "learning_rate": 3.704874919820398e-06, "loss": 0.9098, "step": 24295 }, { "epoch": 0.29615004935834155, "grad_norm": 2.5378341674804688, "learning_rate": 3.704554201411161e-06, "loss": 0.8908, "step": 24300 }, { "epoch": 0.2962109855824894, "grad_norm": 2.3425824642181396, "learning_rate": 3.7042334830019245e-06, "loss": 0.835, "step": 24305 }, { "epoch": 0.2962719218066372, "grad_norm": 1.7781848907470703, "learning_rate": 3.703912764592688e-06, "loss": 0.8254, "step": 24310 }, { "epoch": 0.29633285803078496, "grad_norm": 1.8686397075653076, "learning_rate": 3.7035920461834514e-06, "loss": 0.8327, "step": 24315 }, { "epoch": 0.2963937942549328, "grad_norm": 2.062108039855957, "learning_rate": 3.7032713277742144e-06, "loss": 0.8639, "step": 24320 }, { "epoch": 0.2964547304790806, "grad_norm": 2.056501626968384, "learning_rate": 3.7029506093649783e-06, "loss": 0.8952, "step": 24325 }, { "epoch": 0.2965156667032284, "grad_norm": 1.719427466392517, "learning_rate": 3.7026298909557413e-06, "loss": 0.8888, "step": 24330 }, { "epoch": 0.2965766029273762, "grad_norm": 2.278461217880249, "learning_rate": 3.7023091725465043e-06, "loss": 0.9369, "step": 24335 }, { "epoch": 0.296637539151524, "grad_norm": 2.035759210586548, "learning_rate": 3.701988454137268e-06, "loss": 0.809, "step": 24340 }, { "epoch": 0.29669847537567184, "grad_norm": 1.8525521755218506, "learning_rate": 3.701667735728031e-06, "loss": 0.9482, "step": 24345 }, { "epoch": 0.2967594115998196, "grad_norm": 2.3280370235443115, "learning_rate": 3.701347017318794e-06, "loss": 0.8962, "step": 24350 }, { "epoch": 0.29682034782396743, "grad_norm": 1.9143162965774536, "learning_rate": 3.7010262989095576e-06, "loss": 0.902, "step": 24355 }, { "epoch": 0.29688128404811526, "grad_norm": 2.197209358215332, "learning_rate": 3.700705580500321e-06, "loss": 0.8935, "step": 24360 }, { "epoch": 0.296942220272263, "grad_norm": 1.8822189569473267, "learning_rate": 3.700384862091084e-06, "loss": 0.8514, "step": 24365 }, { "epoch": 0.29700315649641085, "grad_norm": 1.844764232635498, "learning_rate": 3.7000641436818475e-06, "loss": 0.7913, "step": 24370 }, { "epoch": 0.29706409272055867, "grad_norm": 1.6713440418243408, "learning_rate": 3.699743425272611e-06, "loss": 0.877, "step": 24375 }, { "epoch": 0.2971250289447065, "grad_norm": 1.6634305715560913, "learning_rate": 3.6994227068633744e-06, "loss": 0.8573, "step": 24380 }, { "epoch": 0.29718596516885426, "grad_norm": 1.998214840888977, "learning_rate": 3.6991019884541374e-06, "loss": 0.8676, "step": 24385 }, { "epoch": 0.2972469013930021, "grad_norm": 2.0978212356567383, "learning_rate": 3.698781270044901e-06, "loss": 0.8528, "step": 24390 }, { "epoch": 0.2973078376171499, "grad_norm": 1.8228795528411865, "learning_rate": 3.6984605516356643e-06, "loss": 0.8815, "step": 24395 }, { "epoch": 0.2973687738412977, "grad_norm": 2.1029715538024902, "learning_rate": 3.6981398332264273e-06, "loss": 0.9015, "step": 24400 }, { "epoch": 0.2974297100654455, "grad_norm": 2.173858165740967, "learning_rate": 3.697819114817191e-06, "loss": 0.9258, "step": 24405 }, { "epoch": 0.2974906462895933, "grad_norm": 1.8675683736801147, "learning_rate": 3.6974983964079542e-06, "loss": 0.7984, "step": 24410 }, { "epoch": 0.29755158251374114, "grad_norm": 2.0076019763946533, "learning_rate": 3.6971776779987172e-06, "loss": 0.8834, "step": 24415 }, { "epoch": 0.2976125187378889, "grad_norm": 1.8971086740493774, "learning_rate": 3.696856959589481e-06, "loss": 0.9354, "step": 24420 }, { "epoch": 0.29767345496203673, "grad_norm": 2.0504963397979736, "learning_rate": 3.696536241180244e-06, "loss": 0.8458, "step": 24425 }, { "epoch": 0.29773439118618455, "grad_norm": 2.1383891105651855, "learning_rate": 3.696215522771007e-06, "loss": 0.9266, "step": 24430 }, { "epoch": 0.2977953274103323, "grad_norm": 1.7583351135253906, "learning_rate": 3.6958948043617706e-06, "loss": 0.8225, "step": 24435 }, { "epoch": 0.29785626363448015, "grad_norm": 2.2715904712677, "learning_rate": 3.695574085952534e-06, "loss": 0.8226, "step": 24440 }, { "epoch": 0.29791719985862797, "grad_norm": 2.033353805541992, "learning_rate": 3.695253367543297e-06, "loss": 0.8669, "step": 24445 }, { "epoch": 0.2979781360827758, "grad_norm": 1.9294620752334595, "learning_rate": 3.6949326491340605e-06, "loss": 0.8801, "step": 24450 }, { "epoch": 0.29803907230692356, "grad_norm": 1.9923789501190186, "learning_rate": 3.694611930724824e-06, "loss": 0.8058, "step": 24455 }, { "epoch": 0.2981000085310714, "grad_norm": 1.7398399114608765, "learning_rate": 3.6942912123155874e-06, "loss": 0.8064, "step": 24460 }, { "epoch": 0.2981609447552192, "grad_norm": 2.0153024196624756, "learning_rate": 3.6939704939063504e-06, "loss": 0.8365, "step": 24465 }, { "epoch": 0.29822188097936697, "grad_norm": 1.8172744512557983, "learning_rate": 3.693649775497114e-06, "loss": 0.8315, "step": 24470 }, { "epoch": 0.2982828172035148, "grad_norm": 2.077547311782837, "learning_rate": 3.6933290570878773e-06, "loss": 0.8803, "step": 24475 }, { "epoch": 0.2983437534276626, "grad_norm": 1.7432384490966797, "learning_rate": 3.6930083386786403e-06, "loss": 0.8016, "step": 24480 }, { "epoch": 0.29840468965181044, "grad_norm": 2.0568766593933105, "learning_rate": 3.692687620269404e-06, "loss": 0.8622, "step": 24485 }, { "epoch": 0.2984656258759582, "grad_norm": 1.5647459030151367, "learning_rate": 3.692366901860167e-06, "loss": 0.826, "step": 24490 }, { "epoch": 0.29852656210010603, "grad_norm": 2.1513943672180176, "learning_rate": 3.69204618345093e-06, "loss": 0.8606, "step": 24495 }, { "epoch": 0.29858749832425385, "grad_norm": 2.420496940612793, "learning_rate": 3.691725465041694e-06, "loss": 0.8797, "step": 24500 }, { "epoch": 0.2986484345484016, "grad_norm": 2.1098926067352295, "learning_rate": 3.691404746632457e-06, "loss": 0.919, "step": 24505 }, { "epoch": 0.29870937077254944, "grad_norm": 2.2621748447418213, "learning_rate": 3.69108402822322e-06, "loss": 0.8711, "step": 24510 }, { "epoch": 0.29877030699669727, "grad_norm": 2.1596314907073975, "learning_rate": 3.690763309813984e-06, "loss": 0.8345, "step": 24515 }, { "epoch": 0.2988312432208451, "grad_norm": 1.9043703079223633, "learning_rate": 3.690442591404747e-06, "loss": 0.8615, "step": 24520 }, { "epoch": 0.29889217944499286, "grad_norm": 1.8281052112579346, "learning_rate": 3.69012187299551e-06, "loss": 0.821, "step": 24525 }, { "epoch": 0.2989531156691407, "grad_norm": 1.7893280982971191, "learning_rate": 3.6898011545862734e-06, "loss": 0.833, "step": 24530 }, { "epoch": 0.2990140518932885, "grad_norm": 2.2185380458831787, "learning_rate": 3.689480436177037e-06, "loss": 0.8972, "step": 24535 }, { "epoch": 0.29907498811743627, "grad_norm": 1.676682949066162, "learning_rate": 3.6891597177678003e-06, "loss": 0.8945, "step": 24540 }, { "epoch": 0.2991359243415841, "grad_norm": 1.9970453977584839, "learning_rate": 3.6888389993585633e-06, "loss": 0.8176, "step": 24545 }, { "epoch": 0.2991968605657319, "grad_norm": 1.8343161344528198, "learning_rate": 3.688518280949327e-06, "loss": 0.8483, "step": 24550 }, { "epoch": 0.29925779678987974, "grad_norm": 2.0298752784729004, "learning_rate": 3.68819756254009e-06, "loss": 0.8307, "step": 24555 }, { "epoch": 0.2993187330140275, "grad_norm": 1.9346998929977417, "learning_rate": 3.687876844130853e-06, "loss": 0.9603, "step": 24560 }, { "epoch": 0.2993796692381753, "grad_norm": 2.0515224933624268, "learning_rate": 3.687556125721617e-06, "loss": 0.8357, "step": 24565 }, { "epoch": 0.29944060546232315, "grad_norm": 2.1651947498321533, "learning_rate": 3.68723540731238e-06, "loss": 0.8531, "step": 24570 }, { "epoch": 0.2995015416864709, "grad_norm": 2.600261926651001, "learning_rate": 3.686914688903143e-06, "loss": 0.9303, "step": 24575 }, { "epoch": 0.29956247791061874, "grad_norm": 1.8789443969726562, "learning_rate": 3.686593970493907e-06, "loss": 0.8574, "step": 24580 }, { "epoch": 0.29962341413476656, "grad_norm": 2.2272701263427734, "learning_rate": 3.68627325208467e-06, "loss": 0.834, "step": 24585 }, { "epoch": 0.2996843503589144, "grad_norm": 2.191862106323242, "learning_rate": 3.685952533675433e-06, "loss": 0.8867, "step": 24590 }, { "epoch": 0.29974528658306215, "grad_norm": 2.1980926990509033, "learning_rate": 3.685631815266197e-06, "loss": 0.8083, "step": 24595 }, { "epoch": 0.29980622280721, "grad_norm": 1.8688896894454956, "learning_rate": 3.68531109685696e-06, "loss": 1.0229, "step": 24600 }, { "epoch": 0.2998671590313578, "grad_norm": 1.5964583158493042, "learning_rate": 3.6849903784477233e-06, "loss": 0.8461, "step": 24605 }, { "epoch": 0.29992809525550557, "grad_norm": 1.7993055582046509, "learning_rate": 3.6846696600384863e-06, "loss": 0.8476, "step": 24610 }, { "epoch": 0.2999890314796534, "grad_norm": 2.043304920196533, "learning_rate": 3.6843489416292498e-06, "loss": 0.9002, "step": 24615 }, { "epoch": 0.3000499677038012, "grad_norm": 1.8391680717468262, "learning_rate": 3.684028223220013e-06, "loss": 0.8849, "step": 24620 }, { "epoch": 0.30011090392794904, "grad_norm": 1.8348491191864014, "learning_rate": 3.6837075048107762e-06, "loss": 0.8376, "step": 24625 }, { "epoch": 0.3001718401520968, "grad_norm": 1.7929670810699463, "learning_rate": 3.68338678640154e-06, "loss": 0.8363, "step": 24630 }, { "epoch": 0.3002327763762446, "grad_norm": 1.9213210344314575, "learning_rate": 3.683066067992303e-06, "loss": 0.8035, "step": 24635 }, { "epoch": 0.30029371260039245, "grad_norm": 1.887681245803833, "learning_rate": 3.682745349583066e-06, "loss": 0.9255, "step": 24640 }, { "epoch": 0.3003546488245402, "grad_norm": 2.112656593322754, "learning_rate": 3.68242463117383e-06, "loss": 0.9168, "step": 24645 }, { "epoch": 0.30041558504868804, "grad_norm": 2.057220697402954, "learning_rate": 3.682103912764593e-06, "loss": 0.8661, "step": 24650 }, { "epoch": 0.30047652127283586, "grad_norm": 1.810321569442749, "learning_rate": 3.681783194355356e-06, "loss": 0.8067, "step": 24655 }, { "epoch": 0.3005374574969837, "grad_norm": 2.112459182739258, "learning_rate": 3.68146247594612e-06, "loss": 0.8576, "step": 24660 }, { "epoch": 0.30059839372113145, "grad_norm": 2.076251268386841, "learning_rate": 3.681141757536883e-06, "loss": 0.8989, "step": 24665 }, { "epoch": 0.3006593299452793, "grad_norm": 1.927994966506958, "learning_rate": 3.680821039127646e-06, "loss": 0.89, "step": 24670 }, { "epoch": 0.3007202661694271, "grad_norm": 1.8558566570281982, "learning_rate": 3.6805003207184098e-06, "loss": 0.8755, "step": 24675 }, { "epoch": 0.30078120239357486, "grad_norm": 1.9240827560424805, "learning_rate": 3.680179602309173e-06, "loss": 0.8156, "step": 24680 }, { "epoch": 0.3008421386177227, "grad_norm": 1.858333706855774, "learning_rate": 3.6798588838999362e-06, "loss": 0.8274, "step": 24685 }, { "epoch": 0.3009030748418705, "grad_norm": 1.9097352027893066, "learning_rate": 3.6795381654906993e-06, "loss": 0.8482, "step": 24690 }, { "epoch": 0.3009640110660183, "grad_norm": 1.9968947172164917, "learning_rate": 3.6792174470814627e-06, "loss": 0.8921, "step": 24695 }, { "epoch": 0.3010249472901661, "grad_norm": 1.9129656553268433, "learning_rate": 3.678896728672226e-06, "loss": 0.9034, "step": 24700 }, { "epoch": 0.3010858835143139, "grad_norm": 1.9316003322601318, "learning_rate": 3.678576010262989e-06, "loss": 0.8837, "step": 24705 }, { "epoch": 0.30114681973846175, "grad_norm": 2.0615034103393555, "learning_rate": 3.678255291853753e-06, "loss": 0.8706, "step": 24710 }, { "epoch": 0.3012077559626095, "grad_norm": 2.137611150741577, "learning_rate": 3.677934573444516e-06, "loss": 0.8859, "step": 24715 }, { "epoch": 0.30126869218675734, "grad_norm": 1.9255601167678833, "learning_rate": 3.677613855035279e-06, "loss": 0.8839, "step": 24720 }, { "epoch": 0.30132962841090516, "grad_norm": 1.6860311031341553, "learning_rate": 3.677293136626043e-06, "loss": 0.8351, "step": 24725 }, { "epoch": 0.3013905646350529, "grad_norm": 1.822352409362793, "learning_rate": 3.676972418216806e-06, "loss": 0.842, "step": 24730 }, { "epoch": 0.30145150085920075, "grad_norm": 1.7433327436447144, "learning_rate": 3.676651699807569e-06, "loss": 0.7844, "step": 24735 }, { "epoch": 0.3015124370833486, "grad_norm": 2.018357992172241, "learning_rate": 3.676330981398333e-06, "loss": 0.8617, "step": 24740 }, { "epoch": 0.3015733733074964, "grad_norm": 2.140676736831665, "learning_rate": 3.676010262989096e-06, "loss": 0.8551, "step": 24745 }, { "epoch": 0.30163430953164416, "grad_norm": 2.7596511840820312, "learning_rate": 3.675689544579859e-06, "loss": 0.827, "step": 24750 }, { "epoch": 0.301695245755792, "grad_norm": 2.1670820713043213, "learning_rate": 3.6753688261706227e-06, "loss": 0.8648, "step": 24755 }, { "epoch": 0.3017561819799398, "grad_norm": 2.1527228355407715, "learning_rate": 3.6750481077613857e-06, "loss": 0.8818, "step": 24760 }, { "epoch": 0.3018171182040876, "grad_norm": 2.0738189220428467, "learning_rate": 3.674727389352149e-06, "loss": 0.8994, "step": 24765 }, { "epoch": 0.3018780544282354, "grad_norm": 1.9565485715866089, "learning_rate": 3.6744066709429126e-06, "loss": 0.9264, "step": 24770 }, { "epoch": 0.3019389906523832, "grad_norm": 1.7850655317306519, "learning_rate": 3.674085952533676e-06, "loss": 0.7908, "step": 24775 }, { "epoch": 0.30199992687653104, "grad_norm": 1.8884210586547852, "learning_rate": 3.673765234124439e-06, "loss": 0.8596, "step": 24780 }, { "epoch": 0.3020608631006788, "grad_norm": 2.1388590335845947, "learning_rate": 3.673444515715202e-06, "loss": 0.9407, "step": 24785 }, { "epoch": 0.30212179932482663, "grad_norm": 1.9976035356521606, "learning_rate": 3.673123797305966e-06, "loss": 0.8609, "step": 24790 }, { "epoch": 0.30218273554897446, "grad_norm": 1.8686338663101196, "learning_rate": 3.672803078896729e-06, "loss": 0.8469, "step": 24795 }, { "epoch": 0.3022436717731222, "grad_norm": 1.7763526439666748, "learning_rate": 3.672482360487492e-06, "loss": 0.8147, "step": 24800 }, { "epoch": 0.30230460799727005, "grad_norm": 1.9442689418792725, "learning_rate": 3.672161642078256e-06, "loss": 0.8457, "step": 24805 }, { "epoch": 0.30236554422141787, "grad_norm": 1.911030650138855, "learning_rate": 3.671840923669019e-06, "loss": 0.8959, "step": 24810 }, { "epoch": 0.3024264804455657, "grad_norm": 1.9539273977279663, "learning_rate": 3.671520205259782e-06, "loss": 0.8752, "step": 24815 }, { "epoch": 0.30248741666971346, "grad_norm": 1.7716434001922607, "learning_rate": 3.6711994868505457e-06, "loss": 0.8869, "step": 24820 }, { "epoch": 0.3025483528938613, "grad_norm": 2.4870355129241943, "learning_rate": 3.6708787684413088e-06, "loss": 0.9097, "step": 24825 }, { "epoch": 0.3026092891180091, "grad_norm": 1.9640945196151733, "learning_rate": 3.670558050032072e-06, "loss": 0.8667, "step": 24830 }, { "epoch": 0.3026702253421569, "grad_norm": 1.756464958190918, "learning_rate": 3.6702373316228356e-06, "loss": 0.901, "step": 24835 }, { "epoch": 0.3027311615663047, "grad_norm": 1.846242070198059, "learning_rate": 3.6699166132135987e-06, "loss": 0.8503, "step": 24840 }, { "epoch": 0.3027920977904525, "grad_norm": 1.9435670375823975, "learning_rate": 3.669595894804362e-06, "loss": 0.8574, "step": 24845 }, { "epoch": 0.30285303401460034, "grad_norm": 1.7161117792129517, "learning_rate": 3.6692751763951255e-06, "loss": 0.8734, "step": 24850 }, { "epoch": 0.3029139702387481, "grad_norm": 2.047844409942627, "learning_rate": 3.668954457985889e-06, "loss": 0.8624, "step": 24855 }, { "epoch": 0.30297490646289593, "grad_norm": 2.624750852584839, "learning_rate": 3.668633739576652e-06, "loss": 0.8466, "step": 24860 }, { "epoch": 0.30303584268704375, "grad_norm": 2.0743892192840576, "learning_rate": 3.668313021167415e-06, "loss": 0.8932, "step": 24865 }, { "epoch": 0.3030967789111915, "grad_norm": 2.5677108764648438, "learning_rate": 3.667992302758179e-06, "loss": 0.9306, "step": 24870 }, { "epoch": 0.30315771513533935, "grad_norm": 1.8319019079208374, "learning_rate": 3.667671584348942e-06, "loss": 0.7755, "step": 24875 }, { "epoch": 0.30321865135948717, "grad_norm": 2.162083864212036, "learning_rate": 3.667350865939705e-06, "loss": 0.8447, "step": 24880 }, { "epoch": 0.303279587583635, "grad_norm": 1.8169505596160889, "learning_rate": 3.6670301475304688e-06, "loss": 0.8457, "step": 24885 }, { "epoch": 0.30334052380778276, "grad_norm": 1.9070227146148682, "learning_rate": 3.6667094291212318e-06, "loss": 0.82, "step": 24890 }, { "epoch": 0.3034014600319306, "grad_norm": 1.8155330419540405, "learning_rate": 3.666388710711995e-06, "loss": 0.8755, "step": 24895 }, { "epoch": 0.3034623962560784, "grad_norm": 2.516688823699951, "learning_rate": 3.6660679923027587e-06, "loss": 0.8335, "step": 24900 }, { "epoch": 0.30352333248022617, "grad_norm": 1.7205538749694824, "learning_rate": 3.6657472738935217e-06, "loss": 0.9333, "step": 24905 }, { "epoch": 0.303584268704374, "grad_norm": 2.143152952194214, "learning_rate": 3.665426555484285e-06, "loss": 0.9069, "step": 24910 }, { "epoch": 0.3036452049285218, "grad_norm": 2.005506992340088, "learning_rate": 3.6651058370750486e-06, "loss": 0.8403, "step": 24915 }, { "epoch": 0.30370614115266964, "grad_norm": 1.854246973991394, "learning_rate": 3.6647851186658116e-06, "loss": 0.8382, "step": 24920 }, { "epoch": 0.3037670773768174, "grad_norm": 1.8849436044692993, "learning_rate": 3.664464400256575e-06, "loss": 0.8594, "step": 24925 }, { "epoch": 0.30382801360096523, "grad_norm": 1.79323148727417, "learning_rate": 3.6641436818473385e-06, "loss": 0.9276, "step": 24930 }, { "epoch": 0.30388894982511305, "grad_norm": 1.9610075950622559, "learning_rate": 3.663822963438102e-06, "loss": 0.8961, "step": 24935 }, { "epoch": 0.3039498860492608, "grad_norm": 2.634528160095215, "learning_rate": 3.663502245028865e-06, "loss": 0.8432, "step": 24940 }, { "epoch": 0.30401082227340864, "grad_norm": 2.5668282508850098, "learning_rate": 3.663181526619628e-06, "loss": 0.8818, "step": 24945 }, { "epoch": 0.30407175849755647, "grad_norm": 2.155299663543701, "learning_rate": 3.662860808210392e-06, "loss": 0.8623, "step": 24950 }, { "epoch": 0.3041326947217043, "grad_norm": 1.8244266510009766, "learning_rate": 3.662540089801155e-06, "loss": 0.8478, "step": 24955 }, { "epoch": 0.30419363094585206, "grad_norm": 1.9327342510223389, "learning_rate": 3.662219371391918e-06, "loss": 0.876, "step": 24960 }, { "epoch": 0.3042545671699999, "grad_norm": 2.100289821624756, "learning_rate": 3.6618986529826817e-06, "loss": 0.8527, "step": 24965 }, { "epoch": 0.3043155033941477, "grad_norm": 1.9001798629760742, "learning_rate": 3.6615779345734447e-06, "loss": 0.7934, "step": 24970 }, { "epoch": 0.30437643961829547, "grad_norm": 1.7251745462417603, "learning_rate": 3.6612572161642077e-06, "loss": 0.8909, "step": 24975 }, { "epoch": 0.3044373758424433, "grad_norm": 2.0096945762634277, "learning_rate": 3.6609364977549716e-06, "loss": 0.8271, "step": 24980 }, { "epoch": 0.3044983120665911, "grad_norm": 2.1046156883239746, "learning_rate": 3.6606157793457346e-06, "loss": 0.8425, "step": 24985 }, { "epoch": 0.30455924829073894, "grad_norm": 1.9242520332336426, "learning_rate": 3.660295060936498e-06, "loss": 0.8063, "step": 24990 }, { "epoch": 0.3046201845148867, "grad_norm": 1.9714354276657104, "learning_rate": 3.6599743425272615e-06, "loss": 0.9151, "step": 24995 }, { "epoch": 0.3046811207390345, "grad_norm": 2.0653457641601562, "learning_rate": 3.6596536241180245e-06, "loss": 0.9347, "step": 25000 }, { "epoch": 0.30474205696318235, "grad_norm": 2.0792384147644043, "learning_rate": 3.659332905708788e-06, "loss": 0.8705, "step": 25005 }, { "epoch": 0.3048029931873301, "grad_norm": 2.118990659713745, "learning_rate": 3.6590121872995514e-06, "loss": 0.8861, "step": 25010 }, { "epoch": 0.30486392941147794, "grad_norm": 1.7768837213516235, "learning_rate": 3.658691468890315e-06, "loss": 0.9132, "step": 25015 }, { "epoch": 0.30492486563562576, "grad_norm": 1.9335789680480957, "learning_rate": 3.658370750481078e-06, "loss": 0.8155, "step": 25020 }, { "epoch": 0.3049858018597736, "grad_norm": 1.6699918508529663, "learning_rate": 3.658050032071841e-06, "loss": 0.8756, "step": 25025 }, { "epoch": 0.30504673808392135, "grad_norm": 1.9161311388015747, "learning_rate": 3.6577293136626047e-06, "loss": 0.939, "step": 25030 }, { "epoch": 0.3051076743080692, "grad_norm": 1.9707361459732056, "learning_rate": 3.6574085952533677e-06, "loss": 0.8155, "step": 25035 }, { "epoch": 0.305168610532217, "grad_norm": 1.974718689918518, "learning_rate": 3.6570878768441308e-06, "loss": 0.7899, "step": 25040 }, { "epoch": 0.30522954675636477, "grad_norm": 1.7607587575912476, "learning_rate": 3.6567671584348946e-06, "loss": 0.9235, "step": 25045 }, { "epoch": 0.3052904829805126, "grad_norm": 1.9568760395050049, "learning_rate": 3.6564464400256576e-06, "loss": 0.8716, "step": 25050 }, { "epoch": 0.3053514192046604, "grad_norm": 1.758410930633545, "learning_rate": 3.656125721616421e-06, "loss": 0.8147, "step": 25055 }, { "epoch": 0.30541235542880824, "grad_norm": 1.7044768333435059, "learning_rate": 3.6558050032071845e-06, "loss": 0.9631, "step": 25060 }, { "epoch": 0.305473291652956, "grad_norm": 2.1522107124328613, "learning_rate": 3.6554842847979475e-06, "loss": 0.8754, "step": 25065 }, { "epoch": 0.3055342278771038, "grad_norm": 1.5614337921142578, "learning_rate": 3.655163566388711e-06, "loss": 0.8568, "step": 25070 }, { "epoch": 0.30559516410125165, "grad_norm": 2.0284056663513184, "learning_rate": 3.6548428479794744e-06, "loss": 0.9019, "step": 25075 }, { "epoch": 0.3056561003253994, "grad_norm": 1.6968562602996826, "learning_rate": 3.654522129570238e-06, "loss": 0.8952, "step": 25080 }, { "epoch": 0.30571703654954724, "grad_norm": 1.8934370279312134, "learning_rate": 3.654201411161001e-06, "loss": 0.9238, "step": 25085 }, { "epoch": 0.30577797277369506, "grad_norm": 1.7046165466308594, "learning_rate": 3.6538806927517643e-06, "loss": 0.8283, "step": 25090 }, { "epoch": 0.3058389089978429, "grad_norm": 2.355410575866699, "learning_rate": 3.6535599743425278e-06, "loss": 0.8769, "step": 25095 }, { "epoch": 0.30589984522199065, "grad_norm": 1.822818636894226, "learning_rate": 3.6532392559332908e-06, "loss": 0.8052, "step": 25100 }, { "epoch": 0.3059607814461385, "grad_norm": 1.7987377643585205, "learning_rate": 3.6529185375240546e-06, "loss": 0.8783, "step": 25105 }, { "epoch": 0.3060217176702863, "grad_norm": 1.7422692775726318, "learning_rate": 3.6525978191148177e-06, "loss": 0.881, "step": 25110 }, { "epoch": 0.30608265389443406, "grad_norm": 1.9337313175201416, "learning_rate": 3.6522771007055807e-06, "loss": 0.9411, "step": 25115 }, { "epoch": 0.3061435901185819, "grad_norm": 2.139897346496582, "learning_rate": 3.6519563822963437e-06, "loss": 0.8726, "step": 25120 }, { "epoch": 0.3062045263427297, "grad_norm": 2.058973789215088, "learning_rate": 3.6516356638871075e-06, "loss": 0.8672, "step": 25125 }, { "epoch": 0.30626546256687753, "grad_norm": 2.1563963890075684, "learning_rate": 3.6513149454778706e-06, "loss": 0.8797, "step": 25130 }, { "epoch": 0.3063263987910253, "grad_norm": 1.9912917613983154, "learning_rate": 3.650994227068634e-06, "loss": 0.865, "step": 25135 }, { "epoch": 0.3063873350151731, "grad_norm": 1.9657351970672607, "learning_rate": 3.6506735086593974e-06, "loss": 0.8867, "step": 25140 }, { "epoch": 0.30644827123932095, "grad_norm": 2.370980739593506, "learning_rate": 3.6503527902501605e-06, "loss": 0.8853, "step": 25145 }, { "epoch": 0.3065092074634687, "grad_norm": 1.8287549018859863, "learning_rate": 3.650032071840924e-06, "loss": 0.8847, "step": 25150 }, { "epoch": 0.30657014368761654, "grad_norm": 1.8504953384399414, "learning_rate": 3.6497113534316873e-06, "loss": 0.8744, "step": 25155 }, { "epoch": 0.30663107991176436, "grad_norm": 1.9467320442199707, "learning_rate": 3.6493906350224508e-06, "loss": 0.863, "step": 25160 }, { "epoch": 0.3066920161359121, "grad_norm": 2.181978225708008, "learning_rate": 3.649069916613214e-06, "loss": 0.8452, "step": 25165 }, { "epoch": 0.30675295236005995, "grad_norm": 2.1847786903381348, "learning_rate": 3.6487491982039772e-06, "loss": 0.916, "step": 25170 }, { "epoch": 0.3068138885842078, "grad_norm": 1.9135973453521729, "learning_rate": 3.6484284797947407e-06, "loss": 0.8976, "step": 25175 }, { "epoch": 0.3068748248083556, "grad_norm": 8.379800796508789, "learning_rate": 3.6481077613855037e-06, "loss": 0.8473, "step": 25180 }, { "epoch": 0.30693576103250336, "grad_norm": 1.8448950052261353, "learning_rate": 3.6477870429762676e-06, "loss": 0.8277, "step": 25185 }, { "epoch": 0.3069966972566512, "grad_norm": 1.8714042901992798, "learning_rate": 3.6474663245670306e-06, "loss": 0.8967, "step": 25190 }, { "epoch": 0.307057633480799, "grad_norm": 1.7674850225448608, "learning_rate": 3.6471456061577936e-06, "loss": 0.8782, "step": 25195 }, { "epoch": 0.3071185697049468, "grad_norm": 1.8882243633270264, "learning_rate": 3.6468248877485566e-06, "loss": 0.9299, "step": 25200 }, { "epoch": 0.3071795059290946, "grad_norm": 1.906673550605774, "learning_rate": 3.6465041693393205e-06, "loss": 0.9679, "step": 25205 }, { "epoch": 0.3072404421532424, "grad_norm": 1.9586330652236938, "learning_rate": 3.6461834509300835e-06, "loss": 0.9445, "step": 25210 }, { "epoch": 0.30730137837739024, "grad_norm": 2.054945230484009, "learning_rate": 3.645862732520847e-06, "loss": 0.8393, "step": 25215 }, { "epoch": 0.307362314601538, "grad_norm": 1.8230938911437988, "learning_rate": 3.6455420141116104e-06, "loss": 0.8633, "step": 25220 }, { "epoch": 0.30742325082568583, "grad_norm": 2.103778123855591, "learning_rate": 3.6452212957023734e-06, "loss": 0.9063, "step": 25225 }, { "epoch": 0.30748418704983366, "grad_norm": 2.0720419883728027, "learning_rate": 3.644900577293137e-06, "loss": 0.7967, "step": 25230 }, { "epoch": 0.3075451232739814, "grad_norm": 1.8767037391662598, "learning_rate": 3.6445798588839003e-06, "loss": 0.8547, "step": 25235 }, { "epoch": 0.30760605949812925, "grad_norm": 1.8956983089447021, "learning_rate": 3.6442591404746637e-06, "loss": 0.8202, "step": 25240 }, { "epoch": 0.30766699572227707, "grad_norm": 1.7963314056396484, "learning_rate": 3.6439384220654267e-06, "loss": 0.854, "step": 25245 }, { "epoch": 0.3077279319464249, "grad_norm": 2.1278891563415527, "learning_rate": 3.6436177036561906e-06, "loss": 0.835, "step": 25250 }, { "epoch": 0.30778886817057266, "grad_norm": 1.9877195358276367, "learning_rate": 3.6432969852469536e-06, "loss": 0.7955, "step": 25255 }, { "epoch": 0.3078498043947205, "grad_norm": 2.3938231468200684, "learning_rate": 3.6429762668377166e-06, "loss": 0.8272, "step": 25260 }, { "epoch": 0.3079107406188683, "grad_norm": 2.0244929790496826, "learning_rate": 3.6426555484284805e-06, "loss": 0.8619, "step": 25265 }, { "epoch": 0.3079716768430161, "grad_norm": 1.7337970733642578, "learning_rate": 3.6423348300192435e-06, "loss": 0.9138, "step": 25270 }, { "epoch": 0.3080326130671639, "grad_norm": 2.354499578475952, "learning_rate": 3.6420141116100065e-06, "loss": 0.8914, "step": 25275 }, { "epoch": 0.3080935492913117, "grad_norm": 1.8126943111419678, "learning_rate": 3.64169339320077e-06, "loss": 0.8886, "step": 25280 }, { "epoch": 0.30815448551545954, "grad_norm": 1.7072440385818481, "learning_rate": 3.6413726747915334e-06, "loss": 0.9026, "step": 25285 }, { "epoch": 0.3082154217396073, "grad_norm": 1.811719298362732, "learning_rate": 3.6410519563822964e-06, "loss": 0.8687, "step": 25290 }, { "epoch": 0.30827635796375513, "grad_norm": 1.6519933938980103, "learning_rate": 3.64073123797306e-06, "loss": 0.8421, "step": 25295 }, { "epoch": 0.30833729418790295, "grad_norm": 1.9526699781417847, "learning_rate": 3.6404105195638233e-06, "loss": 0.8665, "step": 25300 }, { "epoch": 0.3083982304120507, "grad_norm": 2.089184045791626, "learning_rate": 3.6400898011545867e-06, "loss": 0.814, "step": 25305 }, { "epoch": 0.30845916663619855, "grad_norm": 1.9177685976028442, "learning_rate": 3.6397690827453498e-06, "loss": 0.8382, "step": 25310 }, { "epoch": 0.30852010286034637, "grad_norm": 2.033195972442627, "learning_rate": 3.639448364336113e-06, "loss": 0.923, "step": 25315 }, { "epoch": 0.3085810390844942, "grad_norm": 2.0880496501922607, "learning_rate": 3.6391276459268766e-06, "loss": 0.8, "step": 25320 }, { "epoch": 0.30864197530864196, "grad_norm": 2.5327982902526855, "learning_rate": 3.6388069275176397e-06, "loss": 0.8099, "step": 25325 }, { "epoch": 0.3087029115327898, "grad_norm": 1.7438908815383911, "learning_rate": 3.6384862091084035e-06, "loss": 0.8734, "step": 25330 }, { "epoch": 0.3087638477569376, "grad_norm": 1.7239434719085693, "learning_rate": 3.6381654906991665e-06, "loss": 0.7853, "step": 25335 }, { "epoch": 0.30882478398108537, "grad_norm": 1.7888782024383545, "learning_rate": 3.6378447722899296e-06, "loss": 0.9298, "step": 25340 }, { "epoch": 0.3088857202052332, "grad_norm": 2.3322081565856934, "learning_rate": 3.6375240538806934e-06, "loss": 0.8138, "step": 25345 }, { "epoch": 0.308946656429381, "grad_norm": 1.8922991752624512, "learning_rate": 3.6372033354714564e-06, "loss": 0.8787, "step": 25350 }, { "epoch": 0.30900759265352884, "grad_norm": 1.6978033781051636, "learning_rate": 3.6368826170622195e-06, "loss": 0.8093, "step": 25355 }, { "epoch": 0.3090685288776766, "grad_norm": 1.9772071838378906, "learning_rate": 3.636561898652983e-06, "loss": 0.8533, "step": 25360 }, { "epoch": 0.30912946510182443, "grad_norm": 2.0447306632995605, "learning_rate": 3.6362411802437463e-06, "loss": 0.8596, "step": 25365 }, { "epoch": 0.30919040132597225, "grad_norm": 2.090587615966797, "learning_rate": 3.6359204618345093e-06, "loss": 0.8668, "step": 25370 }, { "epoch": 0.30925133755012, "grad_norm": 1.831165075302124, "learning_rate": 3.6355997434252728e-06, "loss": 0.8328, "step": 25375 }, { "epoch": 0.30931227377426784, "grad_norm": 1.93913996219635, "learning_rate": 3.6352790250160362e-06, "loss": 0.8774, "step": 25380 }, { "epoch": 0.30937320999841567, "grad_norm": 2.474440336227417, "learning_rate": 3.6349583066067997e-06, "loss": 0.822, "step": 25385 }, { "epoch": 0.3094341462225635, "grad_norm": 2.417266607284546, "learning_rate": 3.6346375881975627e-06, "loss": 0.885, "step": 25390 }, { "epoch": 0.30949508244671126, "grad_norm": 1.9749780893325806, "learning_rate": 3.634316869788326e-06, "loss": 0.8345, "step": 25395 }, { "epoch": 0.3095560186708591, "grad_norm": 1.7547552585601807, "learning_rate": 3.6339961513790896e-06, "loss": 0.7919, "step": 25400 }, { "epoch": 0.3096169548950069, "grad_norm": 1.9307494163513184, "learning_rate": 3.6336754329698526e-06, "loss": 0.8496, "step": 25405 }, { "epoch": 0.30967789111915467, "grad_norm": 2.071920871734619, "learning_rate": 3.6333547145606164e-06, "loss": 0.8149, "step": 25410 }, { "epoch": 0.3097388273433025, "grad_norm": 1.57239830493927, "learning_rate": 3.6330339961513795e-06, "loss": 0.8514, "step": 25415 }, { "epoch": 0.3097997635674503, "grad_norm": 1.9331142902374268, "learning_rate": 3.6327132777421425e-06, "loss": 0.8055, "step": 25420 }, { "epoch": 0.30986069979159814, "grad_norm": 1.87310791015625, "learning_rate": 3.6323925593329063e-06, "loss": 0.8955, "step": 25425 }, { "epoch": 0.3099216360157459, "grad_norm": 1.950252652168274, "learning_rate": 3.6320718409236694e-06, "loss": 0.9015, "step": 25430 }, { "epoch": 0.30998257223989373, "grad_norm": 1.8606098890304565, "learning_rate": 3.6317511225144324e-06, "loss": 0.848, "step": 25435 }, { "epoch": 0.31004350846404155, "grad_norm": 1.921400547027588, "learning_rate": 3.6314304041051962e-06, "loss": 0.8004, "step": 25440 }, { "epoch": 0.3101044446881893, "grad_norm": 1.9196151494979858, "learning_rate": 3.6311096856959593e-06, "loss": 0.8275, "step": 25445 }, { "epoch": 0.31016538091233714, "grad_norm": 2.192967653274536, "learning_rate": 3.6307889672867223e-06, "loss": 0.887, "step": 25450 }, { "epoch": 0.31022631713648496, "grad_norm": 2.179549217224121, "learning_rate": 3.6304682488774857e-06, "loss": 0.9072, "step": 25455 }, { "epoch": 0.3102872533606328, "grad_norm": 1.8516610860824585, "learning_rate": 3.630147530468249e-06, "loss": 0.8407, "step": 25460 }, { "epoch": 0.31034818958478055, "grad_norm": 1.8971844911575317, "learning_rate": 3.6298268120590126e-06, "loss": 0.9224, "step": 25465 }, { "epoch": 0.3104091258089284, "grad_norm": 2.566396951675415, "learning_rate": 3.6295060936497756e-06, "loss": 0.911, "step": 25470 }, { "epoch": 0.3104700620330762, "grad_norm": 1.9931278228759766, "learning_rate": 3.6291853752405395e-06, "loss": 0.8961, "step": 25475 }, { "epoch": 0.31053099825722397, "grad_norm": 2.068730592727661, "learning_rate": 3.6288646568313025e-06, "loss": 0.8459, "step": 25480 }, { "epoch": 0.3105919344813718, "grad_norm": 2.7833523750305176, "learning_rate": 3.6285439384220655e-06, "loss": 0.9067, "step": 25485 }, { "epoch": 0.3106528707055196, "grad_norm": 1.8688232898712158, "learning_rate": 3.6282232200128294e-06, "loss": 0.9623, "step": 25490 }, { "epoch": 0.31071380692966744, "grad_norm": 2.19064998626709, "learning_rate": 3.6279025016035924e-06, "loss": 0.8654, "step": 25495 }, { "epoch": 0.3107747431538152, "grad_norm": 2.1132822036743164, "learning_rate": 3.6275817831943554e-06, "loss": 0.9036, "step": 25500 }, { "epoch": 0.310835679377963, "grad_norm": 1.7420454025268555, "learning_rate": 3.6272610647851193e-06, "loss": 0.8315, "step": 25505 }, { "epoch": 0.31089661560211085, "grad_norm": 2.1249377727508545, "learning_rate": 3.6269403463758823e-06, "loss": 0.8853, "step": 25510 }, { "epoch": 0.3109575518262586, "grad_norm": 1.811956763267517, "learning_rate": 3.6266196279666453e-06, "loss": 0.8379, "step": 25515 }, { "epoch": 0.31101848805040644, "grad_norm": 1.8344182968139648, "learning_rate": 3.626298909557409e-06, "loss": 0.8333, "step": 25520 }, { "epoch": 0.31107942427455426, "grad_norm": 1.7817449569702148, "learning_rate": 3.625978191148172e-06, "loss": 0.8363, "step": 25525 }, { "epoch": 0.3111403604987021, "grad_norm": 2.04799747467041, "learning_rate": 3.6256574727389356e-06, "loss": 0.8672, "step": 25530 }, { "epoch": 0.31120129672284985, "grad_norm": 2.141572952270508, "learning_rate": 3.6253367543296986e-06, "loss": 0.9069, "step": 25535 }, { "epoch": 0.3112622329469977, "grad_norm": 2.2978086471557617, "learning_rate": 3.625016035920462e-06, "loss": 0.9904, "step": 25540 }, { "epoch": 0.3113231691711455, "grad_norm": 1.8039543628692627, "learning_rate": 3.6246953175112255e-06, "loss": 0.8655, "step": 25545 }, { "epoch": 0.31138410539529326, "grad_norm": 1.870607614517212, "learning_rate": 3.6243745991019885e-06, "loss": 0.9052, "step": 25550 }, { "epoch": 0.3114450416194411, "grad_norm": 2.893281936645508, "learning_rate": 3.6240538806927524e-06, "loss": 0.854, "step": 25555 }, { "epoch": 0.3115059778435889, "grad_norm": 1.7099697589874268, "learning_rate": 3.6237331622835154e-06, "loss": 0.8438, "step": 25560 }, { "epoch": 0.31156691406773673, "grad_norm": 1.915070652961731, "learning_rate": 3.6234124438742784e-06, "loss": 0.852, "step": 25565 }, { "epoch": 0.3116278502918845, "grad_norm": 2.059370756149292, "learning_rate": 3.6230917254650423e-06, "loss": 0.9055, "step": 25570 }, { "epoch": 0.3116887865160323, "grad_norm": 1.981943130493164, "learning_rate": 3.6227710070558053e-06, "loss": 0.8708, "step": 25575 }, { "epoch": 0.31174972274018015, "grad_norm": 2.1800336837768555, "learning_rate": 3.6224502886465683e-06, "loss": 0.8469, "step": 25580 }, { "epoch": 0.3118106589643279, "grad_norm": 2.0012996196746826, "learning_rate": 3.622129570237332e-06, "loss": 0.8859, "step": 25585 }, { "epoch": 0.31187159518847574, "grad_norm": 2.2590134143829346, "learning_rate": 3.6218088518280952e-06, "loss": 0.8217, "step": 25590 }, { "epoch": 0.31193253141262356, "grad_norm": 1.6168943643569946, "learning_rate": 3.6214881334188582e-06, "loss": 0.8813, "step": 25595 }, { "epoch": 0.3119934676367714, "grad_norm": 1.8794183731079102, "learning_rate": 3.621167415009622e-06, "loss": 0.8914, "step": 25600 }, { "epoch": 0.31205440386091915, "grad_norm": 2.4156267642974854, "learning_rate": 3.620846696600385e-06, "loss": 0.8338, "step": 25605 }, { "epoch": 0.312115340085067, "grad_norm": 1.8044424057006836, "learning_rate": 3.6205259781911486e-06, "loss": 0.8298, "step": 25610 }, { "epoch": 0.3121762763092148, "grad_norm": 1.9922618865966797, "learning_rate": 3.6202052597819116e-06, "loss": 0.8393, "step": 25615 }, { "epoch": 0.31223721253336256, "grad_norm": 1.9800806045532227, "learning_rate": 3.619884541372675e-06, "loss": 0.825, "step": 25620 }, { "epoch": 0.3122981487575104, "grad_norm": 2.11069393157959, "learning_rate": 3.6195638229634384e-06, "loss": 0.8494, "step": 25625 }, { "epoch": 0.3123590849816582, "grad_norm": 1.8757976293563843, "learning_rate": 3.6192431045542015e-06, "loss": 0.8487, "step": 25630 }, { "epoch": 0.31242002120580603, "grad_norm": 2.0946781635284424, "learning_rate": 3.6189223861449653e-06, "loss": 0.8913, "step": 25635 }, { "epoch": 0.3124809574299538, "grad_norm": 2.3179855346679688, "learning_rate": 3.6186016677357283e-06, "loss": 0.8102, "step": 25640 }, { "epoch": 0.3125418936541016, "grad_norm": 2.066011667251587, "learning_rate": 3.6182809493264914e-06, "loss": 0.872, "step": 25645 }, { "epoch": 0.31260282987824944, "grad_norm": 2.22489070892334, "learning_rate": 3.6179602309172552e-06, "loss": 0.8498, "step": 25650 }, { "epoch": 0.3126637661023972, "grad_norm": 2.113602638244629, "learning_rate": 3.6176395125080182e-06, "loss": 0.8676, "step": 25655 }, { "epoch": 0.31272470232654503, "grad_norm": 1.7631125450134277, "learning_rate": 3.6173187940987813e-06, "loss": 0.8828, "step": 25660 }, { "epoch": 0.31278563855069286, "grad_norm": 1.8897697925567627, "learning_rate": 3.616998075689545e-06, "loss": 0.8377, "step": 25665 }, { "epoch": 0.3128465747748406, "grad_norm": 2.1245195865631104, "learning_rate": 3.616677357280308e-06, "loss": 0.8481, "step": 25670 }, { "epoch": 0.31290751099898845, "grad_norm": 2.075774908065796, "learning_rate": 3.616356638871071e-06, "loss": 0.8948, "step": 25675 }, { "epoch": 0.31296844722313627, "grad_norm": 2.0452234745025635, "learning_rate": 3.616035920461835e-06, "loss": 0.8882, "step": 25680 }, { "epoch": 0.3130293834472841, "grad_norm": 2.1455516815185547, "learning_rate": 3.615715202052598e-06, "loss": 0.826, "step": 25685 }, { "epoch": 0.31309031967143186, "grad_norm": 1.8139104843139648, "learning_rate": 3.6153944836433615e-06, "loss": 0.8575, "step": 25690 }, { "epoch": 0.3131512558955797, "grad_norm": 1.8631013631820679, "learning_rate": 3.615073765234125e-06, "loss": 0.8935, "step": 25695 }, { "epoch": 0.3132121921197275, "grad_norm": 2.0265259742736816, "learning_rate": 3.614753046824888e-06, "loss": 0.8637, "step": 25700 }, { "epoch": 0.3132731283438753, "grad_norm": 1.809802532196045, "learning_rate": 3.6144323284156514e-06, "loss": 0.8997, "step": 25705 }, { "epoch": 0.3133340645680231, "grad_norm": 1.7759740352630615, "learning_rate": 3.6141116100064144e-06, "loss": 0.8788, "step": 25710 }, { "epoch": 0.3133950007921709, "grad_norm": 1.8296172618865967, "learning_rate": 3.6137908915971783e-06, "loss": 0.872, "step": 25715 }, { "epoch": 0.31345593701631874, "grad_norm": 1.8984787464141846, "learning_rate": 3.6134701731879413e-06, "loss": 0.8438, "step": 25720 }, { "epoch": 0.3135168732404665, "grad_norm": 2.1325607299804688, "learning_rate": 3.6131494547787043e-06, "loss": 0.8261, "step": 25725 }, { "epoch": 0.31357780946461433, "grad_norm": 1.8283288478851318, "learning_rate": 3.612828736369468e-06, "loss": 0.8416, "step": 25730 }, { "epoch": 0.31363874568876215, "grad_norm": 1.8729629516601562, "learning_rate": 3.612508017960231e-06, "loss": 0.9093, "step": 25735 }, { "epoch": 0.3136996819129099, "grad_norm": 1.8864506483078003, "learning_rate": 3.612187299550994e-06, "loss": 0.7833, "step": 25740 }, { "epoch": 0.31376061813705775, "grad_norm": 1.9959360361099243, "learning_rate": 3.611866581141758e-06, "loss": 0.8759, "step": 25745 }, { "epoch": 0.31382155436120557, "grad_norm": 1.9953715801239014, "learning_rate": 3.611545862732521e-06, "loss": 0.8882, "step": 25750 }, { "epoch": 0.3138824905853534, "grad_norm": 2.5185248851776123, "learning_rate": 3.6112251443232845e-06, "loss": 0.883, "step": 25755 }, { "epoch": 0.31394342680950116, "grad_norm": 2.080670118331909, "learning_rate": 3.610904425914048e-06, "loss": 0.925, "step": 25760 }, { "epoch": 0.314004363033649, "grad_norm": 1.7488375902175903, "learning_rate": 3.610583707504811e-06, "loss": 0.7856, "step": 25765 }, { "epoch": 0.3140652992577968, "grad_norm": 1.9903936386108398, "learning_rate": 3.6102629890955744e-06, "loss": 0.8904, "step": 25770 }, { "epoch": 0.31412623548194457, "grad_norm": 1.8720725774765015, "learning_rate": 3.609942270686338e-06, "loss": 0.8735, "step": 25775 }, { "epoch": 0.3141871717060924, "grad_norm": 2.1591596603393555, "learning_rate": 3.6096215522771013e-06, "loss": 0.866, "step": 25780 }, { "epoch": 0.3142481079302402, "grad_norm": 1.6823959350585938, "learning_rate": 3.6093008338678643e-06, "loss": 0.8706, "step": 25785 }, { "epoch": 0.31430904415438804, "grad_norm": 1.9348256587982178, "learning_rate": 3.6089801154586273e-06, "loss": 0.8533, "step": 25790 }, { "epoch": 0.3143699803785358, "grad_norm": 1.8669464588165283, "learning_rate": 3.608659397049391e-06, "loss": 0.9679, "step": 25795 }, { "epoch": 0.31443091660268363, "grad_norm": 1.816948652267456, "learning_rate": 3.608338678640154e-06, "loss": 0.8154, "step": 25800 }, { "epoch": 0.31449185282683145, "grad_norm": 1.8353643417358398, "learning_rate": 3.6080179602309172e-06, "loss": 0.8748, "step": 25805 }, { "epoch": 0.3145527890509792, "grad_norm": 2.160038471221924, "learning_rate": 3.607697241821681e-06, "loss": 0.8275, "step": 25810 }, { "epoch": 0.31461372527512704, "grad_norm": 2.1420440673828125, "learning_rate": 3.607376523412444e-06, "loss": 0.8836, "step": 25815 }, { "epoch": 0.31467466149927487, "grad_norm": 1.917969822883606, "learning_rate": 3.607055805003207e-06, "loss": 0.8247, "step": 25820 }, { "epoch": 0.3147355977234227, "grad_norm": 1.598429560661316, "learning_rate": 3.606735086593971e-06, "loss": 0.8879, "step": 25825 }, { "epoch": 0.31479653394757046, "grad_norm": 2.234377384185791, "learning_rate": 3.606414368184734e-06, "loss": 0.8641, "step": 25830 }, { "epoch": 0.3148574701717183, "grad_norm": 2.0920095443725586, "learning_rate": 3.6060936497754974e-06, "loss": 0.811, "step": 25835 }, { "epoch": 0.3149184063958661, "grad_norm": 1.9125553369522095, "learning_rate": 3.605772931366261e-06, "loss": 0.845, "step": 25840 }, { "epoch": 0.31497934262001387, "grad_norm": 1.9452506303787231, "learning_rate": 3.605452212957024e-06, "loss": 0.9183, "step": 25845 }, { "epoch": 0.3150402788441617, "grad_norm": 1.9515999555587769, "learning_rate": 3.6051314945477873e-06, "loss": 0.8463, "step": 25850 }, { "epoch": 0.3151012150683095, "grad_norm": 2.268519163131714, "learning_rate": 3.6048107761385508e-06, "loss": 0.8322, "step": 25855 }, { "epoch": 0.31516215129245734, "grad_norm": 2.0268216133117676, "learning_rate": 3.6044900577293142e-06, "loss": 0.9069, "step": 25860 }, { "epoch": 0.3152230875166051, "grad_norm": 1.9739516973495483, "learning_rate": 3.6041693393200772e-06, "loss": 0.9191, "step": 25865 }, { "epoch": 0.31528402374075293, "grad_norm": 2.0879931449890137, "learning_rate": 3.6038486209108402e-06, "loss": 0.9311, "step": 25870 }, { "epoch": 0.31534495996490075, "grad_norm": 1.8951250314712524, "learning_rate": 3.603527902501604e-06, "loss": 0.8919, "step": 25875 }, { "epoch": 0.3154058961890485, "grad_norm": 1.6998411417007446, "learning_rate": 3.603207184092367e-06, "loss": 0.8288, "step": 25880 }, { "epoch": 0.31546683241319634, "grad_norm": 2.2531135082244873, "learning_rate": 3.60288646568313e-06, "loss": 0.83, "step": 25885 }, { "epoch": 0.31552776863734416, "grad_norm": 2.036067247390747, "learning_rate": 3.602565747273894e-06, "loss": 0.8895, "step": 25890 }, { "epoch": 0.315588704861492, "grad_norm": 2.1249053478240967, "learning_rate": 3.602245028864657e-06, "loss": 0.8662, "step": 25895 }, { "epoch": 0.31564964108563975, "grad_norm": 1.9944590330123901, "learning_rate": 3.60192431045542e-06, "loss": 0.9047, "step": 25900 }, { "epoch": 0.3157105773097876, "grad_norm": 2.001779079437256, "learning_rate": 3.601603592046184e-06, "loss": 0.9759, "step": 25905 }, { "epoch": 0.3157715135339354, "grad_norm": 1.9043081998825073, "learning_rate": 3.601282873636947e-06, "loss": 0.8547, "step": 25910 }, { "epoch": 0.31583244975808317, "grad_norm": 1.8572955131530762, "learning_rate": 3.6009621552277104e-06, "loss": 0.8118, "step": 25915 }, { "epoch": 0.315893385982231, "grad_norm": 1.9489048719406128, "learning_rate": 3.600641436818474e-06, "loss": 0.9052, "step": 25920 }, { "epoch": 0.3159543222063788, "grad_norm": 2.330142021179199, "learning_rate": 3.600320718409237e-06, "loss": 0.8594, "step": 25925 }, { "epoch": 0.31601525843052664, "grad_norm": 2.0046181678771973, "learning_rate": 3.6000000000000003e-06, "loss": 0.9032, "step": 25930 }, { "epoch": 0.3160761946546744, "grad_norm": 2.1527292728424072, "learning_rate": 3.5996792815907637e-06, "loss": 0.8876, "step": 25935 }, { "epoch": 0.3161371308788222, "grad_norm": 1.8352248668670654, "learning_rate": 3.599358563181527e-06, "loss": 0.7943, "step": 25940 }, { "epoch": 0.31619806710297005, "grad_norm": 1.6859508752822876, "learning_rate": 3.59903784477229e-06, "loss": 0.8795, "step": 25945 }, { "epoch": 0.3162590033271178, "grad_norm": 1.99717116355896, "learning_rate": 3.598717126363053e-06, "loss": 0.8824, "step": 25950 }, { "epoch": 0.31631993955126564, "grad_norm": 2.093451499938965, "learning_rate": 3.598396407953817e-06, "loss": 0.822, "step": 25955 }, { "epoch": 0.31638087577541346, "grad_norm": 1.9769597053527832, "learning_rate": 3.59807568954458e-06, "loss": 0.8272, "step": 25960 }, { "epoch": 0.3164418119995613, "grad_norm": 1.9846092462539673, "learning_rate": 3.597754971135343e-06, "loss": 0.7707, "step": 25965 }, { "epoch": 0.31650274822370905, "grad_norm": 1.8829320669174194, "learning_rate": 3.597434252726107e-06, "loss": 0.9143, "step": 25970 }, { "epoch": 0.3165636844478569, "grad_norm": 1.6803061962127686, "learning_rate": 3.59711353431687e-06, "loss": 0.8963, "step": 25975 }, { "epoch": 0.3166246206720047, "grad_norm": 1.6456184387207031, "learning_rate": 3.5967928159076334e-06, "loss": 0.8304, "step": 25980 }, { "epoch": 0.31668555689615246, "grad_norm": 1.9785070419311523, "learning_rate": 3.596472097498397e-06, "loss": 0.815, "step": 25985 }, { "epoch": 0.3167464931203003, "grad_norm": 1.892943263053894, "learning_rate": 3.59615137908916e-06, "loss": 0.9055, "step": 25990 }, { "epoch": 0.3168074293444481, "grad_norm": 2.6316943168640137, "learning_rate": 3.5958306606799233e-06, "loss": 0.8607, "step": 25995 }, { "epoch": 0.31686836556859593, "grad_norm": 2.1478374004364014, "learning_rate": 3.5955099422706867e-06, "loss": 0.9671, "step": 26000 }, { "epoch": 0.3169293017927437, "grad_norm": 1.9756473302841187, "learning_rate": 3.59518922386145e-06, "loss": 0.8986, "step": 26005 }, { "epoch": 0.3169902380168915, "grad_norm": 2.053811550140381, "learning_rate": 3.594868505452213e-06, "loss": 0.8403, "step": 26010 }, { "epoch": 0.31705117424103935, "grad_norm": 2.023386240005493, "learning_rate": 3.5945477870429766e-06, "loss": 0.8766, "step": 26015 }, { "epoch": 0.3171121104651871, "grad_norm": 2.1543822288513184, "learning_rate": 3.59422706863374e-06, "loss": 0.9673, "step": 26020 }, { "epoch": 0.31717304668933494, "grad_norm": 2.4498701095581055, "learning_rate": 3.593906350224503e-06, "loss": 0.7989, "step": 26025 }, { "epoch": 0.31723398291348276, "grad_norm": 1.98358952999115, "learning_rate": 3.593585631815267e-06, "loss": 0.8901, "step": 26030 }, { "epoch": 0.3172949191376306, "grad_norm": 1.886533498764038, "learning_rate": 3.59326491340603e-06, "loss": 0.8447, "step": 26035 }, { "epoch": 0.31735585536177835, "grad_norm": 1.9879683256149292, "learning_rate": 3.592944194996793e-06, "loss": 0.8485, "step": 26040 }, { "epoch": 0.3174167915859262, "grad_norm": 1.906862735748291, "learning_rate": 3.592623476587556e-06, "loss": 0.962, "step": 26045 }, { "epoch": 0.317477727810074, "grad_norm": 2.094700574874878, "learning_rate": 3.59230275817832e-06, "loss": 0.8702, "step": 26050 }, { "epoch": 0.31753866403422176, "grad_norm": 1.9871046543121338, "learning_rate": 3.591982039769083e-06, "loss": 0.9399, "step": 26055 }, { "epoch": 0.3175996002583696, "grad_norm": 1.7691285610198975, "learning_rate": 3.5916613213598463e-06, "loss": 0.7895, "step": 26060 }, { "epoch": 0.3176605364825174, "grad_norm": 2.0401220321655273, "learning_rate": 3.5913406029506098e-06, "loss": 0.8825, "step": 26065 }, { "epoch": 0.31772147270666523, "grad_norm": 1.7333089113235474, "learning_rate": 3.5910198845413728e-06, "loss": 0.7733, "step": 26070 }, { "epoch": 0.317782408930813, "grad_norm": 1.9368813037872314, "learning_rate": 3.5906991661321362e-06, "loss": 0.9222, "step": 26075 }, { "epoch": 0.3178433451549608, "grad_norm": 1.8153796195983887, "learning_rate": 3.5903784477228997e-06, "loss": 0.885, "step": 26080 }, { "epoch": 0.31790428137910864, "grad_norm": 1.7358485460281372, "learning_rate": 3.590057729313663e-06, "loss": 0.8324, "step": 26085 }, { "epoch": 0.3179652176032564, "grad_norm": 1.8762050867080688, "learning_rate": 3.589737010904426e-06, "loss": 0.7998, "step": 26090 }, { "epoch": 0.31802615382740423, "grad_norm": 1.9550482034683228, "learning_rate": 3.5894162924951896e-06, "loss": 0.8748, "step": 26095 }, { "epoch": 0.31808709005155206, "grad_norm": 2.0579097270965576, "learning_rate": 3.589095574085953e-06, "loss": 0.8609, "step": 26100 }, { "epoch": 0.3181480262756999, "grad_norm": 1.9741101264953613, "learning_rate": 3.588774855676716e-06, "loss": 0.9036, "step": 26105 }, { "epoch": 0.31820896249984765, "grad_norm": 2.135707139968872, "learning_rate": 3.58845413726748e-06, "loss": 0.9192, "step": 26110 }, { "epoch": 0.31826989872399547, "grad_norm": 1.927607774734497, "learning_rate": 3.588133418858243e-06, "loss": 0.8552, "step": 26115 }, { "epoch": 0.3183308349481433, "grad_norm": 1.7820870876312256, "learning_rate": 3.587812700449006e-06, "loss": 0.8722, "step": 26120 }, { "epoch": 0.31839177117229106, "grad_norm": 2.0801572799682617, "learning_rate": 3.587491982039769e-06, "loss": 0.8938, "step": 26125 }, { "epoch": 0.3184527073964389, "grad_norm": 1.7098181247711182, "learning_rate": 3.587171263630533e-06, "loss": 0.8737, "step": 26130 }, { "epoch": 0.3185136436205867, "grad_norm": 1.7507562637329102, "learning_rate": 3.586850545221296e-06, "loss": 0.888, "step": 26135 }, { "epoch": 0.3185745798447345, "grad_norm": 1.7712864875793457, "learning_rate": 3.5865298268120592e-06, "loss": 0.9298, "step": 26140 }, { "epoch": 0.3186355160688823, "grad_norm": 1.630374789237976, "learning_rate": 3.5862091084028227e-06, "loss": 0.9055, "step": 26145 }, { "epoch": 0.3186964522930301, "grad_norm": 1.7389088869094849, "learning_rate": 3.5858883899935857e-06, "loss": 0.8424, "step": 26150 }, { "epoch": 0.31875738851717794, "grad_norm": 1.633696436882019, "learning_rate": 3.585567671584349e-06, "loss": 0.8214, "step": 26155 }, { "epoch": 0.3188183247413257, "grad_norm": 1.986846923828125, "learning_rate": 3.5852469531751126e-06, "loss": 0.7922, "step": 26160 }, { "epoch": 0.31887926096547353, "grad_norm": 1.8557424545288086, "learning_rate": 3.584926234765876e-06, "loss": 0.7492, "step": 26165 }, { "epoch": 0.31894019718962136, "grad_norm": 2.305999755859375, "learning_rate": 3.584605516356639e-06, "loss": 0.835, "step": 26170 }, { "epoch": 0.3190011334137691, "grad_norm": 2.0568158626556396, "learning_rate": 3.584284797947403e-06, "loss": 0.8316, "step": 26175 }, { "epoch": 0.31906206963791695, "grad_norm": 1.936313509941101, "learning_rate": 3.583964079538166e-06, "loss": 0.7927, "step": 26180 }, { "epoch": 0.31912300586206477, "grad_norm": 2.1719815731048584, "learning_rate": 3.583643361128929e-06, "loss": 0.9222, "step": 26185 }, { "epoch": 0.3191839420862126, "grad_norm": 2.038893461227417, "learning_rate": 3.583322642719693e-06, "loss": 0.9001, "step": 26190 }, { "epoch": 0.31924487831036036, "grad_norm": 2.49241304397583, "learning_rate": 3.583001924310456e-06, "loss": 0.9585, "step": 26195 }, { "epoch": 0.3193058145345082, "grad_norm": 2.1401915550231934, "learning_rate": 3.582681205901219e-06, "loss": 0.925, "step": 26200 }, { "epoch": 0.319366750758656, "grad_norm": 1.8419997692108154, "learning_rate": 3.582360487491982e-06, "loss": 0.8148, "step": 26205 }, { "epoch": 0.31942768698280377, "grad_norm": 1.6542906761169434, "learning_rate": 3.5820397690827457e-06, "loss": 0.8234, "step": 26210 }, { "epoch": 0.3194886232069516, "grad_norm": 1.9410667419433594, "learning_rate": 3.5817190506735087e-06, "loss": 0.7879, "step": 26215 }, { "epoch": 0.3195495594310994, "grad_norm": 1.9775114059448242, "learning_rate": 3.581398332264272e-06, "loss": 0.8529, "step": 26220 }, { "epoch": 0.31961049565524724, "grad_norm": 1.7639511823654175, "learning_rate": 3.5810776138550356e-06, "loss": 0.8293, "step": 26225 }, { "epoch": 0.319671431879395, "grad_norm": 1.6310497522354126, "learning_rate": 3.580756895445799e-06, "loss": 0.8468, "step": 26230 }, { "epoch": 0.31973236810354283, "grad_norm": 1.8660426139831543, "learning_rate": 3.580436177036562e-06, "loss": 0.8525, "step": 26235 }, { "epoch": 0.31979330432769065, "grad_norm": 1.8456165790557861, "learning_rate": 3.5801154586273255e-06, "loss": 0.8488, "step": 26240 }, { "epoch": 0.3198542405518384, "grad_norm": 1.8009364604949951, "learning_rate": 3.579794740218089e-06, "loss": 0.8222, "step": 26245 }, { "epoch": 0.31991517677598624, "grad_norm": 2.423140048980713, "learning_rate": 3.579474021808852e-06, "loss": 0.9078, "step": 26250 }, { "epoch": 0.31997611300013407, "grad_norm": 1.9027022123336792, "learning_rate": 3.579153303399616e-06, "loss": 0.8067, "step": 26255 }, { "epoch": 0.3200370492242819, "grad_norm": 1.7202359437942505, "learning_rate": 3.578832584990379e-06, "loss": 0.8279, "step": 26260 }, { "epoch": 0.32009798544842966, "grad_norm": 1.7769197225570679, "learning_rate": 3.578511866581142e-06, "loss": 0.8344, "step": 26265 }, { "epoch": 0.3201589216725775, "grad_norm": 2.0206947326660156, "learning_rate": 3.5781911481719057e-06, "loss": 0.8203, "step": 26270 }, { "epoch": 0.3202198578967253, "grad_norm": 2.024266481399536, "learning_rate": 3.5778704297626687e-06, "loss": 0.8957, "step": 26275 }, { "epoch": 0.32028079412087307, "grad_norm": 1.7039316892623901, "learning_rate": 3.5775497113534318e-06, "loss": 0.915, "step": 26280 }, { "epoch": 0.3203417303450209, "grad_norm": 1.9799346923828125, "learning_rate": 3.5772289929441956e-06, "loss": 0.8384, "step": 26285 }, { "epoch": 0.3204026665691687, "grad_norm": 1.8617807626724243, "learning_rate": 3.5769082745349586e-06, "loss": 0.8151, "step": 26290 }, { "epoch": 0.32046360279331654, "grad_norm": 1.8211872577667236, "learning_rate": 3.5765875561257217e-06, "loss": 0.8577, "step": 26295 }, { "epoch": 0.3205245390174643, "grad_norm": 2.030888557434082, "learning_rate": 3.576266837716485e-06, "loss": 0.8045, "step": 26300 }, { "epoch": 0.32058547524161213, "grad_norm": 1.8133351802825928, "learning_rate": 3.5759461193072485e-06, "loss": 0.8909, "step": 26305 }, { "epoch": 0.32064641146575995, "grad_norm": 1.900706171989441, "learning_rate": 3.575625400898012e-06, "loss": 0.8981, "step": 26310 }, { "epoch": 0.3207073476899077, "grad_norm": 1.795844316482544, "learning_rate": 3.575304682488775e-06, "loss": 0.8569, "step": 26315 }, { "epoch": 0.32076828391405554, "grad_norm": 1.8884971141815186, "learning_rate": 3.5749839640795384e-06, "loss": 0.9825, "step": 26320 }, { "epoch": 0.32082922013820336, "grad_norm": 1.8627355098724365, "learning_rate": 3.574663245670302e-06, "loss": 0.8291, "step": 26325 }, { "epoch": 0.3208901563623512, "grad_norm": 1.710575819015503, "learning_rate": 3.574342527261065e-06, "loss": 0.8873, "step": 26330 }, { "epoch": 0.32095109258649895, "grad_norm": 1.9783897399902344, "learning_rate": 3.5740218088518288e-06, "loss": 0.8121, "step": 26335 }, { "epoch": 0.3210120288106468, "grad_norm": 1.9518475532531738, "learning_rate": 3.5737010904425918e-06, "loss": 0.7774, "step": 26340 }, { "epoch": 0.3210729650347946, "grad_norm": 1.8242741823196411, "learning_rate": 3.573380372033355e-06, "loss": 0.8126, "step": 26345 }, { "epoch": 0.32113390125894237, "grad_norm": 1.9180068969726562, "learning_rate": 3.5730596536241187e-06, "loss": 0.9217, "step": 26350 }, { "epoch": 0.3211948374830902, "grad_norm": 1.7959368228912354, "learning_rate": 3.5727389352148817e-06, "loss": 0.8488, "step": 26355 }, { "epoch": 0.321255773707238, "grad_norm": 2.2654097080230713, "learning_rate": 3.5724182168056447e-06, "loss": 0.8612, "step": 26360 }, { "epoch": 0.32131670993138584, "grad_norm": 1.6926401853561401, "learning_rate": 3.5720974983964086e-06, "loss": 0.7735, "step": 26365 }, { "epoch": 0.3213776461555336, "grad_norm": 1.9064029455184937, "learning_rate": 3.5717767799871716e-06, "loss": 0.9487, "step": 26370 }, { "epoch": 0.3214385823796814, "grad_norm": 2.2480995655059814, "learning_rate": 3.5714560615779346e-06, "loss": 0.8417, "step": 26375 }, { "epoch": 0.32149951860382925, "grad_norm": 2.1268651485443115, "learning_rate": 3.571135343168698e-06, "loss": 0.8963, "step": 26380 }, { "epoch": 0.321560454827977, "grad_norm": 1.9292659759521484, "learning_rate": 3.5708146247594615e-06, "loss": 0.8682, "step": 26385 }, { "epoch": 0.32162139105212484, "grad_norm": 1.7702505588531494, "learning_rate": 3.570493906350225e-06, "loss": 0.8999, "step": 26390 }, { "epoch": 0.32168232727627266, "grad_norm": 1.9820857048034668, "learning_rate": 3.570173187940988e-06, "loss": 0.8552, "step": 26395 }, { "epoch": 0.3217432635004205, "grad_norm": 1.8193637132644653, "learning_rate": 3.5698524695317514e-06, "loss": 0.9196, "step": 26400 }, { "epoch": 0.32180419972456825, "grad_norm": 2.214259624481201, "learning_rate": 3.569531751122515e-06, "loss": 0.8255, "step": 26405 }, { "epoch": 0.3218651359487161, "grad_norm": 2.4923653602600098, "learning_rate": 3.569211032713278e-06, "loss": 0.8936, "step": 26410 }, { "epoch": 0.3219260721728639, "grad_norm": 1.7510935068130493, "learning_rate": 3.5688903143040417e-06, "loss": 0.9286, "step": 26415 }, { "epoch": 0.32198700839701166, "grad_norm": 1.8025193214416504, "learning_rate": 3.5685695958948047e-06, "loss": 0.792, "step": 26420 }, { "epoch": 0.3220479446211595, "grad_norm": 1.8917574882507324, "learning_rate": 3.5682488774855677e-06, "loss": 0.8609, "step": 26425 }, { "epoch": 0.3221088808453073, "grad_norm": 1.7788047790527344, "learning_rate": 3.5679281590763316e-06, "loss": 0.8007, "step": 26430 }, { "epoch": 0.32216981706945513, "grad_norm": 1.8966270685195923, "learning_rate": 3.5676074406670946e-06, "loss": 0.855, "step": 26435 }, { "epoch": 0.3222307532936029, "grad_norm": 2.191659450531006, "learning_rate": 3.5672867222578576e-06, "loss": 0.8743, "step": 26440 }, { "epoch": 0.3222916895177507, "grad_norm": 1.885606288909912, "learning_rate": 3.5669660038486215e-06, "loss": 0.8273, "step": 26445 }, { "epoch": 0.32235262574189855, "grad_norm": 2.1944665908813477, "learning_rate": 3.5666452854393845e-06, "loss": 0.8258, "step": 26450 }, { "epoch": 0.3224135619660463, "grad_norm": 2.283857583999634, "learning_rate": 3.566324567030148e-06, "loss": 0.8312, "step": 26455 }, { "epoch": 0.32247449819019414, "grad_norm": 1.8640347719192505, "learning_rate": 3.566003848620911e-06, "loss": 0.9211, "step": 26460 }, { "epoch": 0.32253543441434196, "grad_norm": 1.8682838678359985, "learning_rate": 3.5656831302116744e-06, "loss": 0.8655, "step": 26465 }, { "epoch": 0.3225963706384898, "grad_norm": 1.6830679178237915, "learning_rate": 3.565362411802438e-06, "loss": 0.9023, "step": 26470 }, { "epoch": 0.32265730686263755, "grad_norm": 1.872051477432251, "learning_rate": 3.565041693393201e-06, "loss": 0.7894, "step": 26475 }, { "epoch": 0.3227182430867854, "grad_norm": 1.9705758094787598, "learning_rate": 3.5647209749839647e-06, "loss": 0.8665, "step": 26480 }, { "epoch": 0.3227791793109332, "grad_norm": 1.9779874086380005, "learning_rate": 3.5644002565747277e-06, "loss": 0.8194, "step": 26485 }, { "epoch": 0.32284011553508096, "grad_norm": 1.8139444589614868, "learning_rate": 3.5640795381654908e-06, "loss": 0.8562, "step": 26490 }, { "epoch": 0.3229010517592288, "grad_norm": 2.018003225326538, "learning_rate": 3.5637588197562546e-06, "loss": 0.8869, "step": 26495 }, { "epoch": 0.3229619879833766, "grad_norm": 2.12896728515625, "learning_rate": 3.5634381013470176e-06, "loss": 0.8252, "step": 26500 }, { "epoch": 0.32302292420752443, "grad_norm": 1.7662088871002197, "learning_rate": 3.5631173829377806e-06, "loss": 0.8487, "step": 26505 }, { "epoch": 0.3230838604316722, "grad_norm": 1.869701623916626, "learning_rate": 3.5627966645285445e-06, "loss": 0.9231, "step": 26510 }, { "epoch": 0.32314479665582, "grad_norm": 1.9786080121994019, "learning_rate": 3.5624759461193075e-06, "loss": 0.7853, "step": 26515 }, { "epoch": 0.32320573287996784, "grad_norm": 2.2161645889282227, "learning_rate": 3.5621552277100705e-06, "loss": 0.9085, "step": 26520 }, { "epoch": 0.3232666691041156, "grad_norm": 1.6622711420059204, "learning_rate": 3.5618345093008344e-06, "loss": 0.867, "step": 26525 }, { "epoch": 0.32332760532826343, "grad_norm": 1.7486131191253662, "learning_rate": 3.5615137908915974e-06, "loss": 0.8702, "step": 26530 }, { "epoch": 0.32338854155241126, "grad_norm": 2.086071729660034, "learning_rate": 3.561193072482361e-06, "loss": 0.823, "step": 26535 }, { "epoch": 0.3234494777765591, "grad_norm": 2.1297569274902344, "learning_rate": 3.560872354073124e-06, "loss": 0.9034, "step": 26540 }, { "epoch": 0.32351041400070685, "grad_norm": 1.7750080823898315, "learning_rate": 3.5605516356638873e-06, "loss": 0.8166, "step": 26545 }, { "epoch": 0.32357135022485467, "grad_norm": 1.8290857076644897, "learning_rate": 3.5602309172546508e-06, "loss": 0.9311, "step": 26550 }, { "epoch": 0.3236322864490025, "grad_norm": 2.2211945056915283, "learning_rate": 3.5599101988454138e-06, "loss": 0.8616, "step": 26555 }, { "epoch": 0.32369322267315026, "grad_norm": 1.7787442207336426, "learning_rate": 3.5595894804361776e-06, "loss": 0.8472, "step": 26560 }, { "epoch": 0.3237541588972981, "grad_norm": 1.8229657411575317, "learning_rate": 3.5592687620269407e-06, "loss": 0.9078, "step": 26565 }, { "epoch": 0.3238150951214459, "grad_norm": 1.8377244472503662, "learning_rate": 3.5589480436177037e-06, "loss": 0.8318, "step": 26570 }, { "epoch": 0.32387603134559373, "grad_norm": 2.122779369354248, "learning_rate": 3.5586273252084675e-06, "loss": 0.8322, "step": 26575 }, { "epoch": 0.3239369675697415, "grad_norm": 1.6991190910339355, "learning_rate": 3.5583066067992306e-06, "loss": 0.8178, "step": 26580 }, { "epoch": 0.3239979037938893, "grad_norm": 2.3232672214508057, "learning_rate": 3.5579858883899936e-06, "loss": 0.8599, "step": 26585 }, { "epoch": 0.32405884001803714, "grad_norm": 2.2331740856170654, "learning_rate": 3.5576651699807574e-06, "loss": 0.8816, "step": 26590 }, { "epoch": 0.3241197762421849, "grad_norm": 1.821357250213623, "learning_rate": 3.5573444515715205e-06, "loss": 0.8441, "step": 26595 }, { "epoch": 0.32418071246633273, "grad_norm": 2.090714693069458, "learning_rate": 3.5570237331622835e-06, "loss": 0.8722, "step": 26600 }, { "epoch": 0.32424164869048056, "grad_norm": 1.9416799545288086, "learning_rate": 3.5567030147530473e-06, "loss": 0.8757, "step": 26605 }, { "epoch": 0.3243025849146283, "grad_norm": 1.872280478477478, "learning_rate": 3.5563822963438104e-06, "loss": 0.8009, "step": 26610 }, { "epoch": 0.32436352113877615, "grad_norm": 1.984824776649475, "learning_rate": 3.556061577934574e-06, "loss": 0.847, "step": 26615 }, { "epoch": 0.32442445736292397, "grad_norm": 2.1277036666870117, "learning_rate": 3.5557408595253372e-06, "loss": 0.8564, "step": 26620 }, { "epoch": 0.3244853935870718, "grad_norm": 2.318960666656494, "learning_rate": 3.5554201411161003e-06, "loss": 0.9771, "step": 26625 }, { "epoch": 0.32454632981121956, "grad_norm": 1.8789989948272705, "learning_rate": 3.5550994227068637e-06, "loss": 0.8369, "step": 26630 }, { "epoch": 0.3246072660353674, "grad_norm": 2.2022593021392822, "learning_rate": 3.5547787042976267e-06, "loss": 0.868, "step": 26635 }, { "epoch": 0.3246682022595152, "grad_norm": 1.777506709098816, "learning_rate": 3.5544579858883906e-06, "loss": 0.9003, "step": 26640 }, { "epoch": 0.32472913848366297, "grad_norm": 2.2907350063323975, "learning_rate": 3.5541372674791536e-06, "loss": 0.8575, "step": 26645 }, { "epoch": 0.3247900747078108, "grad_norm": 2.002918004989624, "learning_rate": 3.5538165490699166e-06, "loss": 0.9074, "step": 26650 }, { "epoch": 0.3248510109319586, "grad_norm": 1.9859892129898071, "learning_rate": 3.5534958306606805e-06, "loss": 0.8441, "step": 26655 }, { "epoch": 0.32491194715610644, "grad_norm": 1.806188941001892, "learning_rate": 3.5531751122514435e-06, "loss": 0.9025, "step": 26660 }, { "epoch": 0.3249728833802542, "grad_norm": 1.8640011548995972, "learning_rate": 3.5528543938422065e-06, "loss": 0.8125, "step": 26665 }, { "epoch": 0.32503381960440203, "grad_norm": 2.4426486492156982, "learning_rate": 3.5525336754329704e-06, "loss": 0.8962, "step": 26670 }, { "epoch": 0.32509475582854985, "grad_norm": 1.7992478609085083, "learning_rate": 3.5522129570237334e-06, "loss": 0.8427, "step": 26675 }, { "epoch": 0.3251556920526976, "grad_norm": 2.4357800483703613, "learning_rate": 3.551892238614497e-06, "loss": 0.9365, "step": 26680 }, { "epoch": 0.32521662827684544, "grad_norm": 2.32336163520813, "learning_rate": 3.5515715202052603e-06, "loss": 0.9127, "step": 26685 }, { "epoch": 0.32527756450099327, "grad_norm": 1.6061687469482422, "learning_rate": 3.5512508017960233e-06, "loss": 0.7993, "step": 26690 }, { "epoch": 0.3253385007251411, "grad_norm": 1.9741302728652954, "learning_rate": 3.5509300833867867e-06, "loss": 0.9532, "step": 26695 }, { "epoch": 0.32539943694928886, "grad_norm": 2.0769224166870117, "learning_rate": 3.55060936497755e-06, "loss": 0.855, "step": 26700 }, { "epoch": 0.3254603731734367, "grad_norm": 2.2372701168060303, "learning_rate": 3.5502886465683136e-06, "loss": 0.925, "step": 26705 }, { "epoch": 0.3255213093975845, "grad_norm": 1.9619953632354736, "learning_rate": 3.5499679281590766e-06, "loss": 0.858, "step": 26710 }, { "epoch": 0.32558224562173227, "grad_norm": 2.078798532485962, "learning_rate": 3.5496472097498396e-06, "loss": 0.8239, "step": 26715 }, { "epoch": 0.3256431818458801, "grad_norm": 1.744160771369934, "learning_rate": 3.5493264913406035e-06, "loss": 0.8253, "step": 26720 }, { "epoch": 0.3257041180700279, "grad_norm": 1.6108503341674805, "learning_rate": 3.5490057729313665e-06, "loss": 0.8582, "step": 26725 }, { "epoch": 0.32576505429417574, "grad_norm": 2.342060089111328, "learning_rate": 3.5486850545221295e-06, "loss": 0.873, "step": 26730 }, { "epoch": 0.3258259905183235, "grad_norm": 2.053663969039917, "learning_rate": 3.5483643361128934e-06, "loss": 0.856, "step": 26735 }, { "epoch": 0.32588692674247133, "grad_norm": 1.8851368427276611, "learning_rate": 3.5480436177036564e-06, "loss": 0.8607, "step": 26740 }, { "epoch": 0.32594786296661915, "grad_norm": 2.3088202476501465, "learning_rate": 3.5477228992944194e-06, "loss": 0.8466, "step": 26745 }, { "epoch": 0.3260087991907669, "grad_norm": 1.959994912147522, "learning_rate": 3.5474021808851833e-06, "loss": 0.8175, "step": 26750 }, { "epoch": 0.32606973541491474, "grad_norm": 1.8514608144760132, "learning_rate": 3.5470814624759463e-06, "loss": 0.8415, "step": 26755 }, { "epoch": 0.32613067163906256, "grad_norm": 2.098475933074951, "learning_rate": 3.5467607440667097e-06, "loss": 0.8422, "step": 26760 }, { "epoch": 0.3261916078632104, "grad_norm": 1.8110779523849487, "learning_rate": 3.546440025657473e-06, "loss": 0.8731, "step": 26765 }, { "epoch": 0.32625254408735815, "grad_norm": 2.269399404525757, "learning_rate": 3.546119307248236e-06, "loss": 0.8502, "step": 26770 }, { "epoch": 0.326313480311506, "grad_norm": 1.951429009437561, "learning_rate": 3.5457985888389996e-06, "loss": 0.8928, "step": 26775 }, { "epoch": 0.3263744165356538, "grad_norm": 1.8424396514892578, "learning_rate": 3.545477870429763e-06, "loss": 0.8863, "step": 26780 }, { "epoch": 0.32643535275980157, "grad_norm": 1.9779690504074097, "learning_rate": 3.5451571520205265e-06, "loss": 0.8571, "step": 26785 }, { "epoch": 0.3264962889839494, "grad_norm": 2.2346200942993164, "learning_rate": 3.5448364336112895e-06, "loss": 0.826, "step": 26790 }, { "epoch": 0.3265572252080972, "grad_norm": 2.2180938720703125, "learning_rate": 3.5445157152020526e-06, "loss": 0.8372, "step": 26795 }, { "epoch": 0.32661816143224504, "grad_norm": 2.1506919860839844, "learning_rate": 3.5441949967928164e-06, "loss": 0.8347, "step": 26800 }, { "epoch": 0.3266790976563928, "grad_norm": 1.879767894744873, "learning_rate": 3.5438742783835794e-06, "loss": 0.8276, "step": 26805 }, { "epoch": 0.3267400338805406, "grad_norm": 1.9998977184295654, "learning_rate": 3.5435535599743425e-06, "loss": 0.9409, "step": 26810 }, { "epoch": 0.32680097010468845, "grad_norm": 2.051119804382324, "learning_rate": 3.5432328415651063e-06, "loss": 0.8433, "step": 26815 }, { "epoch": 0.3268619063288362, "grad_norm": 1.7943074703216553, "learning_rate": 3.5429121231558693e-06, "loss": 0.8473, "step": 26820 }, { "epoch": 0.32692284255298404, "grad_norm": 2.2701199054718018, "learning_rate": 3.5425914047466324e-06, "loss": 0.8952, "step": 26825 }, { "epoch": 0.32698377877713186, "grad_norm": 2.1146326065063477, "learning_rate": 3.5422706863373962e-06, "loss": 0.911, "step": 26830 }, { "epoch": 0.3270447150012797, "grad_norm": 2.0032269954681396, "learning_rate": 3.5419499679281592e-06, "loss": 0.9108, "step": 26835 }, { "epoch": 0.32710565122542745, "grad_norm": 1.7622851133346558, "learning_rate": 3.5416292495189227e-06, "loss": 0.803, "step": 26840 }, { "epoch": 0.3271665874495753, "grad_norm": 2.170703887939453, "learning_rate": 3.541308531109686e-06, "loss": 0.9645, "step": 26845 }, { "epoch": 0.3272275236737231, "grad_norm": 2.144476890563965, "learning_rate": 3.540987812700449e-06, "loss": 0.8672, "step": 26850 }, { "epoch": 0.32728845989787086, "grad_norm": 2.7774734497070312, "learning_rate": 3.5406670942912126e-06, "loss": 0.835, "step": 26855 }, { "epoch": 0.3273493961220187, "grad_norm": 1.7785183191299438, "learning_rate": 3.540346375881976e-06, "loss": 0.9488, "step": 26860 }, { "epoch": 0.3274103323461665, "grad_norm": 2.1198668479919434, "learning_rate": 3.5400256574727395e-06, "loss": 0.8402, "step": 26865 }, { "epoch": 0.32747126857031433, "grad_norm": 1.8696568012237549, "learning_rate": 3.5397049390635025e-06, "loss": 0.8776, "step": 26870 }, { "epoch": 0.3275322047944621, "grad_norm": 1.9361505508422852, "learning_rate": 3.5393842206542655e-06, "loss": 0.8351, "step": 26875 }, { "epoch": 0.3275931410186099, "grad_norm": 1.935409665107727, "learning_rate": 3.5390635022450294e-06, "loss": 0.8218, "step": 26880 }, { "epoch": 0.32765407724275775, "grad_norm": 1.9557561874389648, "learning_rate": 3.5387427838357924e-06, "loss": 0.8474, "step": 26885 }, { "epoch": 0.3277150134669055, "grad_norm": 1.8836818933486938, "learning_rate": 3.5384220654265554e-06, "loss": 0.8703, "step": 26890 }, { "epoch": 0.32777594969105334, "grad_norm": 1.8521872758865356, "learning_rate": 3.5381013470173192e-06, "loss": 0.8571, "step": 26895 }, { "epoch": 0.32783688591520116, "grad_norm": 2.3021886348724365, "learning_rate": 3.5377806286080823e-06, "loss": 0.9652, "step": 26900 }, { "epoch": 0.327897822139349, "grad_norm": 1.7942787408828735, "learning_rate": 3.5374599101988453e-06, "loss": 0.868, "step": 26905 }, { "epoch": 0.32795875836349675, "grad_norm": 1.921983003616333, "learning_rate": 3.537139191789609e-06, "loss": 0.8537, "step": 26910 }, { "epoch": 0.3280196945876446, "grad_norm": 2.4027867317199707, "learning_rate": 3.536818473380372e-06, "loss": 0.8522, "step": 26915 }, { "epoch": 0.3280806308117924, "grad_norm": 2.2492988109588623, "learning_rate": 3.5364977549711356e-06, "loss": 0.886, "step": 26920 }, { "epoch": 0.32814156703594016, "grad_norm": 1.7997218370437622, "learning_rate": 3.536177036561899e-06, "loss": 0.8941, "step": 26925 }, { "epoch": 0.328202503260088, "grad_norm": 1.9076077938079834, "learning_rate": 3.5358563181526625e-06, "loss": 0.9215, "step": 26930 }, { "epoch": 0.3282634394842358, "grad_norm": 1.821587085723877, "learning_rate": 3.5355355997434255e-06, "loss": 0.879, "step": 26935 }, { "epoch": 0.32832437570838363, "grad_norm": 1.999219536781311, "learning_rate": 3.535214881334189e-06, "loss": 0.8677, "step": 26940 }, { "epoch": 0.3283853119325314, "grad_norm": 1.9492243528366089, "learning_rate": 3.5348941629249524e-06, "loss": 0.8839, "step": 26945 }, { "epoch": 0.3284462481566792, "grad_norm": 1.5543490648269653, "learning_rate": 3.5345734445157154e-06, "loss": 0.8957, "step": 26950 }, { "epoch": 0.32850718438082704, "grad_norm": 1.9414321184158325, "learning_rate": 3.5342527261064793e-06, "loss": 0.8709, "step": 26955 }, { "epoch": 0.3285681206049748, "grad_norm": 1.8141146898269653, "learning_rate": 3.5339320076972423e-06, "loss": 0.8431, "step": 26960 }, { "epoch": 0.32862905682912263, "grad_norm": 2.062420129776001, "learning_rate": 3.5336112892880053e-06, "loss": 0.8167, "step": 26965 }, { "epoch": 0.32868999305327046, "grad_norm": 1.8531889915466309, "learning_rate": 3.5332905708787683e-06, "loss": 0.8927, "step": 26970 }, { "epoch": 0.3287509292774183, "grad_norm": 1.6169307231903076, "learning_rate": 3.532969852469532e-06, "loss": 0.8202, "step": 26975 }, { "epoch": 0.32881186550156605, "grad_norm": 1.8938733339309692, "learning_rate": 3.532649134060295e-06, "loss": 0.8301, "step": 26980 }, { "epoch": 0.32887280172571387, "grad_norm": 2.0251004695892334, "learning_rate": 3.5323284156510586e-06, "loss": 0.9066, "step": 26985 }, { "epoch": 0.3289337379498617, "grad_norm": 1.7596815824508667, "learning_rate": 3.532007697241822e-06, "loss": 0.8152, "step": 26990 }, { "epoch": 0.32899467417400946, "grad_norm": 1.9046151638031006, "learning_rate": 3.531686978832585e-06, "loss": 0.9024, "step": 26995 }, { "epoch": 0.3290556103981573, "grad_norm": 1.9789122343063354, "learning_rate": 3.5313662604233485e-06, "loss": 0.9205, "step": 27000 }, { "epoch": 0.3291165466223051, "grad_norm": 2.317866325378418, "learning_rate": 3.531045542014112e-06, "loss": 0.9194, "step": 27005 }, { "epoch": 0.32917748284645293, "grad_norm": 1.906071424484253, "learning_rate": 3.5307248236048754e-06, "loss": 0.8522, "step": 27010 }, { "epoch": 0.3292384190706007, "grad_norm": 1.8748128414154053, "learning_rate": 3.5304041051956384e-06, "loss": 0.8496, "step": 27015 }, { "epoch": 0.3292993552947485, "grad_norm": 1.7349004745483398, "learning_rate": 3.530083386786402e-06, "loss": 0.8303, "step": 27020 }, { "epoch": 0.32936029151889634, "grad_norm": 1.745421290397644, "learning_rate": 3.5297626683771653e-06, "loss": 0.8074, "step": 27025 }, { "epoch": 0.3294212277430441, "grad_norm": 1.9802557229995728, "learning_rate": 3.5294419499679283e-06, "loss": 0.8759, "step": 27030 }, { "epoch": 0.32948216396719193, "grad_norm": 1.5924023389816284, "learning_rate": 3.529121231558692e-06, "loss": 0.8931, "step": 27035 }, { "epoch": 0.32954310019133976, "grad_norm": 2.01608943939209, "learning_rate": 3.528800513149455e-06, "loss": 0.8817, "step": 27040 }, { "epoch": 0.3296040364154876, "grad_norm": 2.1315672397613525, "learning_rate": 3.5284797947402182e-06, "loss": 0.8481, "step": 27045 }, { "epoch": 0.32966497263963535, "grad_norm": 1.5789412260055542, "learning_rate": 3.5281590763309812e-06, "loss": 0.8619, "step": 27050 }, { "epoch": 0.32972590886378317, "grad_norm": 2.128236770629883, "learning_rate": 3.527838357921745e-06, "loss": 0.9413, "step": 27055 }, { "epoch": 0.329786845087931, "grad_norm": 2.07096004486084, "learning_rate": 3.527517639512508e-06, "loss": 0.8427, "step": 27060 }, { "epoch": 0.32984778131207876, "grad_norm": 1.7678712606430054, "learning_rate": 3.5271969211032716e-06, "loss": 0.8512, "step": 27065 }, { "epoch": 0.3299087175362266, "grad_norm": 1.9514646530151367, "learning_rate": 3.526876202694035e-06, "loss": 0.8338, "step": 27070 }, { "epoch": 0.3299696537603744, "grad_norm": 2.5648410320281982, "learning_rate": 3.526555484284798e-06, "loss": 0.8519, "step": 27075 }, { "epoch": 0.33003058998452217, "grad_norm": 1.9898793697357178, "learning_rate": 3.5262347658755615e-06, "loss": 0.8335, "step": 27080 }, { "epoch": 0.33009152620867, "grad_norm": 2.0770249366760254, "learning_rate": 3.525914047466325e-06, "loss": 0.8836, "step": 27085 }, { "epoch": 0.3301524624328178, "grad_norm": 1.9118469953536987, "learning_rate": 3.5255933290570883e-06, "loss": 0.8647, "step": 27090 }, { "epoch": 0.33021339865696564, "grad_norm": 1.8405665159225464, "learning_rate": 3.5252726106478514e-06, "loss": 0.8557, "step": 27095 }, { "epoch": 0.3302743348811134, "grad_norm": 1.9079428911209106, "learning_rate": 3.524951892238615e-06, "loss": 0.832, "step": 27100 }, { "epoch": 0.33033527110526123, "grad_norm": 1.7362630367279053, "learning_rate": 3.5246311738293782e-06, "loss": 0.8024, "step": 27105 }, { "epoch": 0.33039620732940905, "grad_norm": 2.062958002090454, "learning_rate": 3.5243104554201413e-06, "loss": 0.8958, "step": 27110 }, { "epoch": 0.3304571435535568, "grad_norm": 2.138211965560913, "learning_rate": 3.523989737010905e-06, "loss": 0.9018, "step": 27115 }, { "epoch": 0.33051807977770464, "grad_norm": 2.1645870208740234, "learning_rate": 3.523669018601668e-06, "loss": 0.8864, "step": 27120 }, { "epoch": 0.33057901600185247, "grad_norm": 1.8608014583587646, "learning_rate": 3.523348300192431e-06, "loss": 0.8486, "step": 27125 }, { "epoch": 0.3306399522260003, "grad_norm": 1.7911092042922974, "learning_rate": 3.523027581783194e-06, "loss": 0.8175, "step": 27130 }, { "epoch": 0.33070088845014806, "grad_norm": 2.1394433975219727, "learning_rate": 3.522706863373958e-06, "loss": 0.7902, "step": 27135 }, { "epoch": 0.3307618246742959, "grad_norm": 2.0254099369049072, "learning_rate": 3.522386144964721e-06, "loss": 0.8831, "step": 27140 }, { "epoch": 0.3308227608984437, "grad_norm": 2.0404608249664307, "learning_rate": 3.5220654265554845e-06, "loss": 0.8961, "step": 27145 }, { "epoch": 0.33088369712259147, "grad_norm": 1.939566731452942, "learning_rate": 3.521744708146248e-06, "loss": 0.8602, "step": 27150 }, { "epoch": 0.3309446333467393, "grad_norm": 1.8594787120819092, "learning_rate": 3.5214239897370114e-06, "loss": 0.83, "step": 27155 }, { "epoch": 0.3310055695708871, "grad_norm": 1.7926713228225708, "learning_rate": 3.5211032713277744e-06, "loss": 0.8348, "step": 27160 }, { "epoch": 0.33106650579503494, "grad_norm": 1.845604658126831, "learning_rate": 3.520782552918538e-06, "loss": 0.9233, "step": 27165 }, { "epoch": 0.3311274420191827, "grad_norm": 1.9347901344299316, "learning_rate": 3.5204618345093013e-06, "loss": 0.8694, "step": 27170 }, { "epoch": 0.33118837824333053, "grad_norm": 1.9250067472457886, "learning_rate": 3.5201411161000643e-06, "loss": 0.9057, "step": 27175 }, { "epoch": 0.33124931446747835, "grad_norm": 1.8491122722625732, "learning_rate": 3.519820397690828e-06, "loss": 0.741, "step": 27180 }, { "epoch": 0.3313102506916261, "grad_norm": 1.8505948781967163, "learning_rate": 3.519499679281591e-06, "loss": 0.8532, "step": 27185 }, { "epoch": 0.33137118691577394, "grad_norm": 1.9403244256973267, "learning_rate": 3.519178960872354e-06, "loss": 0.8754, "step": 27190 }, { "epoch": 0.33143212313992176, "grad_norm": 1.871371865272522, "learning_rate": 3.518858242463118e-06, "loss": 0.8708, "step": 27195 }, { "epoch": 0.3314930593640696, "grad_norm": 1.8376787900924683, "learning_rate": 3.518537524053881e-06, "loss": 0.8448, "step": 27200 }, { "epoch": 0.33155399558821735, "grad_norm": 1.9120460748672485, "learning_rate": 3.518216805644644e-06, "loss": 0.8216, "step": 27205 }, { "epoch": 0.3316149318123652, "grad_norm": 2.035248041152954, "learning_rate": 3.517896087235408e-06, "loss": 0.8444, "step": 27210 }, { "epoch": 0.331675868036513, "grad_norm": 1.9860774278640747, "learning_rate": 3.517575368826171e-06, "loss": 0.8447, "step": 27215 }, { "epoch": 0.33173680426066077, "grad_norm": 1.791244626045227, "learning_rate": 3.517254650416934e-06, "loss": 0.8777, "step": 27220 }, { "epoch": 0.3317977404848086, "grad_norm": 1.8291432857513428, "learning_rate": 3.5169339320076974e-06, "loss": 0.8564, "step": 27225 }, { "epoch": 0.3318586767089564, "grad_norm": 1.749367356300354, "learning_rate": 3.516613213598461e-06, "loss": 0.888, "step": 27230 }, { "epoch": 0.33191961293310424, "grad_norm": 1.8057197332382202, "learning_rate": 3.5162924951892243e-06, "loss": 0.9416, "step": 27235 }, { "epoch": 0.331980549157252, "grad_norm": 2.0630388259887695, "learning_rate": 3.5159717767799873e-06, "loss": 0.804, "step": 27240 }, { "epoch": 0.3320414853813998, "grad_norm": 1.7363128662109375, "learning_rate": 3.5156510583707508e-06, "loss": 0.8277, "step": 27245 }, { "epoch": 0.33210242160554765, "grad_norm": 1.6432937383651733, "learning_rate": 3.515330339961514e-06, "loss": 0.8552, "step": 27250 }, { "epoch": 0.3321633578296954, "grad_norm": 2.5239176750183105, "learning_rate": 3.515009621552277e-06, "loss": 0.8891, "step": 27255 }, { "epoch": 0.33222429405384324, "grad_norm": 1.8865997791290283, "learning_rate": 3.514688903143041e-06, "loss": 0.8635, "step": 27260 }, { "epoch": 0.33228523027799106, "grad_norm": 2.1789515018463135, "learning_rate": 3.514368184733804e-06, "loss": 0.8514, "step": 27265 }, { "epoch": 0.3323461665021389, "grad_norm": 2.1352481842041016, "learning_rate": 3.514047466324567e-06, "loss": 0.8843, "step": 27270 }, { "epoch": 0.33240710272628665, "grad_norm": 1.8497371673583984, "learning_rate": 3.513726747915331e-06, "loss": 0.8224, "step": 27275 }, { "epoch": 0.3324680389504345, "grad_norm": 2.1110177040100098, "learning_rate": 3.513406029506094e-06, "loss": 0.8825, "step": 27280 }, { "epoch": 0.3325289751745823, "grad_norm": 2.1728880405426025, "learning_rate": 3.513085311096857e-06, "loss": 0.9079, "step": 27285 }, { "epoch": 0.33258991139873006, "grad_norm": 2.049078941345215, "learning_rate": 3.512764592687621e-06, "loss": 0.934, "step": 27290 }, { "epoch": 0.3326508476228779, "grad_norm": 2.075324058532715, "learning_rate": 3.512443874278384e-06, "loss": 0.8621, "step": 27295 }, { "epoch": 0.3327117838470257, "grad_norm": 1.8681014776229858, "learning_rate": 3.512123155869147e-06, "loss": 0.856, "step": 27300 }, { "epoch": 0.33277272007117353, "grad_norm": 1.8893957138061523, "learning_rate": 3.5118024374599103e-06, "loss": 0.8293, "step": 27305 }, { "epoch": 0.3328336562953213, "grad_norm": 2.0917177200317383, "learning_rate": 3.5114817190506738e-06, "loss": 0.8927, "step": 27310 }, { "epoch": 0.3328945925194691, "grad_norm": 1.7874324321746826, "learning_rate": 3.5111610006414372e-06, "loss": 0.8515, "step": 27315 }, { "epoch": 0.33295552874361695, "grad_norm": 2.0808334350585938, "learning_rate": 3.5108402822322002e-06, "loss": 0.8456, "step": 27320 }, { "epoch": 0.3330164649677647, "grad_norm": 1.7568236589431763, "learning_rate": 3.5105195638229637e-06, "loss": 0.8281, "step": 27325 }, { "epoch": 0.33307740119191254, "grad_norm": 1.9281781911849976, "learning_rate": 3.510198845413727e-06, "loss": 0.825, "step": 27330 }, { "epoch": 0.33313833741606036, "grad_norm": 1.8254551887512207, "learning_rate": 3.50987812700449e-06, "loss": 0.8091, "step": 27335 }, { "epoch": 0.3331992736402082, "grad_norm": 1.7732667922973633, "learning_rate": 3.509557408595254e-06, "loss": 0.8864, "step": 27340 }, { "epoch": 0.33326020986435595, "grad_norm": 1.8482006788253784, "learning_rate": 3.509236690186017e-06, "loss": 0.8112, "step": 27345 }, { "epoch": 0.3333211460885038, "grad_norm": 2.4524986743927, "learning_rate": 3.50891597177678e-06, "loss": 0.9086, "step": 27350 }, { "epoch": 0.3333820823126516, "grad_norm": 1.9911694526672363, "learning_rate": 3.508595253367544e-06, "loss": 0.8184, "step": 27355 }, { "epoch": 0.33344301853679936, "grad_norm": 1.958802580833435, "learning_rate": 3.508274534958307e-06, "loss": 0.8724, "step": 27360 }, { "epoch": 0.3335039547609472, "grad_norm": 1.9230396747589111, "learning_rate": 3.50795381654907e-06, "loss": 0.8948, "step": 27365 }, { "epoch": 0.333564890985095, "grad_norm": 1.7487159967422485, "learning_rate": 3.507633098139834e-06, "loss": 0.864, "step": 27370 }, { "epoch": 0.33362582720924283, "grad_norm": 1.73824942111969, "learning_rate": 3.507312379730597e-06, "loss": 0.811, "step": 27375 }, { "epoch": 0.3336867634333906, "grad_norm": 2.06124210357666, "learning_rate": 3.5069916613213603e-06, "loss": 0.8557, "step": 27380 }, { "epoch": 0.3337476996575384, "grad_norm": 1.9498569965362549, "learning_rate": 3.5066709429121233e-06, "loss": 0.8302, "step": 27385 }, { "epoch": 0.33380863588168624, "grad_norm": 2.6123459339141846, "learning_rate": 3.5063502245028867e-06, "loss": 0.8842, "step": 27390 }, { "epoch": 0.333869572105834, "grad_norm": 1.7353519201278687, "learning_rate": 3.50602950609365e-06, "loss": 0.8619, "step": 27395 }, { "epoch": 0.33393050832998183, "grad_norm": 2.0078704357147217, "learning_rate": 3.505708787684413e-06, "loss": 0.8384, "step": 27400 }, { "epoch": 0.33399144455412966, "grad_norm": 1.8266186714172363, "learning_rate": 3.505388069275177e-06, "loss": 0.8668, "step": 27405 }, { "epoch": 0.3340523807782775, "grad_norm": 1.718125820159912, "learning_rate": 3.50506735086594e-06, "loss": 0.8411, "step": 27410 }, { "epoch": 0.33411331700242525, "grad_norm": 1.8766429424285889, "learning_rate": 3.504746632456703e-06, "loss": 0.8508, "step": 27415 }, { "epoch": 0.33417425322657307, "grad_norm": 2.2987775802612305, "learning_rate": 3.504425914047467e-06, "loss": 0.8741, "step": 27420 }, { "epoch": 0.3342351894507209, "grad_norm": 2.5885825157165527, "learning_rate": 3.50410519563823e-06, "loss": 0.8571, "step": 27425 }, { "epoch": 0.33429612567486866, "grad_norm": 1.98336660861969, "learning_rate": 3.503784477228993e-06, "loss": 0.7986, "step": 27430 }, { "epoch": 0.3343570618990165, "grad_norm": 1.7692909240722656, "learning_rate": 3.503463758819757e-06, "loss": 0.9113, "step": 27435 }, { "epoch": 0.3344179981231643, "grad_norm": 1.8622727394104004, "learning_rate": 3.50314304041052e-06, "loss": 0.9136, "step": 27440 }, { "epoch": 0.33447893434731213, "grad_norm": 1.7386187314987183, "learning_rate": 3.502822322001283e-06, "loss": 0.7969, "step": 27445 }, { "epoch": 0.3345398705714599, "grad_norm": 1.890158772468567, "learning_rate": 3.5025016035920467e-06, "loss": 0.7883, "step": 27450 }, { "epoch": 0.3346008067956077, "grad_norm": 1.8683397769927979, "learning_rate": 3.5021808851828097e-06, "loss": 0.758, "step": 27455 }, { "epoch": 0.33466174301975554, "grad_norm": 1.6346278190612793, "learning_rate": 3.501860166773573e-06, "loss": 0.7654, "step": 27460 }, { "epoch": 0.3347226792439033, "grad_norm": 2.0575380325317383, "learning_rate": 3.501539448364336e-06, "loss": 0.8856, "step": 27465 }, { "epoch": 0.33478361546805113, "grad_norm": 1.640594720840454, "learning_rate": 3.5012187299550996e-06, "loss": 0.8949, "step": 27470 }, { "epoch": 0.33484455169219896, "grad_norm": 1.7743096351623535, "learning_rate": 3.500898011545863e-06, "loss": 0.8067, "step": 27475 }, { "epoch": 0.3349054879163468, "grad_norm": 1.8438220024108887, "learning_rate": 3.500577293136626e-06, "loss": 0.8253, "step": 27480 }, { "epoch": 0.33496642414049455, "grad_norm": 1.6245900392532349, "learning_rate": 3.50025657472739e-06, "loss": 0.86, "step": 27485 }, { "epoch": 0.33502736036464237, "grad_norm": 1.8690303564071655, "learning_rate": 3.499935856318153e-06, "loss": 0.8762, "step": 27490 }, { "epoch": 0.3350882965887902, "grad_norm": 2.182126045227051, "learning_rate": 3.499615137908916e-06, "loss": 0.9168, "step": 27495 }, { "epoch": 0.33514923281293796, "grad_norm": 1.8525563478469849, "learning_rate": 3.49929441949968e-06, "loss": 0.8704, "step": 27500 }, { "epoch": 0.3352101690370858, "grad_norm": 2.053943157196045, "learning_rate": 3.498973701090443e-06, "loss": 0.8277, "step": 27505 }, { "epoch": 0.3352711052612336, "grad_norm": 1.8519175052642822, "learning_rate": 3.498652982681206e-06, "loss": 0.9385, "step": 27510 }, { "epoch": 0.3353320414853814, "grad_norm": 1.6762678623199463, "learning_rate": 3.4983322642719698e-06, "loss": 0.8986, "step": 27515 }, { "epoch": 0.3353929777095292, "grad_norm": 2.13097882270813, "learning_rate": 3.4980115458627328e-06, "loss": 0.8692, "step": 27520 }, { "epoch": 0.335453913933677, "grad_norm": 1.8729579448699951, "learning_rate": 3.4976908274534958e-06, "loss": 0.8813, "step": 27525 }, { "epoch": 0.33551485015782484, "grad_norm": 2.1062159538269043, "learning_rate": 3.4973701090442596e-06, "loss": 0.9081, "step": 27530 }, { "epoch": 0.3355757863819726, "grad_norm": 1.9692355394363403, "learning_rate": 3.4970493906350227e-06, "loss": 0.8753, "step": 27535 }, { "epoch": 0.33563672260612043, "grad_norm": 1.7638840675354004, "learning_rate": 3.496728672225786e-06, "loss": 0.8105, "step": 27540 }, { "epoch": 0.33569765883026825, "grad_norm": 2.3576905727386475, "learning_rate": 3.4964079538165495e-06, "loss": 0.8481, "step": 27545 }, { "epoch": 0.3357585950544161, "grad_norm": 1.8173372745513916, "learning_rate": 3.4960872354073126e-06, "loss": 0.8931, "step": 27550 }, { "epoch": 0.33581953127856384, "grad_norm": 1.919792890548706, "learning_rate": 3.495766516998076e-06, "loss": 0.85, "step": 27555 }, { "epoch": 0.33588046750271167, "grad_norm": 2.0448882579803467, "learning_rate": 3.495445798588839e-06, "loss": 0.8981, "step": 27560 }, { "epoch": 0.3359414037268595, "grad_norm": 2.2834692001342773, "learning_rate": 3.495125080179603e-06, "loss": 0.844, "step": 27565 }, { "epoch": 0.33600233995100726, "grad_norm": 1.8072764873504639, "learning_rate": 3.494804361770366e-06, "loss": 0.75, "step": 27570 }, { "epoch": 0.3360632761751551, "grad_norm": 1.9219809770584106, "learning_rate": 3.494483643361129e-06, "loss": 0.8533, "step": 27575 }, { "epoch": 0.3361242123993029, "grad_norm": 1.902475118637085, "learning_rate": 3.4941629249518928e-06, "loss": 0.8514, "step": 27580 }, { "epoch": 0.33618514862345067, "grad_norm": 1.7703588008880615, "learning_rate": 3.493842206542656e-06, "loss": 0.8235, "step": 27585 }, { "epoch": 0.3362460848475985, "grad_norm": 2.1637511253356934, "learning_rate": 3.493521488133419e-06, "loss": 0.8581, "step": 27590 }, { "epoch": 0.3363070210717463, "grad_norm": 1.8864145278930664, "learning_rate": 3.4932007697241827e-06, "loss": 0.8107, "step": 27595 }, { "epoch": 0.33636795729589414, "grad_norm": 1.749463677406311, "learning_rate": 3.4928800513149457e-06, "loss": 0.8504, "step": 27600 }, { "epoch": 0.3364288935200419, "grad_norm": 1.7351442575454712, "learning_rate": 3.4925593329057087e-06, "loss": 0.823, "step": 27605 }, { "epoch": 0.33648982974418973, "grad_norm": 2.3304941654205322, "learning_rate": 3.4922386144964726e-06, "loss": 0.8899, "step": 27610 }, { "epoch": 0.33655076596833755, "grad_norm": 2.232814311981201, "learning_rate": 3.4919178960872356e-06, "loss": 0.8466, "step": 27615 }, { "epoch": 0.3366117021924853, "grad_norm": 1.8356949090957642, "learning_rate": 3.491597177677999e-06, "loss": 0.7705, "step": 27620 }, { "epoch": 0.33667263841663314, "grad_norm": 1.9125902652740479, "learning_rate": 3.4912764592687625e-06, "loss": 0.8359, "step": 27625 }, { "epoch": 0.33673357464078096, "grad_norm": 1.9444787502288818, "learning_rate": 3.490955740859526e-06, "loss": 0.8607, "step": 27630 }, { "epoch": 0.3367945108649288, "grad_norm": 2.17677903175354, "learning_rate": 3.490635022450289e-06, "loss": 0.8331, "step": 27635 }, { "epoch": 0.33685544708907655, "grad_norm": 2.0505168437957764, "learning_rate": 3.490314304041052e-06, "loss": 0.9125, "step": 27640 }, { "epoch": 0.3369163833132244, "grad_norm": 1.9425604343414307, "learning_rate": 3.489993585631816e-06, "loss": 0.8598, "step": 27645 }, { "epoch": 0.3369773195373722, "grad_norm": 1.681191325187683, "learning_rate": 3.489672867222579e-06, "loss": 0.8318, "step": 27650 }, { "epoch": 0.33703825576151997, "grad_norm": 1.5246152877807617, "learning_rate": 3.489352148813342e-06, "loss": 0.8981, "step": 27655 }, { "epoch": 0.3370991919856678, "grad_norm": 1.8350918292999268, "learning_rate": 3.4890314304041057e-06, "loss": 0.8606, "step": 27660 }, { "epoch": 0.3371601282098156, "grad_norm": 2.077669143676758, "learning_rate": 3.4887107119948687e-06, "loss": 0.9099, "step": 27665 }, { "epoch": 0.33722106443396344, "grad_norm": 2.0569849014282227, "learning_rate": 3.4883899935856317e-06, "loss": 0.893, "step": 27670 }, { "epoch": 0.3372820006581112, "grad_norm": 1.906933307647705, "learning_rate": 3.4880692751763956e-06, "loss": 0.87, "step": 27675 }, { "epoch": 0.337342936882259, "grad_norm": 1.8868504762649536, "learning_rate": 3.4877485567671586e-06, "loss": 0.8048, "step": 27680 }, { "epoch": 0.33740387310640685, "grad_norm": 1.7872213125228882, "learning_rate": 3.487427838357922e-06, "loss": 0.8229, "step": 27685 }, { "epoch": 0.3374648093305546, "grad_norm": 2.5112860202789307, "learning_rate": 3.4871071199486855e-06, "loss": 0.9264, "step": 27690 }, { "epoch": 0.33752574555470244, "grad_norm": 1.885498046875, "learning_rate": 3.4867864015394485e-06, "loss": 0.801, "step": 27695 }, { "epoch": 0.33758668177885026, "grad_norm": 1.9558265209197998, "learning_rate": 3.486465683130212e-06, "loss": 0.8931, "step": 27700 }, { "epoch": 0.3376476180029981, "grad_norm": 1.9215253591537476, "learning_rate": 3.4861449647209754e-06, "loss": 0.8192, "step": 27705 }, { "epoch": 0.33770855422714585, "grad_norm": 1.8784281015396118, "learning_rate": 3.485824246311739e-06, "loss": 0.8537, "step": 27710 }, { "epoch": 0.3377694904512937, "grad_norm": 2.1498775482177734, "learning_rate": 3.485503527902502e-06, "loss": 0.924, "step": 27715 }, { "epoch": 0.3378304266754415, "grad_norm": 2.1554806232452393, "learning_rate": 3.485182809493265e-06, "loss": 0.876, "step": 27720 }, { "epoch": 0.33789136289958926, "grad_norm": 1.6863043308258057, "learning_rate": 3.4848620910840287e-06, "loss": 0.8525, "step": 27725 }, { "epoch": 0.3379522991237371, "grad_norm": 2.17702579498291, "learning_rate": 3.4845413726747918e-06, "loss": 0.8774, "step": 27730 }, { "epoch": 0.3380132353478849, "grad_norm": 2.4346065521240234, "learning_rate": 3.4842206542655548e-06, "loss": 0.83, "step": 27735 }, { "epoch": 0.33807417157203273, "grad_norm": 1.955039381980896, "learning_rate": 3.4838999358563186e-06, "loss": 0.8912, "step": 27740 }, { "epoch": 0.3381351077961805, "grad_norm": 1.819747805595398, "learning_rate": 3.4835792174470817e-06, "loss": 0.795, "step": 27745 }, { "epoch": 0.3381960440203283, "grad_norm": 1.8274105787277222, "learning_rate": 3.4832584990378447e-06, "loss": 0.8765, "step": 27750 }, { "epoch": 0.33825698024447615, "grad_norm": 1.6346241235733032, "learning_rate": 3.4829377806286085e-06, "loss": 0.8566, "step": 27755 }, { "epoch": 0.3383179164686239, "grad_norm": 1.829552412033081, "learning_rate": 3.4826170622193716e-06, "loss": 0.8939, "step": 27760 }, { "epoch": 0.33837885269277174, "grad_norm": 1.6899601221084595, "learning_rate": 3.482296343810135e-06, "loss": 0.7583, "step": 27765 }, { "epoch": 0.33843978891691956, "grad_norm": 1.968132495880127, "learning_rate": 3.4819756254008984e-06, "loss": 0.8518, "step": 27770 }, { "epoch": 0.3385007251410674, "grad_norm": 2.411681890487671, "learning_rate": 3.4816549069916614e-06, "loss": 0.8806, "step": 27775 }, { "epoch": 0.33856166136521515, "grad_norm": 1.59242844581604, "learning_rate": 3.481334188582425e-06, "loss": 0.8656, "step": 27780 }, { "epoch": 0.338622597589363, "grad_norm": 1.8824292421340942, "learning_rate": 3.4810134701731883e-06, "loss": 0.883, "step": 27785 }, { "epoch": 0.3386835338135108, "grad_norm": 1.7657510042190552, "learning_rate": 3.4806927517639518e-06, "loss": 0.8352, "step": 27790 }, { "epoch": 0.33874447003765856, "grad_norm": 2.070786714553833, "learning_rate": 3.4803720333547148e-06, "loss": 0.8298, "step": 27795 }, { "epoch": 0.3388054062618064, "grad_norm": 1.933190107345581, "learning_rate": 3.4800513149454782e-06, "loss": 0.8998, "step": 27800 }, { "epoch": 0.3388663424859542, "grad_norm": 1.996414303779602, "learning_rate": 3.4797305965362417e-06, "loss": 0.8147, "step": 27805 }, { "epoch": 0.33892727871010203, "grad_norm": 1.8497483730316162, "learning_rate": 3.4794098781270047e-06, "loss": 0.9165, "step": 27810 }, { "epoch": 0.3389882149342498, "grad_norm": 1.9489418268203735, "learning_rate": 3.4790891597177677e-06, "loss": 0.8895, "step": 27815 }, { "epoch": 0.3390491511583976, "grad_norm": 1.8401308059692383, "learning_rate": 3.4787684413085316e-06, "loss": 0.8297, "step": 27820 }, { "epoch": 0.33911008738254544, "grad_norm": 1.9882620573043823, "learning_rate": 3.4784477228992946e-06, "loss": 0.8034, "step": 27825 }, { "epoch": 0.3391710236066932, "grad_norm": 1.8827075958251953, "learning_rate": 3.4781270044900576e-06, "loss": 0.9359, "step": 27830 }, { "epoch": 0.33923195983084103, "grad_norm": 1.8671008348464966, "learning_rate": 3.4778062860808215e-06, "loss": 0.8897, "step": 27835 }, { "epoch": 0.33929289605498886, "grad_norm": 1.8002195358276367, "learning_rate": 3.4774855676715845e-06, "loss": 0.8881, "step": 27840 }, { "epoch": 0.3393538322791367, "grad_norm": 1.8565081357955933, "learning_rate": 3.477164849262348e-06, "loss": 0.811, "step": 27845 }, { "epoch": 0.33941476850328445, "grad_norm": 2.086798667907715, "learning_rate": 3.4768441308531114e-06, "loss": 0.9334, "step": 27850 }, { "epoch": 0.33947570472743227, "grad_norm": 2.289591073989868, "learning_rate": 3.476523412443875e-06, "loss": 0.8818, "step": 27855 }, { "epoch": 0.3395366409515801, "grad_norm": 2.561960220336914, "learning_rate": 3.476202694034638e-06, "loss": 0.8516, "step": 27860 }, { "epoch": 0.33959757717572786, "grad_norm": 1.9011400938034058, "learning_rate": 3.4758819756254013e-06, "loss": 0.8262, "step": 27865 }, { "epoch": 0.3396585133998757, "grad_norm": 1.6809470653533936, "learning_rate": 3.4755612572161647e-06, "loss": 0.8718, "step": 27870 }, { "epoch": 0.3397194496240235, "grad_norm": 1.8154295682907104, "learning_rate": 3.4752405388069277e-06, "loss": 0.9121, "step": 27875 }, { "epoch": 0.33978038584817133, "grad_norm": 1.86089289188385, "learning_rate": 3.4749198203976916e-06, "loss": 0.8591, "step": 27880 }, { "epoch": 0.3398413220723191, "grad_norm": 2.051661252975464, "learning_rate": 3.4745991019884546e-06, "loss": 0.8608, "step": 27885 }, { "epoch": 0.3399022582964669, "grad_norm": 1.7746939659118652, "learning_rate": 3.4742783835792176e-06, "loss": 0.8609, "step": 27890 }, { "epoch": 0.33996319452061474, "grad_norm": 2.1003503799438477, "learning_rate": 3.4739576651699806e-06, "loss": 0.8857, "step": 27895 }, { "epoch": 0.3400241307447625, "grad_norm": 1.982866883277893, "learning_rate": 3.4736369467607445e-06, "loss": 0.8534, "step": 27900 }, { "epoch": 0.34008506696891033, "grad_norm": 1.915439486503601, "learning_rate": 3.4733162283515075e-06, "loss": 0.8921, "step": 27905 }, { "epoch": 0.34014600319305816, "grad_norm": 1.7617428302764893, "learning_rate": 3.472995509942271e-06, "loss": 0.9471, "step": 27910 }, { "epoch": 0.340206939417206, "grad_norm": 1.9371707439422607, "learning_rate": 3.4726747915330344e-06, "loss": 0.8655, "step": 27915 }, { "epoch": 0.34026787564135375, "grad_norm": 2.134434461593628, "learning_rate": 3.4723540731237974e-06, "loss": 0.8368, "step": 27920 }, { "epoch": 0.34032881186550157, "grad_norm": 1.847859501838684, "learning_rate": 3.472033354714561e-06, "loss": 0.8272, "step": 27925 }, { "epoch": 0.3403897480896494, "grad_norm": 2.0371220111846924, "learning_rate": 3.4717126363053243e-06, "loss": 0.8223, "step": 27930 }, { "epoch": 0.34045068431379716, "grad_norm": 1.933695912361145, "learning_rate": 3.4713919178960877e-06, "loss": 0.8143, "step": 27935 }, { "epoch": 0.340511620537945, "grad_norm": 1.893368124961853, "learning_rate": 3.4710711994868507e-06, "loss": 0.8315, "step": 27940 }, { "epoch": 0.3405725567620928, "grad_norm": 1.6054414510726929, "learning_rate": 3.470750481077614e-06, "loss": 0.8431, "step": 27945 }, { "epoch": 0.3406334929862406, "grad_norm": 1.9948608875274658, "learning_rate": 3.4704297626683776e-06, "loss": 0.8925, "step": 27950 }, { "epoch": 0.3406944292103884, "grad_norm": 2.22275972366333, "learning_rate": 3.4701090442591406e-06, "loss": 0.9032, "step": 27955 }, { "epoch": 0.3407553654345362, "grad_norm": 2.140648126602173, "learning_rate": 3.4697883258499045e-06, "loss": 0.8871, "step": 27960 }, { "epoch": 0.34081630165868404, "grad_norm": 1.8464176654815674, "learning_rate": 3.4694676074406675e-06, "loss": 0.8475, "step": 27965 }, { "epoch": 0.3408772378828318, "grad_norm": 2.010923385620117, "learning_rate": 3.4691468890314305e-06, "loss": 0.8563, "step": 27970 }, { "epoch": 0.34093817410697963, "grad_norm": 2.0081558227539062, "learning_rate": 3.4688261706221936e-06, "loss": 0.8109, "step": 27975 }, { "epoch": 0.34099911033112745, "grad_norm": 1.9756580591201782, "learning_rate": 3.4685054522129574e-06, "loss": 0.7925, "step": 27980 }, { "epoch": 0.3410600465552753, "grad_norm": 1.9928265810012817, "learning_rate": 3.4681847338037204e-06, "loss": 0.9012, "step": 27985 }, { "epoch": 0.34112098277942304, "grad_norm": 1.7170480489730835, "learning_rate": 3.467864015394484e-06, "loss": 0.8024, "step": 27990 }, { "epoch": 0.34118191900357087, "grad_norm": 1.8613587617874146, "learning_rate": 3.4675432969852473e-06, "loss": 0.8951, "step": 27995 }, { "epoch": 0.3412428552277187, "grad_norm": 1.6715017557144165, "learning_rate": 3.4672225785760103e-06, "loss": 0.8974, "step": 28000 }, { "epoch": 0.34130379145186646, "grad_norm": 1.8819586038589478, "learning_rate": 3.4669018601667738e-06, "loss": 0.8612, "step": 28005 }, { "epoch": 0.3413647276760143, "grad_norm": 2.039431571960449, "learning_rate": 3.4665811417575372e-06, "loss": 0.8165, "step": 28010 }, { "epoch": 0.3414256639001621, "grad_norm": 1.863294005393982, "learning_rate": 3.4662604233483007e-06, "loss": 0.8197, "step": 28015 }, { "epoch": 0.3414866001243099, "grad_norm": 2.1440322399139404, "learning_rate": 3.4659397049390637e-06, "loss": 0.8705, "step": 28020 }, { "epoch": 0.3415475363484577, "grad_norm": 1.8905956745147705, "learning_rate": 3.465618986529827e-06, "loss": 0.8388, "step": 28025 }, { "epoch": 0.3416084725726055, "grad_norm": 1.9247819185256958, "learning_rate": 3.4652982681205905e-06, "loss": 0.864, "step": 28030 }, { "epoch": 0.34166940879675334, "grad_norm": 2.36547589302063, "learning_rate": 3.4649775497113536e-06, "loss": 0.8363, "step": 28035 }, { "epoch": 0.3417303450209011, "grad_norm": 1.865054965019226, "learning_rate": 3.4646568313021174e-06, "loss": 0.7588, "step": 28040 }, { "epoch": 0.34179128124504893, "grad_norm": 1.93808913230896, "learning_rate": 3.4643361128928804e-06, "loss": 0.8469, "step": 28045 }, { "epoch": 0.34185221746919675, "grad_norm": 1.8452551364898682, "learning_rate": 3.4640153944836435e-06, "loss": 0.8634, "step": 28050 }, { "epoch": 0.3419131536933445, "grad_norm": 1.834235429763794, "learning_rate": 3.4636946760744065e-06, "loss": 0.9003, "step": 28055 }, { "epoch": 0.34197408991749234, "grad_norm": 1.707696557044983, "learning_rate": 3.4633739576651703e-06, "loss": 0.8797, "step": 28060 }, { "epoch": 0.34203502614164016, "grad_norm": 2.2557685375213623, "learning_rate": 3.4630532392559334e-06, "loss": 0.8874, "step": 28065 }, { "epoch": 0.342095962365788, "grad_norm": 1.9841184616088867, "learning_rate": 3.462732520846697e-06, "loss": 0.8481, "step": 28070 }, { "epoch": 0.34215689858993575, "grad_norm": 1.7072819471359253, "learning_rate": 3.4624118024374602e-06, "loss": 0.9324, "step": 28075 }, { "epoch": 0.3422178348140836, "grad_norm": 1.860629916191101, "learning_rate": 3.4620910840282233e-06, "loss": 0.9167, "step": 28080 }, { "epoch": 0.3422787710382314, "grad_norm": 1.7646387815475464, "learning_rate": 3.4617703656189867e-06, "loss": 0.8817, "step": 28085 }, { "epoch": 0.34233970726237917, "grad_norm": 2.0270063877105713, "learning_rate": 3.46144964720975e-06, "loss": 0.8256, "step": 28090 }, { "epoch": 0.342400643486527, "grad_norm": 1.9420336484909058, "learning_rate": 3.4611289288005136e-06, "loss": 0.8262, "step": 28095 }, { "epoch": 0.3424615797106748, "grad_norm": 1.9069268703460693, "learning_rate": 3.4608082103912766e-06, "loss": 0.864, "step": 28100 }, { "epoch": 0.34252251593482264, "grad_norm": 1.9816467761993408, "learning_rate": 3.4604874919820405e-06, "loss": 0.8442, "step": 28105 }, { "epoch": 0.3425834521589704, "grad_norm": 1.9914859533309937, "learning_rate": 3.4601667735728035e-06, "loss": 0.8952, "step": 28110 }, { "epoch": 0.3426443883831182, "grad_norm": 2.4888384342193604, "learning_rate": 3.4598460551635665e-06, "loss": 0.8657, "step": 28115 }, { "epoch": 0.34270532460726605, "grad_norm": 1.914803385734558, "learning_rate": 3.4595253367543304e-06, "loss": 0.8721, "step": 28120 }, { "epoch": 0.3427662608314138, "grad_norm": 2.1768009662628174, "learning_rate": 3.4592046183450934e-06, "loss": 0.8794, "step": 28125 }, { "epoch": 0.34282719705556164, "grad_norm": 1.9166173934936523, "learning_rate": 3.4588838999358564e-06, "loss": 0.8486, "step": 28130 }, { "epoch": 0.34288813327970946, "grad_norm": 2.0475285053253174, "learning_rate": 3.4585631815266203e-06, "loss": 0.8668, "step": 28135 }, { "epoch": 0.3429490695038573, "grad_norm": 2.086143970489502, "learning_rate": 3.4582424631173833e-06, "loss": 0.865, "step": 28140 }, { "epoch": 0.34301000572800505, "grad_norm": 2.759561538696289, "learning_rate": 3.4579217447081463e-06, "loss": 0.9949, "step": 28145 }, { "epoch": 0.3430709419521529, "grad_norm": 2.052948236465454, "learning_rate": 3.4576010262989097e-06, "loss": 0.8154, "step": 28150 }, { "epoch": 0.3431318781763007, "grad_norm": 2.0664939880371094, "learning_rate": 3.457280307889673e-06, "loss": 0.9174, "step": 28155 }, { "epoch": 0.34319281440044846, "grad_norm": 1.825098991394043, "learning_rate": 3.4569595894804366e-06, "loss": 0.8522, "step": 28160 }, { "epoch": 0.3432537506245963, "grad_norm": 1.8772711753845215, "learning_rate": 3.4566388710711996e-06, "loss": 0.8642, "step": 28165 }, { "epoch": 0.3433146868487441, "grad_norm": 1.90712571144104, "learning_rate": 3.456318152661963e-06, "loss": 0.8382, "step": 28170 }, { "epoch": 0.34337562307289193, "grad_norm": 2.0736284255981445, "learning_rate": 3.4559974342527265e-06, "loss": 0.8729, "step": 28175 }, { "epoch": 0.3434365592970397, "grad_norm": 1.9716036319732666, "learning_rate": 3.4556767158434895e-06, "loss": 0.9807, "step": 28180 }, { "epoch": 0.3434974955211875, "grad_norm": 1.791442632675171, "learning_rate": 3.4553559974342534e-06, "loss": 0.8983, "step": 28185 }, { "epoch": 0.34355843174533535, "grad_norm": 1.972931981086731, "learning_rate": 3.4550352790250164e-06, "loss": 0.8744, "step": 28190 }, { "epoch": 0.3436193679694831, "grad_norm": 1.7476849555969238, "learning_rate": 3.4547145606157794e-06, "loss": 0.8324, "step": 28195 }, { "epoch": 0.34368030419363094, "grad_norm": 1.970974326133728, "learning_rate": 3.4543938422065433e-06, "loss": 0.8989, "step": 28200 }, { "epoch": 0.34374124041777876, "grad_norm": 1.7313306331634521, "learning_rate": 3.4540731237973063e-06, "loss": 0.8142, "step": 28205 }, { "epoch": 0.3438021766419266, "grad_norm": 2.544528007507324, "learning_rate": 3.4537524053880693e-06, "loss": 0.8413, "step": 28210 }, { "epoch": 0.34386311286607435, "grad_norm": 2.0511105060577393, "learning_rate": 3.453431686978833e-06, "loss": 0.8437, "step": 28215 }, { "epoch": 0.3439240490902222, "grad_norm": 1.7843822240829468, "learning_rate": 3.453110968569596e-06, "loss": 0.8521, "step": 28220 }, { "epoch": 0.34398498531437, "grad_norm": 1.9788074493408203, "learning_rate": 3.4527902501603592e-06, "loss": 0.747, "step": 28225 }, { "epoch": 0.34404592153851776, "grad_norm": 2.4071247577667236, "learning_rate": 3.4524695317511227e-06, "loss": 0.8808, "step": 28230 }, { "epoch": 0.3441068577626656, "grad_norm": 1.922236442565918, "learning_rate": 3.452148813341886e-06, "loss": 0.8726, "step": 28235 }, { "epoch": 0.3441677939868134, "grad_norm": 1.870956301689148, "learning_rate": 3.4518280949326495e-06, "loss": 0.8167, "step": 28240 }, { "epoch": 0.34422873021096123, "grad_norm": 1.9452009201049805, "learning_rate": 3.4515073765234126e-06, "loss": 0.8459, "step": 28245 }, { "epoch": 0.344289666435109, "grad_norm": 1.924310564994812, "learning_rate": 3.451186658114176e-06, "loss": 0.8189, "step": 28250 }, { "epoch": 0.3443506026592568, "grad_norm": 2.781020164489746, "learning_rate": 3.4508659397049394e-06, "loss": 0.8233, "step": 28255 }, { "epoch": 0.34441153888340464, "grad_norm": 1.7348155975341797, "learning_rate": 3.4505452212957025e-06, "loss": 0.8524, "step": 28260 }, { "epoch": 0.3444724751075524, "grad_norm": 1.5246508121490479, "learning_rate": 3.4502245028864663e-06, "loss": 0.8547, "step": 28265 }, { "epoch": 0.34453341133170023, "grad_norm": 2.3018112182617188, "learning_rate": 3.4499037844772293e-06, "loss": 0.8744, "step": 28270 }, { "epoch": 0.34459434755584806, "grad_norm": 2.022620439529419, "learning_rate": 3.4495830660679923e-06, "loss": 0.8883, "step": 28275 }, { "epoch": 0.3446552837799959, "grad_norm": 1.9256757497787476, "learning_rate": 3.4492623476587562e-06, "loss": 0.8204, "step": 28280 }, { "epoch": 0.34471622000414365, "grad_norm": 1.9197280406951904, "learning_rate": 3.4489416292495192e-06, "loss": 0.8808, "step": 28285 }, { "epoch": 0.34477715622829147, "grad_norm": 2.165750741958618, "learning_rate": 3.4486209108402822e-06, "loss": 0.8846, "step": 28290 }, { "epoch": 0.3448380924524393, "grad_norm": 1.7171950340270996, "learning_rate": 3.448300192431046e-06, "loss": 0.9024, "step": 28295 }, { "epoch": 0.34489902867658706, "grad_norm": 1.980080485343933, "learning_rate": 3.447979474021809e-06, "loss": 0.868, "step": 28300 }, { "epoch": 0.3449599649007349, "grad_norm": 2.0677850246429443, "learning_rate": 3.447658755612572e-06, "loss": 0.805, "step": 28305 }, { "epoch": 0.3450209011248827, "grad_norm": 2.1217031478881836, "learning_rate": 3.4473380372033356e-06, "loss": 0.8735, "step": 28310 }, { "epoch": 0.34508183734903053, "grad_norm": 2.2448487281799316, "learning_rate": 3.447017318794099e-06, "loss": 0.8872, "step": 28315 }, { "epoch": 0.3451427735731783, "grad_norm": 1.8004697561264038, "learning_rate": 3.4466966003848625e-06, "loss": 0.9213, "step": 28320 }, { "epoch": 0.3452037097973261, "grad_norm": 2.3574397563934326, "learning_rate": 3.4463758819756255e-06, "loss": 0.887, "step": 28325 }, { "epoch": 0.34526464602147394, "grad_norm": 1.8105565309524536, "learning_rate": 3.4460551635663893e-06, "loss": 0.8975, "step": 28330 }, { "epoch": 0.3453255822456217, "grad_norm": 1.8754891157150269, "learning_rate": 3.4457344451571524e-06, "loss": 0.898, "step": 28335 }, { "epoch": 0.34538651846976953, "grad_norm": 2.30057692527771, "learning_rate": 3.4454137267479154e-06, "loss": 0.8776, "step": 28340 }, { "epoch": 0.34544745469391736, "grad_norm": 1.8677384853363037, "learning_rate": 3.4450930083386792e-06, "loss": 0.8852, "step": 28345 }, { "epoch": 0.3455083909180652, "grad_norm": 2.122546672821045, "learning_rate": 3.4447722899294423e-06, "loss": 0.7702, "step": 28350 }, { "epoch": 0.34556932714221295, "grad_norm": 2.079134702682495, "learning_rate": 3.4444515715202053e-06, "loss": 0.8478, "step": 28355 }, { "epoch": 0.34563026336636077, "grad_norm": 2.055953025817871, "learning_rate": 3.444130853110969e-06, "loss": 0.911, "step": 28360 }, { "epoch": 0.3456911995905086, "grad_norm": 1.8399478197097778, "learning_rate": 3.443810134701732e-06, "loss": 0.896, "step": 28365 }, { "epoch": 0.34575213581465636, "grad_norm": 1.957165002822876, "learning_rate": 3.443489416292495e-06, "loss": 0.8072, "step": 28370 }, { "epoch": 0.3458130720388042, "grad_norm": 2.0371809005737305, "learning_rate": 3.443168697883259e-06, "loss": 0.8993, "step": 28375 }, { "epoch": 0.345874008262952, "grad_norm": 1.8531550168991089, "learning_rate": 3.442847979474022e-06, "loss": 0.8296, "step": 28380 }, { "epoch": 0.3459349444870998, "grad_norm": 1.7316789627075195, "learning_rate": 3.4425272610647855e-06, "loss": 0.8659, "step": 28385 }, { "epoch": 0.3459958807112476, "grad_norm": 1.7205291986465454, "learning_rate": 3.4422065426555485e-06, "loss": 0.8773, "step": 28390 }, { "epoch": 0.3460568169353954, "grad_norm": 1.9182151556015015, "learning_rate": 3.441885824246312e-06, "loss": 0.8947, "step": 28395 }, { "epoch": 0.34611775315954324, "grad_norm": 1.925054669380188, "learning_rate": 3.4415651058370754e-06, "loss": 0.8386, "step": 28400 }, { "epoch": 0.346178689383691, "grad_norm": 1.7177352905273438, "learning_rate": 3.4412443874278384e-06, "loss": 0.8185, "step": 28405 }, { "epoch": 0.34623962560783883, "grad_norm": 2.1288535594940186, "learning_rate": 3.4409236690186023e-06, "loss": 0.8442, "step": 28410 }, { "epoch": 0.34630056183198665, "grad_norm": 1.7191087007522583, "learning_rate": 3.4406029506093653e-06, "loss": 0.8488, "step": 28415 }, { "epoch": 0.3463614980561345, "grad_norm": 2.0081558227539062, "learning_rate": 3.4402822322001283e-06, "loss": 0.8442, "step": 28420 }, { "epoch": 0.34642243428028224, "grad_norm": 1.8979556560516357, "learning_rate": 3.439961513790892e-06, "loss": 0.8613, "step": 28425 }, { "epoch": 0.34648337050443007, "grad_norm": 2.1893832683563232, "learning_rate": 3.439640795381655e-06, "loss": 0.774, "step": 28430 }, { "epoch": 0.3465443067285779, "grad_norm": 1.750285029411316, "learning_rate": 3.439320076972418e-06, "loss": 0.8371, "step": 28435 }, { "epoch": 0.34660524295272566, "grad_norm": 1.6487748622894287, "learning_rate": 3.438999358563182e-06, "loss": 0.9605, "step": 28440 }, { "epoch": 0.3466661791768735, "grad_norm": 1.9181139469146729, "learning_rate": 3.438678640153945e-06, "loss": 0.8765, "step": 28445 }, { "epoch": 0.3467271154010213, "grad_norm": 2.0907981395721436, "learning_rate": 3.438357921744708e-06, "loss": 0.8706, "step": 28450 }, { "epoch": 0.3467880516251691, "grad_norm": 2.156587600708008, "learning_rate": 3.438037203335472e-06, "loss": 0.885, "step": 28455 }, { "epoch": 0.3468489878493169, "grad_norm": 2.30926513671875, "learning_rate": 3.437716484926235e-06, "loss": 0.9067, "step": 28460 }, { "epoch": 0.3469099240734647, "grad_norm": 2.4436511993408203, "learning_rate": 3.4373957665169984e-06, "loss": 0.8524, "step": 28465 }, { "epoch": 0.34697086029761254, "grad_norm": 1.7610392570495605, "learning_rate": 3.437075048107762e-06, "loss": 0.8368, "step": 28470 }, { "epoch": 0.3470317965217603, "grad_norm": 1.9001741409301758, "learning_rate": 3.436754329698525e-06, "loss": 0.9126, "step": 28475 }, { "epoch": 0.34709273274590813, "grad_norm": 1.7554899454116821, "learning_rate": 3.4364336112892883e-06, "loss": 0.857, "step": 28480 }, { "epoch": 0.34715366897005595, "grad_norm": 1.9038302898406982, "learning_rate": 3.4361128928800513e-06, "loss": 0.8467, "step": 28485 }, { "epoch": 0.3472146051942038, "grad_norm": 2.071852207183838, "learning_rate": 3.435792174470815e-06, "loss": 0.9126, "step": 28490 }, { "epoch": 0.34727554141835154, "grad_norm": 2.1749043464660645, "learning_rate": 3.4354714560615782e-06, "loss": 0.8809, "step": 28495 }, { "epoch": 0.34733647764249936, "grad_norm": 1.944611668586731, "learning_rate": 3.4351507376523412e-06, "loss": 0.8405, "step": 28500 }, { "epoch": 0.3473974138666472, "grad_norm": 1.8678178787231445, "learning_rate": 3.434830019243105e-06, "loss": 0.8859, "step": 28505 }, { "epoch": 0.34745835009079495, "grad_norm": 2.097590923309326, "learning_rate": 3.434509300833868e-06, "loss": 0.8642, "step": 28510 }, { "epoch": 0.3475192863149428, "grad_norm": 1.9159764051437378, "learning_rate": 3.434188582424631e-06, "loss": 0.8423, "step": 28515 }, { "epoch": 0.3475802225390906, "grad_norm": 1.859522819519043, "learning_rate": 3.433867864015395e-06, "loss": 0.8979, "step": 28520 }, { "epoch": 0.34764115876323837, "grad_norm": 2.0831058025360107, "learning_rate": 3.433547145606158e-06, "loss": 0.8283, "step": 28525 }, { "epoch": 0.3477020949873862, "grad_norm": 1.9876525402069092, "learning_rate": 3.433226427196921e-06, "loss": 0.9028, "step": 28530 }, { "epoch": 0.347763031211534, "grad_norm": 2.0218381881713867, "learning_rate": 3.432905708787685e-06, "loss": 0.833, "step": 28535 }, { "epoch": 0.34782396743568184, "grad_norm": 2.2853198051452637, "learning_rate": 3.432584990378448e-06, "loss": 0.8905, "step": 28540 }, { "epoch": 0.3478849036598296, "grad_norm": 1.9601303339004517, "learning_rate": 3.4322642719692113e-06, "loss": 0.8321, "step": 28545 }, { "epoch": 0.3479458398839774, "grad_norm": 1.776430368423462, "learning_rate": 3.4319435535599748e-06, "loss": 0.8856, "step": 28550 }, { "epoch": 0.34800677610812525, "grad_norm": 1.7519577741622925, "learning_rate": 3.4316228351507382e-06, "loss": 0.853, "step": 28555 }, { "epoch": 0.348067712332273, "grad_norm": 1.9556699991226196, "learning_rate": 3.4313021167415012e-06, "loss": 0.8404, "step": 28560 }, { "epoch": 0.34812864855642084, "grad_norm": 1.8959872722625732, "learning_rate": 3.4309813983322643e-06, "loss": 0.948, "step": 28565 }, { "epoch": 0.34818958478056866, "grad_norm": 1.7663609981536865, "learning_rate": 3.430660679923028e-06, "loss": 0.8587, "step": 28570 }, { "epoch": 0.3482505210047165, "grad_norm": 2.2575912475585938, "learning_rate": 3.430339961513791e-06, "loss": 0.8727, "step": 28575 }, { "epoch": 0.34831145722886425, "grad_norm": 2.045559883117676, "learning_rate": 3.430019243104554e-06, "loss": 0.7958, "step": 28580 }, { "epoch": 0.3483723934530121, "grad_norm": 2.2045528888702393, "learning_rate": 3.429698524695318e-06, "loss": 0.9701, "step": 28585 }, { "epoch": 0.3484333296771599, "grad_norm": 2.367100715637207, "learning_rate": 3.429377806286081e-06, "loss": 0.8939, "step": 28590 }, { "epoch": 0.34849426590130766, "grad_norm": 1.7763257026672363, "learning_rate": 3.429057087876844e-06, "loss": 0.7804, "step": 28595 }, { "epoch": 0.3485552021254555, "grad_norm": 1.8187735080718994, "learning_rate": 3.428736369467608e-06, "loss": 0.8522, "step": 28600 }, { "epoch": 0.3486161383496033, "grad_norm": 1.9216437339782715, "learning_rate": 3.428415651058371e-06, "loss": 0.8137, "step": 28605 }, { "epoch": 0.34867707457375113, "grad_norm": 1.9905608892440796, "learning_rate": 3.4280949326491344e-06, "loss": 0.8009, "step": 28610 }, { "epoch": 0.3487380107978989, "grad_norm": 2.0007781982421875, "learning_rate": 3.427774214239898e-06, "loss": 0.8422, "step": 28615 }, { "epoch": 0.3487989470220467, "grad_norm": 2.424421787261963, "learning_rate": 3.427453495830661e-06, "loss": 0.8319, "step": 28620 }, { "epoch": 0.34885988324619455, "grad_norm": 1.7199199199676514, "learning_rate": 3.4271327774214243e-06, "loss": 0.8384, "step": 28625 }, { "epoch": 0.3489208194703423, "grad_norm": 2.1038804054260254, "learning_rate": 3.4268120590121877e-06, "loss": 0.8414, "step": 28630 }, { "epoch": 0.34898175569449014, "grad_norm": 2.153254985809326, "learning_rate": 3.426491340602951e-06, "loss": 0.9038, "step": 28635 }, { "epoch": 0.34904269191863796, "grad_norm": 1.8309497833251953, "learning_rate": 3.426170622193714e-06, "loss": 0.846, "step": 28640 }, { "epoch": 0.3491036281427858, "grad_norm": 1.9298975467681885, "learning_rate": 3.425849903784477e-06, "loss": 0.8835, "step": 28645 }, { "epoch": 0.34916456436693355, "grad_norm": 1.9831736087799072, "learning_rate": 3.425529185375241e-06, "loss": 0.8197, "step": 28650 }, { "epoch": 0.3492255005910814, "grad_norm": 2.9365992546081543, "learning_rate": 3.425208466966004e-06, "loss": 0.8352, "step": 28655 }, { "epoch": 0.3492864368152292, "grad_norm": 2.075514554977417, "learning_rate": 3.424887748556767e-06, "loss": 0.8356, "step": 28660 }, { "epoch": 0.34934737303937696, "grad_norm": 2.2380716800689697, "learning_rate": 3.424567030147531e-06, "loss": 0.8102, "step": 28665 }, { "epoch": 0.3494083092635248, "grad_norm": 1.889827847480774, "learning_rate": 3.424246311738294e-06, "loss": 0.7961, "step": 28670 }, { "epoch": 0.3494692454876726, "grad_norm": 2.0646045207977295, "learning_rate": 3.423925593329057e-06, "loss": 0.8534, "step": 28675 }, { "epoch": 0.34953018171182043, "grad_norm": 2.0660505294799805, "learning_rate": 3.423604874919821e-06, "loss": 0.838, "step": 28680 }, { "epoch": 0.3495911179359682, "grad_norm": 1.8316731452941895, "learning_rate": 3.423284156510584e-06, "loss": 0.7883, "step": 28685 }, { "epoch": 0.349652054160116, "grad_norm": 2.3031420707702637, "learning_rate": 3.4229634381013473e-06, "loss": 0.9079, "step": 28690 }, { "epoch": 0.34971299038426384, "grad_norm": 1.7127522230148315, "learning_rate": 3.4226427196921107e-06, "loss": 0.9056, "step": 28695 }, { "epoch": 0.3497739266084116, "grad_norm": 2.5748395919799805, "learning_rate": 3.4223220012828738e-06, "loss": 0.8671, "step": 28700 }, { "epoch": 0.34983486283255943, "grad_norm": 1.7104781866073608, "learning_rate": 3.422001282873637e-06, "loss": 0.8441, "step": 28705 }, { "epoch": 0.34989579905670726, "grad_norm": 2.1959426403045654, "learning_rate": 3.4216805644644006e-06, "loss": 0.8542, "step": 28710 }, { "epoch": 0.3499567352808551, "grad_norm": 1.934562087059021, "learning_rate": 3.421359846055164e-06, "loss": 0.905, "step": 28715 }, { "epoch": 0.35001767150500285, "grad_norm": 1.9448210000991821, "learning_rate": 3.421039127645927e-06, "loss": 0.8634, "step": 28720 }, { "epoch": 0.35007860772915067, "grad_norm": 2.0443363189697266, "learning_rate": 3.4207184092366905e-06, "loss": 0.8637, "step": 28725 }, { "epoch": 0.3501395439532985, "grad_norm": 1.9856261014938354, "learning_rate": 3.420397690827454e-06, "loss": 0.8851, "step": 28730 }, { "epoch": 0.35020048017744626, "grad_norm": 1.972347378730774, "learning_rate": 3.420076972418217e-06, "loss": 0.8785, "step": 28735 }, { "epoch": 0.3502614164015941, "grad_norm": 2.1183886528015137, "learning_rate": 3.41975625400898e-06, "loss": 0.857, "step": 28740 }, { "epoch": 0.3503223526257419, "grad_norm": 1.885698914527893, "learning_rate": 3.419435535599744e-06, "loss": 0.8358, "step": 28745 }, { "epoch": 0.35038328884988973, "grad_norm": 2.107609748840332, "learning_rate": 3.419114817190507e-06, "loss": 0.8616, "step": 28750 }, { "epoch": 0.3504442250740375, "grad_norm": 1.7489664554595947, "learning_rate": 3.41879409878127e-06, "loss": 0.8749, "step": 28755 }, { "epoch": 0.3505051612981853, "grad_norm": 1.7893668413162231, "learning_rate": 3.4184733803720338e-06, "loss": 0.8499, "step": 28760 }, { "epoch": 0.35056609752233314, "grad_norm": 2.542170286178589, "learning_rate": 3.418152661962797e-06, "loss": 0.85, "step": 28765 }, { "epoch": 0.3506270337464809, "grad_norm": 2.4204065799713135, "learning_rate": 3.4178319435535602e-06, "loss": 0.8196, "step": 28770 }, { "epoch": 0.35068796997062873, "grad_norm": 2.23848295211792, "learning_rate": 3.4175112251443237e-06, "loss": 0.7916, "step": 28775 }, { "epoch": 0.35074890619477656, "grad_norm": 2.2664568424224854, "learning_rate": 3.4171905067350867e-06, "loss": 0.872, "step": 28780 }, { "epoch": 0.3508098424189244, "grad_norm": 1.9019228219985962, "learning_rate": 3.41686978832585e-06, "loss": 0.824, "step": 28785 }, { "epoch": 0.35087077864307215, "grad_norm": 1.7009111642837524, "learning_rate": 3.4165490699166136e-06, "loss": 0.8457, "step": 28790 }, { "epoch": 0.35093171486721997, "grad_norm": 1.8060662746429443, "learning_rate": 3.416228351507377e-06, "loss": 0.8532, "step": 28795 }, { "epoch": 0.3509926510913678, "grad_norm": 2.0124855041503906, "learning_rate": 3.41590763309814e-06, "loss": 0.7934, "step": 28800 }, { "epoch": 0.35105358731551556, "grad_norm": 1.8321634531021118, "learning_rate": 3.415586914688904e-06, "loss": 0.8264, "step": 28805 }, { "epoch": 0.3511145235396634, "grad_norm": 1.6883870363235474, "learning_rate": 3.415266196279667e-06, "loss": 0.8999, "step": 28810 }, { "epoch": 0.3511754597638112, "grad_norm": 1.8017953634262085, "learning_rate": 3.41494547787043e-06, "loss": 0.812, "step": 28815 }, { "epoch": 0.351236395987959, "grad_norm": 2.0034520626068115, "learning_rate": 3.414624759461193e-06, "loss": 0.8423, "step": 28820 }, { "epoch": 0.3512973322121068, "grad_norm": 1.7245025634765625, "learning_rate": 3.414304041051957e-06, "loss": 0.9084, "step": 28825 }, { "epoch": 0.3513582684362546, "grad_norm": 1.8072657585144043, "learning_rate": 3.41398332264272e-06, "loss": 0.8698, "step": 28830 }, { "epoch": 0.35141920466040244, "grad_norm": 1.89981210231781, "learning_rate": 3.4136626042334833e-06, "loss": 0.8824, "step": 28835 }, { "epoch": 0.3514801408845502, "grad_norm": 1.8106889724731445, "learning_rate": 3.4133418858242467e-06, "loss": 0.878, "step": 28840 }, { "epoch": 0.35154107710869803, "grad_norm": 1.7027945518493652, "learning_rate": 3.4130211674150097e-06, "loss": 0.8712, "step": 28845 }, { "epoch": 0.35160201333284585, "grad_norm": 1.6747210025787354, "learning_rate": 3.412700449005773e-06, "loss": 0.8199, "step": 28850 }, { "epoch": 0.3516629495569937, "grad_norm": 2.133307456970215, "learning_rate": 3.4123797305965366e-06, "loss": 0.8459, "step": 28855 }, { "epoch": 0.35172388578114144, "grad_norm": 1.9578925371170044, "learning_rate": 3.4120590121873e-06, "loss": 0.9251, "step": 28860 }, { "epoch": 0.35178482200528927, "grad_norm": 2.3117918968200684, "learning_rate": 3.411738293778063e-06, "loss": 0.836, "step": 28865 }, { "epoch": 0.3518457582294371, "grad_norm": 1.8077967166900635, "learning_rate": 3.4114175753688265e-06, "loss": 0.8845, "step": 28870 }, { "epoch": 0.35190669445358486, "grad_norm": 1.9020044803619385, "learning_rate": 3.41109685695959e-06, "loss": 0.8925, "step": 28875 }, { "epoch": 0.3519676306777327, "grad_norm": 2.0872249603271484, "learning_rate": 3.410776138550353e-06, "loss": 0.8627, "step": 28880 }, { "epoch": 0.3520285669018805, "grad_norm": 1.9731999635696411, "learning_rate": 3.410455420141117e-06, "loss": 0.9696, "step": 28885 }, { "epoch": 0.3520895031260283, "grad_norm": 2.3581268787384033, "learning_rate": 3.41013470173188e-06, "loss": 0.797, "step": 28890 }, { "epoch": 0.3521504393501761, "grad_norm": 2.195387363433838, "learning_rate": 3.409813983322643e-06, "loss": 0.9461, "step": 28895 }, { "epoch": 0.3522113755743239, "grad_norm": 1.9497698545455933, "learning_rate": 3.409493264913406e-06, "loss": 0.7992, "step": 28900 }, { "epoch": 0.35227231179847174, "grad_norm": 2.2826640605926514, "learning_rate": 3.4091725465041697e-06, "loss": 0.8642, "step": 28905 }, { "epoch": 0.3523332480226195, "grad_norm": 1.8938302993774414, "learning_rate": 3.4088518280949327e-06, "loss": 0.8247, "step": 28910 }, { "epoch": 0.35239418424676733, "grad_norm": 2.376533269882202, "learning_rate": 3.408531109685696e-06, "loss": 0.9232, "step": 28915 }, { "epoch": 0.35245512047091515, "grad_norm": 1.8892725706100464, "learning_rate": 3.4082103912764596e-06, "loss": 0.8188, "step": 28920 }, { "epoch": 0.352516056695063, "grad_norm": 2.554300546646118, "learning_rate": 3.4078896728672226e-06, "loss": 0.8601, "step": 28925 }, { "epoch": 0.35257699291921074, "grad_norm": 2.132711410522461, "learning_rate": 3.407568954457986e-06, "loss": 0.8886, "step": 28930 }, { "epoch": 0.35263792914335856, "grad_norm": 2.22245454788208, "learning_rate": 3.4072482360487495e-06, "loss": 0.8791, "step": 28935 }, { "epoch": 0.3526988653675064, "grad_norm": 2.161383867263794, "learning_rate": 3.406927517639513e-06, "loss": 0.8905, "step": 28940 }, { "epoch": 0.35275980159165415, "grad_norm": 1.8359824419021606, "learning_rate": 3.406606799230276e-06, "loss": 0.94, "step": 28945 }, { "epoch": 0.352820737815802, "grad_norm": 2.740082025527954, "learning_rate": 3.4062860808210394e-06, "loss": 0.8629, "step": 28950 }, { "epoch": 0.3528816740399498, "grad_norm": 1.9154869318008423, "learning_rate": 3.405965362411803e-06, "loss": 0.8959, "step": 28955 }, { "epoch": 0.3529426102640976, "grad_norm": 2.013690233230591, "learning_rate": 3.405644644002566e-06, "loss": 0.8474, "step": 28960 }, { "epoch": 0.3530035464882454, "grad_norm": 2.092546224594116, "learning_rate": 3.4053239255933297e-06, "loss": 0.8935, "step": 28965 }, { "epoch": 0.3530644827123932, "grad_norm": 1.9896397590637207, "learning_rate": 3.4050032071840928e-06, "loss": 0.9023, "step": 28970 }, { "epoch": 0.35312541893654104, "grad_norm": 2.1400420665740967, "learning_rate": 3.4046824887748558e-06, "loss": 0.947, "step": 28975 }, { "epoch": 0.3531863551606888, "grad_norm": 2.12117600440979, "learning_rate": 3.404361770365619e-06, "loss": 0.8285, "step": 28980 }, { "epoch": 0.3532472913848366, "grad_norm": 2.0834543704986572, "learning_rate": 3.4040410519563827e-06, "loss": 0.8707, "step": 28985 }, { "epoch": 0.35330822760898445, "grad_norm": 2.0162296295166016, "learning_rate": 3.4037203335471457e-06, "loss": 0.797, "step": 28990 }, { "epoch": 0.35336916383313227, "grad_norm": 1.9636178016662598, "learning_rate": 3.403399615137909e-06, "loss": 0.964, "step": 28995 }, { "epoch": 0.35343010005728004, "grad_norm": 1.7855799198150635, "learning_rate": 3.4030788967286726e-06, "loss": 0.8727, "step": 29000 }, { "epoch": 0.35349103628142786, "grad_norm": 1.889919638633728, "learning_rate": 3.4027581783194356e-06, "loss": 0.7789, "step": 29005 }, { "epoch": 0.3535519725055757, "grad_norm": 1.789414405822754, "learning_rate": 3.402437459910199e-06, "loss": 0.8571, "step": 29010 }, { "epoch": 0.35361290872972345, "grad_norm": 1.8146063089370728, "learning_rate": 3.4021167415009625e-06, "loss": 0.8808, "step": 29015 }, { "epoch": 0.3536738449538713, "grad_norm": 2.0936291217803955, "learning_rate": 3.401796023091726e-06, "loss": 0.8382, "step": 29020 }, { "epoch": 0.3537347811780191, "grad_norm": 1.9395653009414673, "learning_rate": 3.401475304682489e-06, "loss": 0.8525, "step": 29025 }, { "epoch": 0.35379571740216686, "grad_norm": 2.131558656692505, "learning_rate": 3.4011545862732528e-06, "loss": 0.8391, "step": 29030 }, { "epoch": 0.3538566536263147, "grad_norm": 1.8249825239181519, "learning_rate": 3.400833867864016e-06, "loss": 0.8594, "step": 29035 }, { "epoch": 0.3539175898504625, "grad_norm": 1.8883119821548462, "learning_rate": 3.400513149454779e-06, "loss": 0.8712, "step": 29040 }, { "epoch": 0.35397852607461033, "grad_norm": 2.203209161758423, "learning_rate": 3.4001924310455427e-06, "loss": 0.8209, "step": 29045 }, { "epoch": 0.3540394622987581, "grad_norm": 2.1367623805999756, "learning_rate": 3.3998717126363057e-06, "loss": 0.8223, "step": 29050 }, { "epoch": 0.3541003985229059, "grad_norm": 1.995923399925232, "learning_rate": 3.3995509942270687e-06, "loss": 0.8893, "step": 29055 }, { "epoch": 0.35416133474705375, "grad_norm": 1.9534891843795776, "learning_rate": 3.3992302758178326e-06, "loss": 0.8565, "step": 29060 }, { "epoch": 0.3542222709712015, "grad_norm": 1.8306236267089844, "learning_rate": 3.3989095574085956e-06, "loss": 0.8557, "step": 29065 }, { "epoch": 0.35428320719534934, "grad_norm": 1.9639263153076172, "learning_rate": 3.3985888389993586e-06, "loss": 0.8143, "step": 29070 }, { "epoch": 0.35434414341949716, "grad_norm": 1.8635945320129395, "learning_rate": 3.398268120590122e-06, "loss": 0.8225, "step": 29075 }, { "epoch": 0.354405079643645, "grad_norm": 1.9367988109588623, "learning_rate": 3.3979474021808855e-06, "loss": 0.8928, "step": 29080 }, { "epoch": 0.35446601586779275, "grad_norm": 1.7446380853652954, "learning_rate": 3.397626683771649e-06, "loss": 0.8456, "step": 29085 }, { "epoch": 0.3545269520919406, "grad_norm": 1.930989146232605, "learning_rate": 3.397305965362412e-06, "loss": 0.86, "step": 29090 }, { "epoch": 0.3545878883160884, "grad_norm": 1.8239479064941406, "learning_rate": 3.3969852469531754e-06, "loss": 0.8585, "step": 29095 }, { "epoch": 0.35464882454023616, "grad_norm": 1.9711970090866089, "learning_rate": 3.396664528543939e-06, "loss": 0.8749, "step": 29100 }, { "epoch": 0.354709760764384, "grad_norm": 2.2783401012420654, "learning_rate": 3.396343810134702e-06, "loss": 0.7869, "step": 29105 }, { "epoch": 0.3547706969885318, "grad_norm": 1.7547014951705933, "learning_rate": 3.3960230917254657e-06, "loss": 0.8668, "step": 29110 }, { "epoch": 0.35483163321267963, "grad_norm": 2.1310722827911377, "learning_rate": 3.3957023733162287e-06, "loss": 0.8816, "step": 29115 }, { "epoch": 0.3548925694368274, "grad_norm": 2.157900094985962, "learning_rate": 3.3953816549069917e-06, "loss": 0.8319, "step": 29120 }, { "epoch": 0.3549535056609752, "grad_norm": 2.677830696105957, "learning_rate": 3.3950609364977556e-06, "loss": 0.8713, "step": 29125 }, { "epoch": 0.35501444188512304, "grad_norm": 2.035121202468872, "learning_rate": 3.3947402180885186e-06, "loss": 0.8707, "step": 29130 }, { "epoch": 0.3550753781092708, "grad_norm": 2.3961098194122314, "learning_rate": 3.3944194996792816e-06, "loss": 0.8256, "step": 29135 }, { "epoch": 0.35513631433341863, "grad_norm": 1.8394999504089355, "learning_rate": 3.3940987812700455e-06, "loss": 0.8045, "step": 29140 }, { "epoch": 0.35519725055756646, "grad_norm": 1.7933897972106934, "learning_rate": 3.3937780628608085e-06, "loss": 0.8838, "step": 29145 }, { "epoch": 0.3552581867817143, "grad_norm": 2.437431573867798, "learning_rate": 3.3934573444515715e-06, "loss": 0.9677, "step": 29150 }, { "epoch": 0.35531912300586205, "grad_norm": 1.9830472469329834, "learning_rate": 3.393136626042335e-06, "loss": 0.8774, "step": 29155 }, { "epoch": 0.35538005923000987, "grad_norm": 1.7494038343429565, "learning_rate": 3.3928159076330984e-06, "loss": 0.8511, "step": 29160 }, { "epoch": 0.3554409954541577, "grad_norm": 2.0816283226013184, "learning_rate": 3.392495189223862e-06, "loss": 0.883, "step": 29165 }, { "epoch": 0.35550193167830546, "grad_norm": 2.122591018676758, "learning_rate": 3.392174470814625e-06, "loss": 0.9134, "step": 29170 }, { "epoch": 0.3555628679024533, "grad_norm": 2.0446529388427734, "learning_rate": 3.3918537524053883e-06, "loss": 0.778, "step": 29175 }, { "epoch": 0.3556238041266011, "grad_norm": 2.0814309120178223, "learning_rate": 3.3915330339961517e-06, "loss": 0.8119, "step": 29180 }, { "epoch": 0.35568474035074893, "grad_norm": 1.899878978729248, "learning_rate": 3.3912123155869148e-06, "loss": 0.8631, "step": 29185 }, { "epoch": 0.3557456765748967, "grad_norm": 2.195899248123169, "learning_rate": 3.3908915971776786e-06, "loss": 0.7782, "step": 29190 }, { "epoch": 0.3558066127990445, "grad_norm": 2.2934577465057373, "learning_rate": 3.3905708787684416e-06, "loss": 0.8929, "step": 29195 }, { "epoch": 0.35586754902319234, "grad_norm": 2.0618340969085693, "learning_rate": 3.3902501603592047e-06, "loss": 0.8363, "step": 29200 }, { "epoch": 0.3559284852473401, "grad_norm": 2.1583285331726074, "learning_rate": 3.3899294419499685e-06, "loss": 0.8718, "step": 29205 }, { "epoch": 0.35598942147148793, "grad_norm": 2.2179863452911377, "learning_rate": 3.3896087235407315e-06, "loss": 0.7935, "step": 29210 }, { "epoch": 0.35605035769563576, "grad_norm": 2.2653141021728516, "learning_rate": 3.3892880051314946e-06, "loss": 0.8573, "step": 29215 }, { "epoch": 0.3561112939197836, "grad_norm": 2.0026392936706543, "learning_rate": 3.3889672867222584e-06, "loss": 0.8287, "step": 29220 }, { "epoch": 0.35617223014393135, "grad_norm": 2.376777172088623, "learning_rate": 3.3886465683130214e-06, "loss": 0.8815, "step": 29225 }, { "epoch": 0.35623316636807917, "grad_norm": 1.947070598602295, "learning_rate": 3.3883258499037845e-06, "loss": 0.8521, "step": 29230 }, { "epoch": 0.356294102592227, "grad_norm": 2.6935155391693115, "learning_rate": 3.388005131494548e-06, "loss": 0.8362, "step": 29235 }, { "epoch": 0.35635503881637476, "grad_norm": 1.879360318183899, "learning_rate": 3.3876844130853113e-06, "loss": 0.903, "step": 29240 }, { "epoch": 0.3564159750405226, "grad_norm": 1.9175739288330078, "learning_rate": 3.3873636946760748e-06, "loss": 0.9027, "step": 29245 }, { "epoch": 0.3564769112646704, "grad_norm": 2.0639305114746094, "learning_rate": 3.387042976266838e-06, "loss": 0.8695, "step": 29250 }, { "epoch": 0.3565378474888182, "grad_norm": 1.7806661128997803, "learning_rate": 3.3867222578576017e-06, "loss": 0.9093, "step": 29255 }, { "epoch": 0.356598783712966, "grad_norm": 2.2449820041656494, "learning_rate": 3.3864015394483647e-06, "loss": 0.9006, "step": 29260 }, { "epoch": 0.3566597199371138, "grad_norm": 1.803550124168396, "learning_rate": 3.3860808210391277e-06, "loss": 0.7985, "step": 29265 }, { "epoch": 0.35672065616126164, "grad_norm": 1.8561056852340698, "learning_rate": 3.3857601026298916e-06, "loss": 0.8893, "step": 29270 }, { "epoch": 0.3567815923854094, "grad_norm": 2.083991289138794, "learning_rate": 3.3854393842206546e-06, "loss": 0.9634, "step": 29275 }, { "epoch": 0.35684252860955723, "grad_norm": 1.8319040536880493, "learning_rate": 3.3851186658114176e-06, "loss": 0.9027, "step": 29280 }, { "epoch": 0.35690346483370505, "grad_norm": 2.191655158996582, "learning_rate": 3.3847979474021815e-06, "loss": 0.8611, "step": 29285 }, { "epoch": 0.3569644010578529, "grad_norm": 1.9704283475875854, "learning_rate": 3.3844772289929445e-06, "loss": 0.8457, "step": 29290 }, { "epoch": 0.35702533728200064, "grad_norm": 2.501276731491089, "learning_rate": 3.3841565105837075e-06, "loss": 0.9439, "step": 29295 }, { "epoch": 0.35708627350614847, "grad_norm": 1.6839720010757446, "learning_rate": 3.3838357921744714e-06, "loss": 0.8009, "step": 29300 }, { "epoch": 0.3571472097302963, "grad_norm": 2.128115653991699, "learning_rate": 3.3835150737652344e-06, "loss": 0.8162, "step": 29305 }, { "epoch": 0.35720814595444406, "grad_norm": 2.074293851852417, "learning_rate": 3.383194355355998e-06, "loss": 0.8515, "step": 29310 }, { "epoch": 0.3572690821785919, "grad_norm": 1.7544546127319336, "learning_rate": 3.3828736369467612e-06, "loss": 0.8451, "step": 29315 }, { "epoch": 0.3573300184027397, "grad_norm": 2.1450400352478027, "learning_rate": 3.3825529185375243e-06, "loss": 0.8662, "step": 29320 }, { "epoch": 0.3573909546268875, "grad_norm": 1.8379199504852295, "learning_rate": 3.3822322001282877e-06, "loss": 0.8785, "step": 29325 }, { "epoch": 0.3574518908510353, "grad_norm": 1.9079651832580566, "learning_rate": 3.3819114817190507e-06, "loss": 0.9153, "step": 29330 }, { "epoch": 0.3575128270751831, "grad_norm": 1.8626309633255005, "learning_rate": 3.3815907633098146e-06, "loss": 0.8962, "step": 29335 }, { "epoch": 0.35757376329933094, "grad_norm": 2.2295405864715576, "learning_rate": 3.3812700449005776e-06, "loss": 0.852, "step": 29340 }, { "epoch": 0.3576346995234787, "grad_norm": 1.6467090845108032, "learning_rate": 3.3809493264913406e-06, "loss": 0.78, "step": 29345 }, { "epoch": 0.35769563574762653, "grad_norm": 2.153453826904297, "learning_rate": 3.3806286080821045e-06, "loss": 0.9211, "step": 29350 }, { "epoch": 0.35775657197177435, "grad_norm": 2.1016383171081543, "learning_rate": 3.3803078896728675e-06, "loss": 0.9055, "step": 29355 }, { "epoch": 0.3578175081959222, "grad_norm": 2.2744300365448, "learning_rate": 3.3799871712636305e-06, "loss": 0.8991, "step": 29360 }, { "epoch": 0.35787844442006994, "grad_norm": 1.8808001279830933, "learning_rate": 3.3796664528543944e-06, "loss": 0.8059, "step": 29365 }, { "epoch": 0.35793938064421776, "grad_norm": 2.294726848602295, "learning_rate": 3.3793457344451574e-06, "loss": 0.8079, "step": 29370 }, { "epoch": 0.3580003168683656, "grad_norm": 1.934043526649475, "learning_rate": 3.3790250160359204e-06, "loss": 0.8469, "step": 29375 }, { "epoch": 0.35806125309251335, "grad_norm": 1.8995987176895142, "learning_rate": 3.3787042976266843e-06, "loss": 0.8557, "step": 29380 }, { "epoch": 0.3581221893166612, "grad_norm": 2.0645039081573486, "learning_rate": 3.3783835792174473e-06, "loss": 0.8443, "step": 29385 }, { "epoch": 0.358183125540809, "grad_norm": 1.7119921445846558, "learning_rate": 3.3780628608082107e-06, "loss": 0.8157, "step": 29390 }, { "epoch": 0.3582440617649568, "grad_norm": 2.006216049194336, "learning_rate": 3.377742142398974e-06, "loss": 0.7568, "step": 29395 }, { "epoch": 0.3583049979891046, "grad_norm": 1.9355661869049072, "learning_rate": 3.377421423989737e-06, "loss": 1.0228, "step": 29400 }, { "epoch": 0.3583659342132524, "grad_norm": 2.806410789489746, "learning_rate": 3.3771007055805006e-06, "loss": 0.8803, "step": 29405 }, { "epoch": 0.35842687043740024, "grad_norm": 2.1747629642486572, "learning_rate": 3.3767799871712636e-06, "loss": 0.8505, "step": 29410 }, { "epoch": 0.358487806661548, "grad_norm": 1.874485731124878, "learning_rate": 3.3764592687620275e-06, "loss": 0.8907, "step": 29415 }, { "epoch": 0.3585487428856958, "grad_norm": 2.2621636390686035, "learning_rate": 3.3761385503527905e-06, "loss": 0.7932, "step": 29420 }, { "epoch": 0.35860967910984365, "grad_norm": 2.1784749031066895, "learning_rate": 3.3758178319435535e-06, "loss": 0.8058, "step": 29425 }, { "epoch": 0.35867061533399147, "grad_norm": 2.681364059448242, "learning_rate": 3.3754971135343174e-06, "loss": 0.8706, "step": 29430 }, { "epoch": 0.35873155155813924, "grad_norm": 2.023845672607422, "learning_rate": 3.3751763951250804e-06, "loss": 0.8843, "step": 29435 }, { "epoch": 0.35879248778228706, "grad_norm": 1.9972832202911377, "learning_rate": 3.3748556767158434e-06, "loss": 0.9259, "step": 29440 }, { "epoch": 0.3588534240064349, "grad_norm": 1.6953346729278564, "learning_rate": 3.3745349583066073e-06, "loss": 0.8007, "step": 29445 }, { "epoch": 0.35891436023058265, "grad_norm": 1.859344244003296, "learning_rate": 3.3742142398973703e-06, "loss": 0.84, "step": 29450 }, { "epoch": 0.3589752964547305, "grad_norm": 1.6870137453079224, "learning_rate": 3.3738935214881333e-06, "loss": 0.8561, "step": 29455 }, { "epoch": 0.3590362326788783, "grad_norm": 2.7069966793060303, "learning_rate": 3.373572803078897e-06, "loss": 0.8417, "step": 29460 }, { "epoch": 0.3590971689030261, "grad_norm": 1.6953444480895996, "learning_rate": 3.3732520846696602e-06, "loss": 0.8872, "step": 29465 }, { "epoch": 0.3591581051271739, "grad_norm": 2.043877124786377, "learning_rate": 3.3729313662604237e-06, "loss": 0.8683, "step": 29470 }, { "epoch": 0.3592190413513217, "grad_norm": 1.8581397533416748, "learning_rate": 3.372610647851187e-06, "loss": 0.9064, "step": 29475 }, { "epoch": 0.35927997757546953, "grad_norm": 1.8866162300109863, "learning_rate": 3.37228992944195e-06, "loss": 0.8859, "step": 29480 }, { "epoch": 0.3593409137996173, "grad_norm": 1.9192581176757812, "learning_rate": 3.3719692110327136e-06, "loss": 0.8871, "step": 29485 }, { "epoch": 0.3594018500237651, "grad_norm": 2.0210447311401367, "learning_rate": 3.3716484926234766e-06, "loss": 0.8743, "step": 29490 }, { "epoch": 0.35946278624791295, "grad_norm": 1.6546127796173096, "learning_rate": 3.3713277742142404e-06, "loss": 0.8126, "step": 29495 }, { "epoch": 0.3595237224720607, "grad_norm": 2.155844211578369, "learning_rate": 3.3710070558050035e-06, "loss": 0.9375, "step": 29500 }, { "epoch": 0.35958465869620854, "grad_norm": 1.8222657442092896, "learning_rate": 3.3706863373957665e-06, "loss": 0.8559, "step": 29505 }, { "epoch": 0.35964559492035636, "grad_norm": 2.0610146522521973, "learning_rate": 3.3703656189865303e-06, "loss": 0.8346, "step": 29510 }, { "epoch": 0.3597065311445042, "grad_norm": 2.2105517387390137, "learning_rate": 3.3700449005772934e-06, "loss": 0.9292, "step": 29515 }, { "epoch": 0.35976746736865195, "grad_norm": 2.198326349258423, "learning_rate": 3.3697241821680564e-06, "loss": 0.851, "step": 29520 }, { "epoch": 0.3598284035927998, "grad_norm": 1.828461766242981, "learning_rate": 3.3694034637588202e-06, "loss": 0.804, "step": 29525 }, { "epoch": 0.3598893398169476, "grad_norm": 1.8280953168869019, "learning_rate": 3.3690827453495833e-06, "loss": 0.8492, "step": 29530 }, { "epoch": 0.35995027604109536, "grad_norm": 2.3106441497802734, "learning_rate": 3.3687620269403467e-06, "loss": 0.907, "step": 29535 }, { "epoch": 0.3600112122652432, "grad_norm": 1.808014988899231, "learning_rate": 3.36844130853111e-06, "loss": 0.769, "step": 29540 }, { "epoch": 0.360072148489391, "grad_norm": 1.9949296712875366, "learning_rate": 3.368120590121873e-06, "loss": 0.8635, "step": 29545 }, { "epoch": 0.36013308471353883, "grad_norm": 1.9573668241500854, "learning_rate": 3.3677998717126366e-06, "loss": 0.9286, "step": 29550 }, { "epoch": 0.3601940209376866, "grad_norm": 1.874595284461975, "learning_rate": 3.3674791533034e-06, "loss": 0.8572, "step": 29555 }, { "epoch": 0.3602549571618344, "grad_norm": 2.161532163619995, "learning_rate": 3.3671584348941635e-06, "loss": 0.8863, "step": 29560 }, { "epoch": 0.36031589338598224, "grad_norm": 2.057551860809326, "learning_rate": 3.3668377164849265e-06, "loss": 0.8435, "step": 29565 }, { "epoch": 0.36037682961013, "grad_norm": 1.7245796918869019, "learning_rate": 3.3665169980756895e-06, "loss": 0.8654, "step": 29570 }, { "epoch": 0.36043776583427783, "grad_norm": 2.0309112071990967, "learning_rate": 3.3661962796664534e-06, "loss": 0.8481, "step": 29575 }, { "epoch": 0.36049870205842566, "grad_norm": 1.9878830909729004, "learning_rate": 3.3658755612572164e-06, "loss": 0.8921, "step": 29580 }, { "epoch": 0.3605596382825735, "grad_norm": 2.0441527366638184, "learning_rate": 3.3655548428479794e-06, "loss": 0.8786, "step": 29585 }, { "epoch": 0.36062057450672125, "grad_norm": 1.9258110523223877, "learning_rate": 3.3652341244387433e-06, "loss": 0.9027, "step": 29590 }, { "epoch": 0.36068151073086907, "grad_norm": 1.834726333618164, "learning_rate": 3.3649134060295063e-06, "loss": 0.832, "step": 29595 }, { "epoch": 0.3607424469550169, "grad_norm": 2.0294911861419678, "learning_rate": 3.3645926876202693e-06, "loss": 0.9206, "step": 29600 }, { "epoch": 0.36080338317916466, "grad_norm": 1.8728761672973633, "learning_rate": 3.364271969211033e-06, "loss": 0.9162, "step": 29605 }, { "epoch": 0.3608643194033125, "grad_norm": 1.9239767789840698, "learning_rate": 3.363951250801796e-06, "loss": 0.8674, "step": 29610 }, { "epoch": 0.3609252556274603, "grad_norm": 1.7373594045639038, "learning_rate": 3.3636305323925596e-06, "loss": 0.9697, "step": 29615 }, { "epoch": 0.36098619185160813, "grad_norm": 1.8695632219314575, "learning_rate": 3.363309813983323e-06, "loss": 0.7968, "step": 29620 }, { "epoch": 0.3610471280757559, "grad_norm": 1.7969632148742676, "learning_rate": 3.362989095574086e-06, "loss": 0.8925, "step": 29625 }, { "epoch": 0.3611080642999037, "grad_norm": 2.4844627380371094, "learning_rate": 3.3626683771648495e-06, "loss": 0.9289, "step": 29630 }, { "epoch": 0.36116900052405154, "grad_norm": 2.386432647705078, "learning_rate": 3.362347658755613e-06, "loss": 0.8281, "step": 29635 }, { "epoch": 0.3612299367481993, "grad_norm": 2.007673740386963, "learning_rate": 3.3620269403463764e-06, "loss": 0.7542, "step": 29640 }, { "epoch": 0.36129087297234713, "grad_norm": 1.6080923080444336, "learning_rate": 3.3617062219371394e-06, "loss": 0.821, "step": 29645 }, { "epoch": 0.36135180919649496, "grad_norm": 1.803846001625061, "learning_rate": 3.361385503527903e-06, "loss": 0.8422, "step": 29650 }, { "epoch": 0.3614127454206428, "grad_norm": 2.280698299407959, "learning_rate": 3.3610647851186663e-06, "loss": 0.8018, "step": 29655 }, { "epoch": 0.36147368164479055, "grad_norm": 1.8947575092315674, "learning_rate": 3.3607440667094293e-06, "loss": 0.8951, "step": 29660 }, { "epoch": 0.36153461786893837, "grad_norm": 2.4247326850891113, "learning_rate": 3.3604233483001923e-06, "loss": 0.8324, "step": 29665 }, { "epoch": 0.3615955540930862, "grad_norm": 2.2820441722869873, "learning_rate": 3.360102629890956e-06, "loss": 0.8547, "step": 29670 }, { "epoch": 0.36165649031723396, "grad_norm": 2.224332571029663, "learning_rate": 3.359781911481719e-06, "loss": 0.8809, "step": 29675 }, { "epoch": 0.3617174265413818, "grad_norm": 1.7583277225494385, "learning_rate": 3.3594611930724822e-06, "loss": 0.8784, "step": 29680 }, { "epoch": 0.3617783627655296, "grad_norm": 2.0906100273132324, "learning_rate": 3.359140474663246e-06, "loss": 0.8039, "step": 29685 }, { "epoch": 0.3618392989896774, "grad_norm": 1.9112218618392944, "learning_rate": 3.358819756254009e-06, "loss": 0.7724, "step": 29690 }, { "epoch": 0.3619002352138252, "grad_norm": 1.7446777820587158, "learning_rate": 3.3584990378447725e-06, "loss": 0.7681, "step": 29695 }, { "epoch": 0.361961171437973, "grad_norm": 2.1655240058898926, "learning_rate": 3.358178319435536e-06, "loss": 0.8649, "step": 29700 }, { "epoch": 0.36202210766212084, "grad_norm": 1.9477856159210205, "learning_rate": 3.357857601026299e-06, "loss": 0.8346, "step": 29705 }, { "epoch": 0.3620830438862686, "grad_norm": 1.755603551864624, "learning_rate": 3.3575368826170624e-06, "loss": 0.8655, "step": 29710 }, { "epoch": 0.36214398011041643, "grad_norm": 2.0745601654052734, "learning_rate": 3.357216164207826e-06, "loss": 0.8763, "step": 29715 }, { "epoch": 0.36220491633456425, "grad_norm": 1.9426342248916626, "learning_rate": 3.3568954457985893e-06, "loss": 0.8282, "step": 29720 }, { "epoch": 0.3622658525587121, "grad_norm": 1.9767838716506958, "learning_rate": 3.3565747273893523e-06, "loss": 0.8576, "step": 29725 }, { "epoch": 0.36232678878285984, "grad_norm": 1.7795084714889526, "learning_rate": 3.356254008980116e-06, "loss": 0.8909, "step": 29730 }, { "epoch": 0.36238772500700767, "grad_norm": 2.07376766204834, "learning_rate": 3.3559332905708792e-06, "loss": 0.8957, "step": 29735 }, { "epoch": 0.3624486612311555, "grad_norm": 2.027449607849121, "learning_rate": 3.3556125721616422e-06, "loss": 0.8549, "step": 29740 }, { "epoch": 0.36250959745530326, "grad_norm": 1.7196153402328491, "learning_rate": 3.3552918537524053e-06, "loss": 0.8844, "step": 29745 }, { "epoch": 0.3625705336794511, "grad_norm": 2.1237428188323975, "learning_rate": 3.354971135343169e-06, "loss": 0.8094, "step": 29750 }, { "epoch": 0.3626314699035989, "grad_norm": 2.0493710041046143, "learning_rate": 3.354650416933932e-06, "loss": 0.8265, "step": 29755 }, { "epoch": 0.3626924061277467, "grad_norm": 2.0490241050720215, "learning_rate": 3.3543296985246956e-06, "loss": 0.8286, "step": 29760 }, { "epoch": 0.3627533423518945, "grad_norm": 2.0396957397460938, "learning_rate": 3.354008980115459e-06, "loss": 0.9657, "step": 29765 }, { "epoch": 0.3628142785760423, "grad_norm": 1.6660798788070679, "learning_rate": 3.353688261706222e-06, "loss": 0.8493, "step": 29770 }, { "epoch": 0.36287521480019014, "grad_norm": 1.9911985397338867, "learning_rate": 3.3533675432969855e-06, "loss": 0.849, "step": 29775 }, { "epoch": 0.3629361510243379, "grad_norm": 1.9335975646972656, "learning_rate": 3.353046824887749e-06, "loss": 0.9008, "step": 29780 }, { "epoch": 0.36299708724848573, "grad_norm": 1.9040708541870117, "learning_rate": 3.3527261064785124e-06, "loss": 0.8678, "step": 29785 }, { "epoch": 0.36305802347263355, "grad_norm": 2.3458564281463623, "learning_rate": 3.3524053880692754e-06, "loss": 0.793, "step": 29790 }, { "epoch": 0.3631189596967814, "grad_norm": 1.6729583740234375, "learning_rate": 3.352084669660039e-06, "loss": 0.8939, "step": 29795 }, { "epoch": 0.36317989592092914, "grad_norm": 2.104438066482544, "learning_rate": 3.3517639512508023e-06, "loss": 0.8859, "step": 29800 }, { "epoch": 0.36324083214507696, "grad_norm": 2.0970919132232666, "learning_rate": 3.3514432328415653e-06, "loss": 0.91, "step": 29805 }, { "epoch": 0.3633017683692248, "grad_norm": 1.8731896877288818, "learning_rate": 3.351122514432329e-06, "loss": 0.8789, "step": 29810 }, { "epoch": 0.36336270459337255, "grad_norm": 2.196174383163452, "learning_rate": 3.350801796023092e-06, "loss": 0.8967, "step": 29815 }, { "epoch": 0.3634236408175204, "grad_norm": 1.9468947649002075, "learning_rate": 3.350481077613855e-06, "loss": 0.8749, "step": 29820 }, { "epoch": 0.3634845770416682, "grad_norm": 2.040847063064575, "learning_rate": 3.350160359204618e-06, "loss": 0.8683, "step": 29825 }, { "epoch": 0.363545513265816, "grad_norm": 1.7460726499557495, "learning_rate": 3.349839640795382e-06, "loss": 0.8993, "step": 29830 }, { "epoch": 0.3636064494899638, "grad_norm": 2.0082833766937256, "learning_rate": 3.349518922386145e-06, "loss": 0.8641, "step": 29835 }, { "epoch": 0.3636673857141116, "grad_norm": 1.7681597471237183, "learning_rate": 3.3491982039769085e-06, "loss": 0.8732, "step": 29840 }, { "epoch": 0.36372832193825944, "grad_norm": 1.902339220046997, "learning_rate": 3.348877485567672e-06, "loss": 0.8854, "step": 29845 }, { "epoch": 0.3637892581624072, "grad_norm": 1.6997400522232056, "learning_rate": 3.348556767158435e-06, "loss": 0.8625, "step": 29850 }, { "epoch": 0.363850194386555, "grad_norm": 1.7062807083129883, "learning_rate": 3.3482360487491984e-06, "loss": 0.8347, "step": 29855 }, { "epoch": 0.36391113061070285, "grad_norm": 1.776188850402832, "learning_rate": 3.347915330339962e-06, "loss": 0.8665, "step": 29860 }, { "epoch": 0.36397206683485067, "grad_norm": 2.0995352268218994, "learning_rate": 3.3475946119307253e-06, "loss": 0.8924, "step": 29865 }, { "epoch": 0.36403300305899844, "grad_norm": 1.9947843551635742, "learning_rate": 3.3472738935214883e-06, "loss": 0.8742, "step": 29870 }, { "epoch": 0.36409393928314626, "grad_norm": 2.005129098892212, "learning_rate": 3.3469531751122517e-06, "loss": 0.827, "step": 29875 }, { "epoch": 0.3641548755072941, "grad_norm": 2.1850223541259766, "learning_rate": 3.346632456703015e-06, "loss": 0.8902, "step": 29880 }, { "epoch": 0.36421581173144185, "grad_norm": 1.6267670392990112, "learning_rate": 3.346311738293778e-06, "loss": 0.8784, "step": 29885 }, { "epoch": 0.3642767479555897, "grad_norm": 1.80473792552948, "learning_rate": 3.345991019884542e-06, "loss": 0.8502, "step": 29890 }, { "epoch": 0.3643376841797375, "grad_norm": 1.8459364175796509, "learning_rate": 3.345670301475305e-06, "loss": 0.8643, "step": 29895 }, { "epoch": 0.3643986204038853, "grad_norm": 1.9458726644515991, "learning_rate": 3.345349583066068e-06, "loss": 0.8745, "step": 29900 }, { "epoch": 0.3644595566280331, "grad_norm": 1.7753995656967163, "learning_rate": 3.345028864656831e-06, "loss": 0.8423, "step": 29905 }, { "epoch": 0.3645204928521809, "grad_norm": 1.7795650959014893, "learning_rate": 3.344708146247595e-06, "loss": 0.7529, "step": 29910 }, { "epoch": 0.36458142907632873, "grad_norm": 1.798048973083496, "learning_rate": 3.344387427838358e-06, "loss": 0.8601, "step": 29915 }, { "epoch": 0.3646423653004765, "grad_norm": 1.924535870552063, "learning_rate": 3.3440667094291214e-06, "loss": 0.9897, "step": 29920 }, { "epoch": 0.3647033015246243, "grad_norm": 2.0916411876678467, "learning_rate": 3.343745991019885e-06, "loss": 0.865, "step": 29925 }, { "epoch": 0.36476423774877215, "grad_norm": 2.00882887840271, "learning_rate": 3.343425272610648e-06, "loss": 0.9137, "step": 29930 }, { "epoch": 0.36482517397291997, "grad_norm": 2.288438320159912, "learning_rate": 3.3431045542014113e-06, "loss": 0.8937, "step": 29935 }, { "epoch": 0.36488611019706774, "grad_norm": 1.7315454483032227, "learning_rate": 3.3427838357921748e-06, "loss": 0.8578, "step": 29940 }, { "epoch": 0.36494704642121556, "grad_norm": 2.014173746109009, "learning_rate": 3.342463117382938e-06, "loss": 0.9038, "step": 29945 }, { "epoch": 0.3650079826453634, "grad_norm": 2.3408634662628174, "learning_rate": 3.3421423989737012e-06, "loss": 0.8686, "step": 29950 }, { "epoch": 0.36506891886951115, "grad_norm": 1.7174022197723389, "learning_rate": 3.341821680564465e-06, "loss": 0.9073, "step": 29955 }, { "epoch": 0.365129855093659, "grad_norm": 1.8850494623184204, "learning_rate": 3.341500962155228e-06, "loss": 0.8597, "step": 29960 }, { "epoch": 0.3651907913178068, "grad_norm": 1.7925598621368408, "learning_rate": 3.341180243745991e-06, "loss": 0.8838, "step": 29965 }, { "epoch": 0.36525172754195456, "grad_norm": 2.712948799133301, "learning_rate": 3.340859525336755e-06, "loss": 0.8571, "step": 29970 }, { "epoch": 0.3653126637661024, "grad_norm": 2.0229175090789795, "learning_rate": 3.340538806927518e-06, "loss": 0.8265, "step": 29975 }, { "epoch": 0.3653735999902502, "grad_norm": 1.9762921333312988, "learning_rate": 3.340218088518281e-06, "loss": 0.8829, "step": 29980 }, { "epoch": 0.36543453621439803, "grad_norm": 1.8286585807800293, "learning_rate": 3.339897370109045e-06, "loss": 0.841, "step": 29985 }, { "epoch": 0.3654954724385458, "grad_norm": 1.9058412313461304, "learning_rate": 3.339576651699808e-06, "loss": 0.8157, "step": 29990 }, { "epoch": 0.3655564086626936, "grad_norm": 1.9289764165878296, "learning_rate": 3.339255933290571e-06, "loss": 0.9312, "step": 29995 }, { "epoch": 0.36561734488684144, "grad_norm": 3.5843427181243896, "learning_rate": 3.3389352148813344e-06, "loss": 0.8637, "step": 30000 }, { "epoch": 0.3656782811109892, "grad_norm": 1.953119158744812, "learning_rate": 3.338614496472098e-06, "loss": 0.8686, "step": 30005 }, { "epoch": 0.36573921733513703, "grad_norm": 1.7429903745651245, "learning_rate": 3.3382937780628612e-06, "loss": 0.864, "step": 30010 }, { "epoch": 0.36580015355928486, "grad_norm": 1.8160570859909058, "learning_rate": 3.3379730596536243e-06, "loss": 0.9335, "step": 30015 }, { "epoch": 0.3658610897834327, "grad_norm": 2.561150074005127, "learning_rate": 3.3376523412443877e-06, "loss": 0.8513, "step": 30020 }, { "epoch": 0.36592202600758045, "grad_norm": 1.782701015472412, "learning_rate": 3.337331622835151e-06, "loss": 0.8996, "step": 30025 }, { "epoch": 0.36598296223172827, "grad_norm": 2.1741573810577393, "learning_rate": 3.337010904425914e-06, "loss": 0.8346, "step": 30030 }, { "epoch": 0.3660438984558761, "grad_norm": 1.9958535432815552, "learning_rate": 3.336690186016678e-06, "loss": 0.8283, "step": 30035 }, { "epoch": 0.36610483468002386, "grad_norm": 1.5192075967788696, "learning_rate": 3.336369467607441e-06, "loss": 0.8097, "step": 30040 }, { "epoch": 0.3661657709041717, "grad_norm": 1.9548189640045166, "learning_rate": 3.336048749198204e-06, "loss": 0.7405, "step": 30045 }, { "epoch": 0.3662267071283195, "grad_norm": 2.1870315074920654, "learning_rate": 3.335728030788968e-06, "loss": 0.8593, "step": 30050 }, { "epoch": 0.36628764335246733, "grad_norm": 2.565646171569824, "learning_rate": 3.335407312379731e-06, "loss": 0.9037, "step": 30055 }, { "epoch": 0.3663485795766151, "grad_norm": 1.9570358991622925, "learning_rate": 3.335086593970494e-06, "loss": 0.8574, "step": 30060 }, { "epoch": 0.3664095158007629, "grad_norm": 1.691054105758667, "learning_rate": 3.334765875561258e-06, "loss": 0.8663, "step": 30065 }, { "epoch": 0.36647045202491074, "grad_norm": 1.7828329801559448, "learning_rate": 3.334445157152021e-06, "loss": 0.8334, "step": 30070 }, { "epoch": 0.3665313882490585, "grad_norm": 1.651134967803955, "learning_rate": 3.334124438742784e-06, "loss": 0.813, "step": 30075 }, { "epoch": 0.36659232447320633, "grad_norm": 1.755629301071167, "learning_rate": 3.3338037203335473e-06, "loss": 0.778, "step": 30080 }, { "epoch": 0.36665326069735416, "grad_norm": 1.830381989479065, "learning_rate": 3.3334830019243107e-06, "loss": 0.8341, "step": 30085 }, { "epoch": 0.366714196921502, "grad_norm": 2.4219443798065186, "learning_rate": 3.333162283515074e-06, "loss": 0.8307, "step": 30090 }, { "epoch": 0.36677513314564975, "grad_norm": 1.7306578159332275, "learning_rate": 3.332841565105837e-06, "loss": 0.8639, "step": 30095 }, { "epoch": 0.36683606936979757, "grad_norm": 1.669824242591858, "learning_rate": 3.3325208466966006e-06, "loss": 0.8555, "step": 30100 }, { "epoch": 0.3668970055939454, "grad_norm": 2.016637086868286, "learning_rate": 3.332200128287364e-06, "loss": 0.8991, "step": 30105 }, { "epoch": 0.36695794181809316, "grad_norm": 1.6479215621948242, "learning_rate": 3.331879409878127e-06, "loss": 0.8349, "step": 30110 }, { "epoch": 0.367018878042241, "grad_norm": 1.812656283378601, "learning_rate": 3.331558691468891e-06, "loss": 0.9047, "step": 30115 }, { "epoch": 0.3670798142663888, "grad_norm": 1.9217208623886108, "learning_rate": 3.331237973059654e-06, "loss": 0.8122, "step": 30120 }, { "epoch": 0.3671407504905366, "grad_norm": 1.8352503776550293, "learning_rate": 3.330917254650417e-06, "loss": 0.8699, "step": 30125 }, { "epoch": 0.3672016867146844, "grad_norm": 2.064079999923706, "learning_rate": 3.330596536241181e-06, "loss": 0.8973, "step": 30130 }, { "epoch": 0.3672626229388322, "grad_norm": 2.6182913780212402, "learning_rate": 3.330275817831944e-06, "loss": 0.8269, "step": 30135 }, { "epoch": 0.36732355916298004, "grad_norm": 1.535702109336853, "learning_rate": 3.329955099422707e-06, "loss": 0.8637, "step": 30140 }, { "epoch": 0.3673844953871278, "grad_norm": 1.7922402620315552, "learning_rate": 3.3296343810134707e-06, "loss": 0.8405, "step": 30145 }, { "epoch": 0.36744543161127563, "grad_norm": 1.952046275138855, "learning_rate": 3.3293136626042338e-06, "loss": 0.9175, "step": 30150 }, { "epoch": 0.36750636783542345, "grad_norm": 1.7455792427062988, "learning_rate": 3.3289929441949968e-06, "loss": 0.7357, "step": 30155 }, { "epoch": 0.3675673040595713, "grad_norm": 2.1646244525909424, "learning_rate": 3.32867222578576e-06, "loss": 0.887, "step": 30160 }, { "epoch": 0.36762824028371904, "grad_norm": 1.8328040838241577, "learning_rate": 3.3283515073765237e-06, "loss": 0.9014, "step": 30165 }, { "epoch": 0.36768917650786687, "grad_norm": 2.2206482887268066, "learning_rate": 3.328030788967287e-06, "loss": 0.8833, "step": 30170 }, { "epoch": 0.3677501127320147, "grad_norm": 2.0248374938964844, "learning_rate": 3.32771007055805e-06, "loss": 0.9212, "step": 30175 }, { "epoch": 0.36781104895616246, "grad_norm": 1.7534477710723877, "learning_rate": 3.3273893521488135e-06, "loss": 0.8223, "step": 30180 }, { "epoch": 0.3678719851803103, "grad_norm": 1.87571120262146, "learning_rate": 3.327068633739577e-06, "loss": 0.8106, "step": 30185 }, { "epoch": 0.3679329214044581, "grad_norm": 1.8946725130081177, "learning_rate": 3.32674791533034e-06, "loss": 0.8621, "step": 30190 }, { "epoch": 0.3679938576286059, "grad_norm": 2.212285280227661, "learning_rate": 3.326427196921104e-06, "loss": 0.8455, "step": 30195 }, { "epoch": 0.3680547938527537, "grad_norm": 1.7103822231292725, "learning_rate": 3.326106478511867e-06, "loss": 0.754, "step": 30200 }, { "epoch": 0.3681157300769015, "grad_norm": 2.117598056793213, "learning_rate": 3.32578576010263e-06, "loss": 0.7893, "step": 30205 }, { "epoch": 0.36817666630104934, "grad_norm": 1.965812087059021, "learning_rate": 3.3254650416933938e-06, "loss": 0.8612, "step": 30210 }, { "epoch": 0.3682376025251971, "grad_norm": 2.077660083770752, "learning_rate": 3.3251443232841568e-06, "loss": 0.7741, "step": 30215 }, { "epoch": 0.36829853874934493, "grad_norm": 1.8834508657455444, "learning_rate": 3.32482360487492e-06, "loss": 0.8793, "step": 30220 }, { "epoch": 0.36835947497349275, "grad_norm": 2.3719887733459473, "learning_rate": 3.3245028864656837e-06, "loss": 0.8587, "step": 30225 }, { "epoch": 0.3684204111976406, "grad_norm": 1.7000620365142822, "learning_rate": 3.3241821680564467e-06, "loss": 0.9111, "step": 30230 }, { "epoch": 0.36848134742178834, "grad_norm": 2.5185599327087402, "learning_rate": 3.32386144964721e-06, "loss": 0.92, "step": 30235 }, { "epoch": 0.36854228364593616, "grad_norm": 1.8232313394546509, "learning_rate": 3.3235407312379736e-06, "loss": 0.9102, "step": 30240 }, { "epoch": 0.368603219870084, "grad_norm": 1.854621171951294, "learning_rate": 3.3232200128287366e-06, "loss": 0.8356, "step": 30245 }, { "epoch": 0.36866415609423175, "grad_norm": 2.1208012104034424, "learning_rate": 3.3228992944195e-06, "loss": 0.8931, "step": 30250 }, { "epoch": 0.3687250923183796, "grad_norm": 2.02376651763916, "learning_rate": 3.322578576010263e-06, "loss": 0.8593, "step": 30255 }, { "epoch": 0.3687860285425274, "grad_norm": 1.68917715549469, "learning_rate": 3.322257857601027e-06, "loss": 0.8434, "step": 30260 }, { "epoch": 0.3688469647666752, "grad_norm": 2.0794644355773926, "learning_rate": 3.32193713919179e-06, "loss": 0.9291, "step": 30265 }, { "epoch": 0.368907900990823, "grad_norm": 1.6644037961959839, "learning_rate": 3.321616420782553e-06, "loss": 0.9547, "step": 30270 }, { "epoch": 0.3689688372149708, "grad_norm": 2.0073928833007812, "learning_rate": 3.321295702373317e-06, "loss": 0.817, "step": 30275 }, { "epoch": 0.36902977343911864, "grad_norm": 1.8379775285720825, "learning_rate": 3.32097498396408e-06, "loss": 0.8081, "step": 30280 }, { "epoch": 0.3690907096632664, "grad_norm": 2.2067151069641113, "learning_rate": 3.320654265554843e-06, "loss": 0.8587, "step": 30285 }, { "epoch": 0.3691516458874142, "grad_norm": 2.0152077674865723, "learning_rate": 3.3203335471456067e-06, "loss": 0.8135, "step": 30290 }, { "epoch": 0.36921258211156205, "grad_norm": 1.7530795335769653, "learning_rate": 3.3200128287363697e-06, "loss": 0.8346, "step": 30295 }, { "epoch": 0.36927351833570987, "grad_norm": 1.9888622760772705, "learning_rate": 3.3196921103271327e-06, "loss": 0.7917, "step": 30300 }, { "epoch": 0.36933445455985764, "grad_norm": 2.3392281532287598, "learning_rate": 3.3193713919178966e-06, "loss": 0.8547, "step": 30305 }, { "epoch": 0.36939539078400546, "grad_norm": 1.986987590789795, "learning_rate": 3.3190506735086596e-06, "loss": 0.8334, "step": 30310 }, { "epoch": 0.3694563270081533, "grad_norm": 1.9379281997680664, "learning_rate": 3.318729955099423e-06, "loss": 0.804, "step": 30315 }, { "epoch": 0.36951726323230105, "grad_norm": 2.019953489303589, "learning_rate": 3.3184092366901865e-06, "loss": 0.8576, "step": 30320 }, { "epoch": 0.3695781994564489, "grad_norm": 1.9891575574874878, "learning_rate": 3.3180885182809495e-06, "loss": 0.8914, "step": 30325 }, { "epoch": 0.3696391356805967, "grad_norm": 1.6897754669189453, "learning_rate": 3.317767799871713e-06, "loss": 0.8641, "step": 30330 }, { "epoch": 0.3697000719047445, "grad_norm": 2.677774429321289, "learning_rate": 3.317447081462476e-06, "loss": 0.7684, "step": 30335 }, { "epoch": 0.3697610081288923, "grad_norm": 2.0755813121795654, "learning_rate": 3.31712636305324e-06, "loss": 0.8793, "step": 30340 }, { "epoch": 0.3698219443530401, "grad_norm": 2.2200725078582764, "learning_rate": 3.316805644644003e-06, "loss": 0.8148, "step": 30345 }, { "epoch": 0.36988288057718793, "grad_norm": 2.311957359313965, "learning_rate": 3.316484926234766e-06, "loss": 0.8533, "step": 30350 }, { "epoch": 0.3699438168013357, "grad_norm": 2.0081748962402344, "learning_rate": 3.3161642078255297e-06, "loss": 0.9063, "step": 30355 }, { "epoch": 0.3700047530254835, "grad_norm": 2.147353172302246, "learning_rate": 3.3158434894162927e-06, "loss": 0.8503, "step": 30360 }, { "epoch": 0.37006568924963135, "grad_norm": 1.5183367729187012, "learning_rate": 3.3155227710070558e-06, "loss": 0.8167, "step": 30365 }, { "epoch": 0.37012662547377917, "grad_norm": 1.7421810626983643, "learning_rate": 3.3152020525978196e-06, "loss": 0.8408, "step": 30370 }, { "epoch": 0.37018756169792694, "grad_norm": 2.0614633560180664, "learning_rate": 3.3148813341885826e-06, "loss": 0.8895, "step": 30375 }, { "epoch": 0.37024849792207476, "grad_norm": 2.1319704055786133, "learning_rate": 3.3145606157793457e-06, "loss": 0.8567, "step": 30380 }, { "epoch": 0.3703094341462226, "grad_norm": 1.7795863151550293, "learning_rate": 3.3142398973701095e-06, "loss": 0.8443, "step": 30385 }, { "epoch": 0.37037037037037035, "grad_norm": 2.047158718109131, "learning_rate": 3.3139191789608725e-06, "loss": 0.8637, "step": 30390 }, { "epoch": 0.3704313065945182, "grad_norm": 1.8686550855636597, "learning_rate": 3.313598460551636e-06, "loss": 0.9788, "step": 30395 }, { "epoch": 0.370492242818666, "grad_norm": 2.3251585960388184, "learning_rate": 3.3132777421423994e-06, "loss": 0.848, "step": 30400 }, { "epoch": 0.3705531790428138, "grad_norm": 1.7781095504760742, "learning_rate": 3.3129570237331624e-06, "loss": 0.8711, "step": 30405 }, { "epoch": 0.3706141152669616, "grad_norm": 1.8034249544143677, "learning_rate": 3.312636305323926e-06, "loss": 0.8085, "step": 30410 }, { "epoch": 0.3706750514911094, "grad_norm": 1.8797246217727661, "learning_rate": 3.312315586914689e-06, "loss": 0.8845, "step": 30415 }, { "epoch": 0.37073598771525723, "grad_norm": 1.881727933883667, "learning_rate": 3.3119948685054528e-06, "loss": 0.9145, "step": 30420 }, { "epoch": 0.370796923939405, "grad_norm": 1.931968331336975, "learning_rate": 3.3116741500962158e-06, "loss": 0.9293, "step": 30425 }, { "epoch": 0.3708578601635528, "grad_norm": 2.124933958053589, "learning_rate": 3.3113534316869788e-06, "loss": 0.8179, "step": 30430 }, { "epoch": 0.37091879638770064, "grad_norm": 2.3657443523406982, "learning_rate": 3.3110327132777427e-06, "loss": 0.9314, "step": 30435 }, { "epoch": 0.3709797326118484, "grad_norm": 1.880527377128601, "learning_rate": 3.3107119948685057e-06, "loss": 0.8118, "step": 30440 }, { "epoch": 0.37104066883599623, "grad_norm": 1.970147967338562, "learning_rate": 3.3103912764592687e-06, "loss": 0.8589, "step": 30445 }, { "epoch": 0.37110160506014406, "grad_norm": 1.8585444688796997, "learning_rate": 3.3100705580500325e-06, "loss": 0.8096, "step": 30450 }, { "epoch": 0.3711625412842919, "grad_norm": 1.8105294704437256, "learning_rate": 3.3097498396407956e-06, "loss": 0.9233, "step": 30455 }, { "epoch": 0.37122347750843965, "grad_norm": 1.8829911947250366, "learning_rate": 3.309429121231559e-06, "loss": 0.8147, "step": 30460 }, { "epoch": 0.37128441373258747, "grad_norm": 1.9167070388793945, "learning_rate": 3.3091084028223224e-06, "loss": 0.8283, "step": 30465 }, { "epoch": 0.3713453499567353, "grad_norm": 1.9125419855117798, "learning_rate": 3.3087876844130855e-06, "loss": 0.8419, "step": 30470 }, { "epoch": 0.37140628618088306, "grad_norm": 1.6704763174057007, "learning_rate": 3.308466966003849e-06, "loss": 0.8587, "step": 30475 }, { "epoch": 0.3714672224050309, "grad_norm": 2.177640676498413, "learning_rate": 3.3081462475946123e-06, "loss": 0.8374, "step": 30480 }, { "epoch": 0.3715281586291787, "grad_norm": 1.8937433958053589, "learning_rate": 3.3078255291853758e-06, "loss": 0.7845, "step": 30485 }, { "epoch": 0.37158909485332653, "grad_norm": 1.981582760810852, "learning_rate": 3.307504810776139e-06, "loss": 0.8262, "step": 30490 }, { "epoch": 0.3716500310774743, "grad_norm": 2.0062689781188965, "learning_rate": 3.307184092366902e-06, "loss": 0.8966, "step": 30495 }, { "epoch": 0.3717109673016221, "grad_norm": 2.265364408493042, "learning_rate": 3.3068633739576657e-06, "loss": 0.9178, "step": 30500 }, { "epoch": 0.37177190352576994, "grad_norm": 1.9052164554595947, "learning_rate": 3.3065426555484287e-06, "loss": 0.8455, "step": 30505 }, { "epoch": 0.3718328397499177, "grad_norm": 2.018397092819214, "learning_rate": 3.3062219371391917e-06, "loss": 0.8474, "step": 30510 }, { "epoch": 0.37189377597406553, "grad_norm": 2.162984609603882, "learning_rate": 3.3059012187299556e-06, "loss": 0.859, "step": 30515 }, { "epoch": 0.37195471219821336, "grad_norm": 1.9884538650512695, "learning_rate": 3.3055805003207186e-06, "loss": 0.8678, "step": 30520 }, { "epoch": 0.3720156484223612, "grad_norm": 2.282442331314087, "learning_rate": 3.3052597819114816e-06, "loss": 0.8949, "step": 30525 }, { "epoch": 0.37207658464650895, "grad_norm": 1.9822107553482056, "learning_rate": 3.3049390635022455e-06, "loss": 0.8179, "step": 30530 }, { "epoch": 0.37213752087065677, "grad_norm": 1.9085302352905273, "learning_rate": 3.3046183450930085e-06, "loss": 0.8456, "step": 30535 }, { "epoch": 0.3721984570948046, "grad_norm": 1.9648891687393188, "learning_rate": 3.304297626683772e-06, "loss": 0.7974, "step": 30540 }, { "epoch": 0.37225939331895236, "grad_norm": 1.8422508239746094, "learning_rate": 3.3039769082745354e-06, "loss": 0.8305, "step": 30545 }, { "epoch": 0.3723203295431002, "grad_norm": 2.0017778873443604, "learning_rate": 3.3036561898652984e-06, "loss": 0.9444, "step": 30550 }, { "epoch": 0.372381265767248, "grad_norm": 1.8687071800231934, "learning_rate": 3.303335471456062e-06, "loss": 0.8045, "step": 30555 }, { "epoch": 0.3724422019913958, "grad_norm": 1.9642587900161743, "learning_rate": 3.3030147530468253e-06, "loss": 0.9225, "step": 30560 }, { "epoch": 0.3725031382155436, "grad_norm": 2.0340418815612793, "learning_rate": 3.3026940346375887e-06, "loss": 0.8964, "step": 30565 }, { "epoch": 0.3725640744396914, "grad_norm": 1.8217418193817139, "learning_rate": 3.3023733162283517e-06, "loss": 0.815, "step": 30570 }, { "epoch": 0.37262501066383924, "grad_norm": 1.8619740009307861, "learning_rate": 3.302052597819115e-06, "loss": 0.8368, "step": 30575 }, { "epoch": 0.372685946887987, "grad_norm": 2.086517095565796, "learning_rate": 3.3017318794098786e-06, "loss": 0.867, "step": 30580 }, { "epoch": 0.37274688311213483, "grad_norm": 1.9090182781219482, "learning_rate": 3.3014111610006416e-06, "loss": 0.7832, "step": 30585 }, { "epoch": 0.37280781933628265, "grad_norm": 2.232343912124634, "learning_rate": 3.3010904425914046e-06, "loss": 0.881, "step": 30590 }, { "epoch": 0.3728687555604305, "grad_norm": 1.7461997270584106, "learning_rate": 3.3007697241821685e-06, "loss": 0.8095, "step": 30595 }, { "epoch": 0.37292969178457824, "grad_norm": 1.647159218788147, "learning_rate": 3.3004490057729315e-06, "loss": 0.8717, "step": 30600 }, { "epoch": 0.37299062800872607, "grad_norm": 2.0008089542388916, "learning_rate": 3.3001282873636945e-06, "loss": 0.8813, "step": 30605 }, { "epoch": 0.3730515642328739, "grad_norm": 1.7504733800888062, "learning_rate": 3.2998075689544584e-06, "loss": 0.8353, "step": 30610 }, { "epoch": 0.37311250045702166, "grad_norm": 1.7410469055175781, "learning_rate": 3.2994868505452214e-06, "loss": 0.8795, "step": 30615 }, { "epoch": 0.3731734366811695, "grad_norm": 1.995167851448059, "learning_rate": 3.299166132135985e-06, "loss": 0.8123, "step": 30620 }, { "epoch": 0.3732343729053173, "grad_norm": 1.9488790035247803, "learning_rate": 3.2988454137267483e-06, "loss": 0.7958, "step": 30625 }, { "epoch": 0.3732953091294651, "grad_norm": 2.331977367401123, "learning_rate": 3.2985246953175113e-06, "loss": 0.829, "step": 30630 }, { "epoch": 0.3733562453536129, "grad_norm": 2.0807316303253174, "learning_rate": 3.2982039769082748e-06, "loss": 0.8096, "step": 30635 }, { "epoch": 0.3734171815777607, "grad_norm": 2.3608269691467285, "learning_rate": 3.297883258499038e-06, "loss": 0.8302, "step": 30640 }, { "epoch": 0.37347811780190854, "grad_norm": 1.899145245552063, "learning_rate": 3.2975625400898016e-06, "loss": 0.9035, "step": 30645 }, { "epoch": 0.3735390540260563, "grad_norm": 1.917783498764038, "learning_rate": 3.2972418216805647e-06, "loss": 0.9255, "step": 30650 }, { "epoch": 0.37359999025020413, "grad_norm": 1.9458023309707642, "learning_rate": 3.296921103271328e-06, "loss": 0.8704, "step": 30655 }, { "epoch": 0.37366092647435195, "grad_norm": 2.184293270111084, "learning_rate": 3.2966003848620915e-06, "loss": 0.8717, "step": 30660 }, { "epoch": 0.3737218626984998, "grad_norm": 2.073692560195923, "learning_rate": 3.2962796664528546e-06, "loss": 0.9052, "step": 30665 }, { "epoch": 0.37378279892264754, "grad_norm": 1.9673265218734741, "learning_rate": 3.2959589480436176e-06, "loss": 0.9046, "step": 30670 }, { "epoch": 0.37384373514679536, "grad_norm": 2.0094006061553955, "learning_rate": 3.2956382296343814e-06, "loss": 0.8551, "step": 30675 }, { "epoch": 0.3739046713709432, "grad_norm": 1.9288018941879272, "learning_rate": 3.2953175112251444e-06, "loss": 0.7925, "step": 30680 }, { "epoch": 0.37396560759509095, "grad_norm": 2.243482828140259, "learning_rate": 3.2949967928159075e-06, "loss": 0.8537, "step": 30685 }, { "epoch": 0.3740265438192388, "grad_norm": 1.4997460842132568, "learning_rate": 3.2946760744066713e-06, "loss": 0.8044, "step": 30690 }, { "epoch": 0.3740874800433866, "grad_norm": 1.9996405839920044, "learning_rate": 3.2943553559974343e-06, "loss": 0.8772, "step": 30695 }, { "epoch": 0.3741484162675344, "grad_norm": 1.935792326927185, "learning_rate": 3.2940346375881978e-06, "loss": 0.8604, "step": 30700 }, { "epoch": 0.3742093524916822, "grad_norm": 1.9208893775939941, "learning_rate": 3.2937139191789612e-06, "loss": 0.8751, "step": 30705 }, { "epoch": 0.37427028871583, "grad_norm": 2.2027058601379395, "learning_rate": 3.2933932007697247e-06, "loss": 0.8388, "step": 30710 }, { "epoch": 0.37433122493997784, "grad_norm": 1.8308690786361694, "learning_rate": 3.2930724823604877e-06, "loss": 0.8062, "step": 30715 }, { "epoch": 0.3743921611641256, "grad_norm": 1.903880000114441, "learning_rate": 3.292751763951251e-06, "loss": 0.838, "step": 30720 }, { "epoch": 0.3744530973882734, "grad_norm": 2.151392936706543, "learning_rate": 3.2924310455420146e-06, "loss": 0.8184, "step": 30725 }, { "epoch": 0.37451403361242125, "grad_norm": 1.6964654922485352, "learning_rate": 3.2921103271327776e-06, "loss": 0.8457, "step": 30730 }, { "epoch": 0.37457496983656907, "grad_norm": 1.8857266902923584, "learning_rate": 3.2917896087235414e-06, "loss": 0.8755, "step": 30735 }, { "epoch": 0.37463590606071684, "grad_norm": 1.9764995574951172, "learning_rate": 3.2914688903143045e-06, "loss": 0.9131, "step": 30740 }, { "epoch": 0.37469684228486466, "grad_norm": 2.023275136947632, "learning_rate": 3.2911481719050675e-06, "loss": 0.8303, "step": 30745 }, { "epoch": 0.3747577785090125, "grad_norm": 2.030133008956909, "learning_rate": 3.2908274534958305e-06, "loss": 0.8409, "step": 30750 }, { "epoch": 0.37481871473316025, "grad_norm": 1.7643922567367554, "learning_rate": 3.2905067350865944e-06, "loss": 0.8288, "step": 30755 }, { "epoch": 0.3748796509573081, "grad_norm": 1.9944000244140625, "learning_rate": 3.2901860166773574e-06, "loss": 0.7223, "step": 30760 }, { "epoch": 0.3749405871814559, "grad_norm": 1.7786139249801636, "learning_rate": 3.289865298268121e-06, "loss": 0.929, "step": 30765 }, { "epoch": 0.3750015234056037, "grad_norm": 2.3407020568847656, "learning_rate": 3.2895445798588843e-06, "loss": 0.8013, "step": 30770 }, { "epoch": 0.3750624596297515, "grad_norm": 2.052917718887329, "learning_rate": 3.2892238614496473e-06, "loss": 0.8442, "step": 30775 }, { "epoch": 0.3751233958538993, "grad_norm": 1.947980523109436, "learning_rate": 3.2889031430404107e-06, "loss": 0.8685, "step": 30780 }, { "epoch": 0.37518433207804713, "grad_norm": 2.002643346786499, "learning_rate": 3.288582424631174e-06, "loss": 0.8592, "step": 30785 }, { "epoch": 0.3752452683021949, "grad_norm": 2.2247467041015625, "learning_rate": 3.2882617062219376e-06, "loss": 0.8512, "step": 30790 }, { "epoch": 0.3753062045263427, "grad_norm": 1.806697130203247, "learning_rate": 3.2879409878127006e-06, "loss": 0.8945, "step": 30795 }, { "epoch": 0.37536714075049055, "grad_norm": 1.9675596952438354, "learning_rate": 3.287620269403464e-06, "loss": 0.8832, "step": 30800 }, { "epoch": 0.37542807697463837, "grad_norm": 1.8444410562515259, "learning_rate": 3.2872995509942275e-06, "loss": 0.8706, "step": 30805 }, { "epoch": 0.37548901319878614, "grad_norm": 2.372645378112793, "learning_rate": 3.2869788325849905e-06, "loss": 0.882, "step": 30810 }, { "epoch": 0.37554994942293396, "grad_norm": 1.902603030204773, "learning_rate": 3.2866581141757544e-06, "loss": 0.8291, "step": 30815 }, { "epoch": 0.3756108856470818, "grad_norm": 2.658810615539551, "learning_rate": 3.2863373957665174e-06, "loss": 0.7871, "step": 30820 }, { "epoch": 0.37567182187122955, "grad_norm": 1.8411000967025757, "learning_rate": 3.2860166773572804e-06, "loss": 0.8999, "step": 30825 }, { "epoch": 0.3757327580953774, "grad_norm": 1.9898262023925781, "learning_rate": 3.2856959589480443e-06, "loss": 0.8106, "step": 30830 }, { "epoch": 0.3757936943195252, "grad_norm": 1.8777180910110474, "learning_rate": 3.2853752405388073e-06, "loss": 0.8575, "step": 30835 }, { "epoch": 0.375854630543673, "grad_norm": 2.1759395599365234, "learning_rate": 3.2850545221295703e-06, "loss": 0.8363, "step": 30840 }, { "epoch": 0.3759155667678208, "grad_norm": 1.9530400037765503, "learning_rate": 3.2847338037203337e-06, "loss": 0.7467, "step": 30845 }, { "epoch": 0.3759765029919686, "grad_norm": 1.9157922267913818, "learning_rate": 3.284413085311097e-06, "loss": 0.9023, "step": 30850 }, { "epoch": 0.37603743921611643, "grad_norm": 2.3647193908691406, "learning_rate": 3.28409236690186e-06, "loss": 0.8455, "step": 30855 }, { "epoch": 0.3760983754402642, "grad_norm": 1.8800179958343506, "learning_rate": 3.2837716484926236e-06, "loss": 0.7819, "step": 30860 }, { "epoch": 0.376159311664412, "grad_norm": 2.1160011291503906, "learning_rate": 3.283450930083387e-06, "loss": 0.9319, "step": 30865 }, { "epoch": 0.37622024788855984, "grad_norm": 1.920762538909912, "learning_rate": 3.2831302116741505e-06, "loss": 0.84, "step": 30870 }, { "epoch": 0.37628118411270767, "grad_norm": 1.7937567234039307, "learning_rate": 3.2828094932649135e-06, "loss": 0.9229, "step": 30875 }, { "epoch": 0.37634212033685543, "grad_norm": 2.2194793224334717, "learning_rate": 3.282488774855677e-06, "loss": 0.8395, "step": 30880 }, { "epoch": 0.37640305656100326, "grad_norm": 1.7944296598434448, "learning_rate": 3.2821680564464404e-06, "loss": 0.8885, "step": 30885 }, { "epoch": 0.3764639927851511, "grad_norm": 1.6706502437591553, "learning_rate": 3.2818473380372034e-06, "loss": 0.8823, "step": 30890 }, { "epoch": 0.37652492900929885, "grad_norm": 2.012298107147217, "learning_rate": 3.2815266196279673e-06, "loss": 0.9123, "step": 30895 }, { "epoch": 0.37658586523344667, "grad_norm": 1.9802510738372803, "learning_rate": 3.2812059012187303e-06, "loss": 0.8585, "step": 30900 }, { "epoch": 0.3766468014575945, "grad_norm": 1.7496130466461182, "learning_rate": 3.2808851828094933e-06, "loss": 0.8484, "step": 30905 }, { "epoch": 0.3767077376817423, "grad_norm": 1.871804118156433, "learning_rate": 3.280564464400257e-06, "loss": 0.8531, "step": 30910 }, { "epoch": 0.3767686739058901, "grad_norm": 1.9037113189697266, "learning_rate": 3.2802437459910202e-06, "loss": 0.8659, "step": 30915 }, { "epoch": 0.3768296101300379, "grad_norm": 1.9884893894195557, "learning_rate": 3.2799230275817832e-06, "loss": 0.8841, "step": 30920 }, { "epoch": 0.37689054635418573, "grad_norm": 1.8481533527374268, "learning_rate": 3.2796023091725467e-06, "loss": 0.9063, "step": 30925 }, { "epoch": 0.3769514825783335, "grad_norm": 2.0266590118408203, "learning_rate": 3.27928159076331e-06, "loss": 0.8243, "step": 30930 }, { "epoch": 0.3770124188024813, "grad_norm": 1.5597586631774902, "learning_rate": 3.2789608723540736e-06, "loss": 0.9498, "step": 30935 }, { "epoch": 0.37707335502662914, "grad_norm": 1.9275047779083252, "learning_rate": 3.2786401539448366e-06, "loss": 0.8277, "step": 30940 }, { "epoch": 0.3771342912507769, "grad_norm": 1.6288758516311646, "learning_rate": 3.2783194355356e-06, "loss": 0.8534, "step": 30945 }, { "epoch": 0.37719522747492473, "grad_norm": 2.1875803470611572, "learning_rate": 3.2779987171263634e-06, "loss": 0.8325, "step": 30950 }, { "epoch": 0.37725616369907256, "grad_norm": 2.002713203430176, "learning_rate": 3.2776779987171265e-06, "loss": 0.8728, "step": 30955 }, { "epoch": 0.3773170999232204, "grad_norm": 2.495840072631836, "learning_rate": 3.2773572803078903e-06, "loss": 0.8851, "step": 30960 }, { "epoch": 0.37737803614736815, "grad_norm": 1.8968260288238525, "learning_rate": 3.2770365618986533e-06, "loss": 0.9225, "step": 30965 }, { "epoch": 0.37743897237151597, "grad_norm": 1.850957989692688, "learning_rate": 3.2767158434894164e-06, "loss": 0.8408, "step": 30970 }, { "epoch": 0.3774999085956638, "grad_norm": 2.082021474838257, "learning_rate": 3.2763951250801802e-06, "loss": 0.8826, "step": 30975 }, { "epoch": 0.37756084481981156, "grad_norm": 1.813712477684021, "learning_rate": 3.2760744066709432e-06, "loss": 0.8266, "step": 30980 }, { "epoch": 0.3776217810439594, "grad_norm": 1.7474430799484253, "learning_rate": 3.2757536882617063e-06, "loss": 0.8418, "step": 30985 }, { "epoch": 0.3776827172681072, "grad_norm": 2.669265031814575, "learning_rate": 3.27543296985247e-06, "loss": 0.9154, "step": 30990 }, { "epoch": 0.377743653492255, "grad_norm": 1.956889033317566, "learning_rate": 3.275112251443233e-06, "loss": 0.8002, "step": 30995 }, { "epoch": 0.3778045897164028, "grad_norm": 1.996707558631897, "learning_rate": 3.274791533033996e-06, "loss": 0.902, "step": 31000 }, { "epoch": 0.3778655259405506, "grad_norm": 2.0778005123138428, "learning_rate": 3.2744708146247596e-06, "loss": 0.8583, "step": 31005 }, { "epoch": 0.37792646216469844, "grad_norm": 1.8312932252883911, "learning_rate": 3.274150096215523e-06, "loss": 0.8926, "step": 31010 }, { "epoch": 0.3779873983888462, "grad_norm": 1.8957700729370117, "learning_rate": 3.2738293778062865e-06, "loss": 0.8255, "step": 31015 }, { "epoch": 0.37804833461299403, "grad_norm": 2.0878143310546875, "learning_rate": 3.2735086593970495e-06, "loss": 0.8107, "step": 31020 }, { "epoch": 0.37810927083714185, "grad_norm": 2.0989127159118652, "learning_rate": 3.273187940987813e-06, "loss": 0.908, "step": 31025 }, { "epoch": 0.3781702070612897, "grad_norm": 1.7451142072677612, "learning_rate": 3.2728672225785764e-06, "loss": 0.8154, "step": 31030 }, { "epoch": 0.37823114328543744, "grad_norm": 1.8948627710342407, "learning_rate": 3.2725465041693394e-06, "loss": 0.8334, "step": 31035 }, { "epoch": 0.37829207950958527, "grad_norm": 2.024543523788452, "learning_rate": 3.2722257857601033e-06, "loss": 0.9119, "step": 31040 }, { "epoch": 0.3783530157337331, "grad_norm": 2.06382155418396, "learning_rate": 3.2719050673508663e-06, "loss": 0.8766, "step": 31045 }, { "epoch": 0.37841395195788086, "grad_norm": 1.5637149810791016, "learning_rate": 3.2715843489416293e-06, "loss": 0.8555, "step": 31050 }, { "epoch": 0.3784748881820287, "grad_norm": 2.1026108264923096, "learning_rate": 3.271263630532393e-06, "loss": 0.8468, "step": 31055 }, { "epoch": 0.3785358244061765, "grad_norm": 1.7135670185089111, "learning_rate": 3.270942912123156e-06, "loss": 0.8376, "step": 31060 }, { "epoch": 0.3785967606303243, "grad_norm": 2.168954372406006, "learning_rate": 3.270622193713919e-06, "loss": 0.8984, "step": 31065 }, { "epoch": 0.3786576968544721, "grad_norm": 1.915967583656311, "learning_rate": 3.270301475304683e-06, "loss": 0.8947, "step": 31070 }, { "epoch": 0.3787186330786199, "grad_norm": 1.9000848531723022, "learning_rate": 3.269980756895446e-06, "loss": 0.8604, "step": 31075 }, { "epoch": 0.37877956930276774, "grad_norm": 1.981343150138855, "learning_rate": 3.269660038486209e-06, "loss": 0.8602, "step": 31080 }, { "epoch": 0.3788405055269155, "grad_norm": 1.9416165351867676, "learning_rate": 3.2693393200769725e-06, "loss": 0.774, "step": 31085 }, { "epoch": 0.37890144175106333, "grad_norm": 2.0065250396728516, "learning_rate": 3.269018601667736e-06, "loss": 0.941, "step": 31090 }, { "epoch": 0.37896237797521115, "grad_norm": 1.6745424270629883, "learning_rate": 3.2686978832584994e-06, "loss": 0.8297, "step": 31095 }, { "epoch": 0.379023314199359, "grad_norm": 2.277625322341919, "learning_rate": 3.2683771648492624e-06, "loss": 0.9001, "step": 31100 }, { "epoch": 0.37908425042350674, "grad_norm": 1.6700196266174316, "learning_rate": 3.268056446440026e-06, "loss": 0.9277, "step": 31105 }, { "epoch": 0.37914518664765456, "grad_norm": 2.176726818084717, "learning_rate": 3.2677357280307893e-06, "loss": 0.8471, "step": 31110 }, { "epoch": 0.3792061228718024, "grad_norm": 1.8332300186157227, "learning_rate": 3.2674150096215523e-06, "loss": 0.8558, "step": 31115 }, { "epoch": 0.37926705909595015, "grad_norm": 1.9477955102920532, "learning_rate": 3.267094291212316e-06, "loss": 0.9084, "step": 31120 }, { "epoch": 0.379327995320098, "grad_norm": 1.8703393936157227, "learning_rate": 3.266773572803079e-06, "loss": 0.8583, "step": 31125 }, { "epoch": 0.3793889315442458, "grad_norm": 1.9844611883163452, "learning_rate": 3.2664528543938422e-06, "loss": 0.8937, "step": 31130 }, { "epoch": 0.3794498677683936, "grad_norm": 1.8209649324417114, "learning_rate": 3.266132135984606e-06, "loss": 0.8328, "step": 31135 }, { "epoch": 0.3795108039925414, "grad_norm": 2.27891206741333, "learning_rate": 3.265811417575369e-06, "loss": 0.8436, "step": 31140 }, { "epoch": 0.3795717402166892, "grad_norm": 2.041435480117798, "learning_rate": 3.265490699166132e-06, "loss": 0.9323, "step": 31145 }, { "epoch": 0.37963267644083704, "grad_norm": 2.55180025100708, "learning_rate": 3.265169980756896e-06, "loss": 0.8206, "step": 31150 }, { "epoch": 0.3796936126649848, "grad_norm": 2.156956911087036, "learning_rate": 3.264849262347659e-06, "loss": 0.8966, "step": 31155 }, { "epoch": 0.3797545488891326, "grad_norm": 2.1050045490264893, "learning_rate": 3.2645285439384224e-06, "loss": 0.8742, "step": 31160 }, { "epoch": 0.37981548511328045, "grad_norm": 1.919182538986206, "learning_rate": 3.264207825529186e-06, "loss": 0.8534, "step": 31165 }, { "epoch": 0.37987642133742827, "grad_norm": 1.7536628246307373, "learning_rate": 3.263887107119949e-06, "loss": 0.8948, "step": 31170 }, { "epoch": 0.37993735756157604, "grad_norm": 1.588924765586853, "learning_rate": 3.2635663887107123e-06, "loss": 0.8386, "step": 31175 }, { "epoch": 0.37999829378572386, "grad_norm": 1.8063268661499023, "learning_rate": 3.2632456703014753e-06, "loss": 0.8558, "step": 31180 }, { "epoch": 0.3800592300098717, "grad_norm": 2.1787526607513428, "learning_rate": 3.2629249518922392e-06, "loss": 0.8586, "step": 31185 }, { "epoch": 0.38012016623401945, "grad_norm": 1.8034805059432983, "learning_rate": 3.2626042334830022e-06, "loss": 0.7866, "step": 31190 }, { "epoch": 0.3801811024581673, "grad_norm": 2.077585458755493, "learning_rate": 3.2622835150737652e-06, "loss": 0.8358, "step": 31195 }, { "epoch": 0.3802420386823151, "grad_norm": 1.6867552995681763, "learning_rate": 3.261962796664529e-06, "loss": 0.7886, "step": 31200 }, { "epoch": 0.3803029749064629, "grad_norm": 1.7845628261566162, "learning_rate": 3.261642078255292e-06, "loss": 0.8794, "step": 31205 }, { "epoch": 0.3803639111306107, "grad_norm": 2.0710196495056152, "learning_rate": 3.261321359846055e-06, "loss": 0.9758, "step": 31210 }, { "epoch": 0.3804248473547585, "grad_norm": 1.7412959337234497, "learning_rate": 3.261000641436819e-06, "loss": 0.7746, "step": 31215 }, { "epoch": 0.38048578357890633, "grad_norm": 1.588821530342102, "learning_rate": 3.260679923027582e-06, "loss": 0.8301, "step": 31220 }, { "epoch": 0.3805467198030541, "grad_norm": 2.5047004222869873, "learning_rate": 3.260359204618345e-06, "loss": 0.8334, "step": 31225 }, { "epoch": 0.3806076560272019, "grad_norm": 1.8998520374298096, "learning_rate": 3.260038486209109e-06, "loss": 0.8349, "step": 31230 }, { "epoch": 0.38066859225134975, "grad_norm": 1.7910946607589722, "learning_rate": 3.259717767799872e-06, "loss": 0.8212, "step": 31235 }, { "epoch": 0.38072952847549757, "grad_norm": 1.8580539226531982, "learning_rate": 3.2593970493906354e-06, "loss": 0.8346, "step": 31240 }, { "epoch": 0.38079046469964534, "grad_norm": 2.1265149116516113, "learning_rate": 3.259076330981399e-06, "loss": 0.8433, "step": 31245 }, { "epoch": 0.38085140092379316, "grad_norm": 1.8163962364196777, "learning_rate": 3.258755612572162e-06, "loss": 0.8486, "step": 31250 }, { "epoch": 0.380912337147941, "grad_norm": 1.8925505876541138, "learning_rate": 3.2584348941629253e-06, "loss": 0.8653, "step": 31255 }, { "epoch": 0.38097327337208875, "grad_norm": 1.8577532768249512, "learning_rate": 3.2581141757536883e-06, "loss": 0.8983, "step": 31260 }, { "epoch": 0.3810342095962366, "grad_norm": 2.1239378452301025, "learning_rate": 3.257793457344452e-06, "loss": 0.8818, "step": 31265 }, { "epoch": 0.3810951458203844, "grad_norm": 1.8605085611343384, "learning_rate": 3.257472738935215e-06, "loss": 0.7479, "step": 31270 }, { "epoch": 0.3811560820445322, "grad_norm": 2.045180320739746, "learning_rate": 3.257152020525978e-06, "loss": 0.9124, "step": 31275 }, { "epoch": 0.38121701826868, "grad_norm": 2.1687588691711426, "learning_rate": 3.256831302116742e-06, "loss": 0.9012, "step": 31280 }, { "epoch": 0.3812779544928278, "grad_norm": 1.7726796865463257, "learning_rate": 3.256510583707505e-06, "loss": 0.8243, "step": 31285 }, { "epoch": 0.38133889071697563, "grad_norm": 2.113752841949463, "learning_rate": 3.256189865298268e-06, "loss": 0.8824, "step": 31290 }, { "epoch": 0.3813998269411234, "grad_norm": 1.826391577720642, "learning_rate": 3.255869146889032e-06, "loss": 0.8462, "step": 31295 }, { "epoch": 0.3814607631652712, "grad_norm": 1.6294547319412231, "learning_rate": 3.255548428479795e-06, "loss": 0.9107, "step": 31300 }, { "epoch": 0.38152169938941904, "grad_norm": 1.965551733970642, "learning_rate": 3.255227710070558e-06, "loss": 0.8623, "step": 31305 }, { "epoch": 0.38158263561356687, "grad_norm": 1.9416171312332153, "learning_rate": 3.254906991661322e-06, "loss": 0.888, "step": 31310 }, { "epoch": 0.38164357183771463, "grad_norm": 1.8621854782104492, "learning_rate": 3.254586273252085e-06, "loss": 0.8288, "step": 31315 }, { "epoch": 0.38170450806186246, "grad_norm": 2.1889402866363525, "learning_rate": 3.2542655548428483e-06, "loss": 0.922, "step": 31320 }, { "epoch": 0.3817654442860103, "grad_norm": 2.0957391262054443, "learning_rate": 3.2539448364336117e-06, "loss": 0.9648, "step": 31325 }, { "epoch": 0.38182638051015805, "grad_norm": 2.256082534790039, "learning_rate": 3.2536241180243747e-06, "loss": 0.9097, "step": 31330 }, { "epoch": 0.38188731673430587, "grad_norm": 2.3970041275024414, "learning_rate": 3.253303399615138e-06, "loss": 0.8317, "step": 31335 }, { "epoch": 0.3819482529584537, "grad_norm": 1.8695255517959595, "learning_rate": 3.252982681205901e-06, "loss": 0.8625, "step": 31340 }, { "epoch": 0.3820091891826015, "grad_norm": 1.8239772319793701, "learning_rate": 3.252661962796665e-06, "loss": 0.848, "step": 31345 }, { "epoch": 0.3820701254067493, "grad_norm": 1.8936564922332764, "learning_rate": 3.252341244387428e-06, "loss": 0.8133, "step": 31350 }, { "epoch": 0.3821310616308971, "grad_norm": 1.7500207424163818, "learning_rate": 3.252020525978191e-06, "loss": 0.8337, "step": 31355 }, { "epoch": 0.38219199785504493, "grad_norm": 1.836451530456543, "learning_rate": 3.251699807568955e-06, "loss": 0.8623, "step": 31360 }, { "epoch": 0.3822529340791927, "grad_norm": 1.829851746559143, "learning_rate": 3.251379089159718e-06, "loss": 0.8238, "step": 31365 }, { "epoch": 0.3823138703033405, "grad_norm": 2.0229380130767822, "learning_rate": 3.251058370750481e-06, "loss": 0.8428, "step": 31370 }, { "epoch": 0.38237480652748834, "grad_norm": 1.875130295753479, "learning_rate": 3.250737652341245e-06, "loss": 0.7987, "step": 31375 }, { "epoch": 0.38243574275163617, "grad_norm": 1.9991015195846558, "learning_rate": 3.250416933932008e-06, "loss": 0.8284, "step": 31380 }, { "epoch": 0.38249667897578393, "grad_norm": 1.9786962270736694, "learning_rate": 3.250096215522771e-06, "loss": 0.8612, "step": 31385 }, { "epoch": 0.38255761519993176, "grad_norm": 1.890556812286377, "learning_rate": 3.2497754971135348e-06, "loss": 0.8724, "step": 31390 }, { "epoch": 0.3826185514240796, "grad_norm": 2.0723414421081543, "learning_rate": 3.2494547787042978e-06, "loss": 0.8957, "step": 31395 }, { "epoch": 0.38267948764822735, "grad_norm": 1.7967339754104614, "learning_rate": 3.2491340602950612e-06, "loss": 0.8402, "step": 31400 }, { "epoch": 0.38274042387237517, "grad_norm": 2.3023557662963867, "learning_rate": 3.2488133418858247e-06, "loss": 0.8664, "step": 31405 }, { "epoch": 0.382801360096523, "grad_norm": 1.9130555391311646, "learning_rate": 3.248492623476588e-06, "loss": 0.898, "step": 31410 }, { "epoch": 0.38286229632067076, "grad_norm": 1.8144394159317017, "learning_rate": 3.248171905067351e-06, "loss": 0.826, "step": 31415 }, { "epoch": 0.3829232325448186, "grad_norm": 1.8299851417541504, "learning_rate": 3.2478511866581146e-06, "loss": 0.8056, "step": 31420 }, { "epoch": 0.3829841687689664, "grad_norm": 2.0805938243865967, "learning_rate": 3.247530468248878e-06, "loss": 0.8415, "step": 31425 }, { "epoch": 0.3830451049931142, "grad_norm": 1.9602357149124146, "learning_rate": 3.247209749839641e-06, "loss": 0.8355, "step": 31430 }, { "epoch": 0.383106041217262, "grad_norm": 2.15995454788208, "learning_rate": 3.246889031430404e-06, "loss": 0.8602, "step": 31435 }, { "epoch": 0.3831669774414098, "grad_norm": 2.0116569995880127, "learning_rate": 3.246568313021168e-06, "loss": 0.8553, "step": 31440 }, { "epoch": 0.38322791366555764, "grad_norm": 1.998343825340271, "learning_rate": 3.246247594611931e-06, "loss": 0.8915, "step": 31445 }, { "epoch": 0.3832888498897054, "grad_norm": 1.7697309255599976, "learning_rate": 3.245926876202694e-06, "loss": 0.8235, "step": 31450 }, { "epoch": 0.38334978611385323, "grad_norm": 2.0614185333251953, "learning_rate": 3.245606157793458e-06, "loss": 0.8158, "step": 31455 }, { "epoch": 0.38341072233800105, "grad_norm": 1.8286218643188477, "learning_rate": 3.245285439384221e-06, "loss": 0.8958, "step": 31460 }, { "epoch": 0.3834716585621489, "grad_norm": 1.7386208772659302, "learning_rate": 3.2449647209749842e-06, "loss": 0.8358, "step": 31465 }, { "epoch": 0.38353259478629664, "grad_norm": 1.7057609558105469, "learning_rate": 3.2446440025657477e-06, "loss": 0.8147, "step": 31470 }, { "epoch": 0.38359353101044447, "grad_norm": 2.1577680110931396, "learning_rate": 3.2443232841565107e-06, "loss": 0.8726, "step": 31475 }, { "epoch": 0.3836544672345923, "grad_norm": 2.043623208999634, "learning_rate": 3.244002565747274e-06, "loss": 0.8321, "step": 31480 }, { "epoch": 0.38371540345874006, "grad_norm": 2.215425968170166, "learning_rate": 3.2436818473380376e-06, "loss": 0.8811, "step": 31485 }, { "epoch": 0.3837763396828879, "grad_norm": 1.9933569431304932, "learning_rate": 3.243361128928801e-06, "loss": 0.8334, "step": 31490 }, { "epoch": 0.3838372759070357, "grad_norm": 2.0663092136383057, "learning_rate": 3.243040410519564e-06, "loss": 0.9893, "step": 31495 }, { "epoch": 0.3838982121311835, "grad_norm": 2.842860221862793, "learning_rate": 3.2427196921103275e-06, "loss": 0.8562, "step": 31500 }, { "epoch": 0.3839591483553313, "grad_norm": 1.7198046445846558, "learning_rate": 3.242398973701091e-06, "loss": 0.7606, "step": 31505 }, { "epoch": 0.3840200845794791, "grad_norm": 2.3806121349334717, "learning_rate": 3.242078255291854e-06, "loss": 0.8762, "step": 31510 }, { "epoch": 0.38408102080362694, "grad_norm": 1.8133163452148438, "learning_rate": 3.241757536882617e-06, "loss": 0.7825, "step": 31515 }, { "epoch": 0.3841419570277747, "grad_norm": 1.7330564260482788, "learning_rate": 3.241436818473381e-06, "loss": 0.9089, "step": 31520 }, { "epoch": 0.38420289325192253, "grad_norm": 2.1278340816497803, "learning_rate": 3.241116100064144e-06, "loss": 0.8172, "step": 31525 }, { "epoch": 0.38426382947607035, "grad_norm": 2.114997148513794, "learning_rate": 3.240795381654907e-06, "loss": 0.9503, "step": 31530 }, { "epoch": 0.3843247657002182, "grad_norm": 2.008145570755005, "learning_rate": 3.2404746632456707e-06, "loss": 0.8358, "step": 31535 }, { "epoch": 0.38438570192436594, "grad_norm": 1.9030184745788574, "learning_rate": 3.2401539448364337e-06, "loss": 0.8902, "step": 31540 }, { "epoch": 0.38444663814851376, "grad_norm": 1.8139530420303345, "learning_rate": 3.239833226427197e-06, "loss": 0.8641, "step": 31545 }, { "epoch": 0.3845075743726616, "grad_norm": 1.7977253198623657, "learning_rate": 3.2395125080179606e-06, "loss": 0.8817, "step": 31550 }, { "epoch": 0.38456851059680935, "grad_norm": 2.1306238174438477, "learning_rate": 3.2391917896087236e-06, "loss": 0.7881, "step": 31555 }, { "epoch": 0.3846294468209572, "grad_norm": 1.716298222541809, "learning_rate": 3.238871071199487e-06, "loss": 0.8679, "step": 31560 }, { "epoch": 0.384690383045105, "grad_norm": 1.8353701829910278, "learning_rate": 3.2385503527902505e-06, "loss": 0.8098, "step": 31565 }, { "epoch": 0.3847513192692528, "grad_norm": 2.017988443374634, "learning_rate": 3.238229634381014e-06, "loss": 0.8037, "step": 31570 }, { "epoch": 0.3848122554934006, "grad_norm": 2.223405122756958, "learning_rate": 3.237908915971777e-06, "loss": 0.8895, "step": 31575 }, { "epoch": 0.3848731917175484, "grad_norm": 1.8609358072280884, "learning_rate": 3.2375881975625404e-06, "loss": 0.8114, "step": 31580 }, { "epoch": 0.38493412794169624, "grad_norm": 1.8500697612762451, "learning_rate": 3.237267479153304e-06, "loss": 0.8546, "step": 31585 }, { "epoch": 0.384995064165844, "grad_norm": 2.106757164001465, "learning_rate": 3.236946760744067e-06, "loss": 0.8932, "step": 31590 }, { "epoch": 0.3850560003899918, "grad_norm": 1.9227551221847534, "learning_rate": 3.23662604233483e-06, "loss": 0.8158, "step": 31595 }, { "epoch": 0.38511693661413965, "grad_norm": 2.0974481105804443, "learning_rate": 3.2363053239255937e-06, "loss": 0.9429, "step": 31600 }, { "epoch": 0.38517787283828747, "grad_norm": 1.750252604484558, "learning_rate": 3.2359846055163568e-06, "loss": 0.9086, "step": 31605 }, { "epoch": 0.38523880906243524, "grad_norm": 2.078805446624756, "learning_rate": 3.2356638871071198e-06, "loss": 0.8424, "step": 31610 }, { "epoch": 0.38529974528658306, "grad_norm": 2.038137197494507, "learning_rate": 3.2353431686978836e-06, "loss": 0.8565, "step": 31615 }, { "epoch": 0.3853606815107309, "grad_norm": 2.009807825088501, "learning_rate": 3.2350224502886467e-06, "loss": 0.8548, "step": 31620 }, { "epoch": 0.38542161773487865, "grad_norm": 2.1920318603515625, "learning_rate": 3.23470173187941e-06, "loss": 0.8134, "step": 31625 }, { "epoch": 0.3854825539590265, "grad_norm": 1.9301121234893799, "learning_rate": 3.2343810134701735e-06, "loss": 0.9262, "step": 31630 }, { "epoch": 0.3855434901831743, "grad_norm": 1.6787711381912231, "learning_rate": 3.234060295060937e-06, "loss": 0.8927, "step": 31635 }, { "epoch": 0.3856044264073221, "grad_norm": 2.0502119064331055, "learning_rate": 3.2337395766517e-06, "loss": 0.8741, "step": 31640 }, { "epoch": 0.3856653626314699, "grad_norm": 2.0631282329559326, "learning_rate": 3.2334188582424634e-06, "loss": 0.8286, "step": 31645 }, { "epoch": 0.3857262988556177, "grad_norm": 1.8277748823165894, "learning_rate": 3.233098139833227e-06, "loss": 0.8618, "step": 31650 }, { "epoch": 0.38578723507976553, "grad_norm": 1.8253957033157349, "learning_rate": 3.23277742142399e-06, "loss": 0.8747, "step": 31655 }, { "epoch": 0.3858481713039133, "grad_norm": 1.6394517421722412, "learning_rate": 3.2324567030147538e-06, "loss": 1.0017, "step": 31660 }, { "epoch": 0.3859091075280611, "grad_norm": 2.0671215057373047, "learning_rate": 3.2321359846055168e-06, "loss": 0.826, "step": 31665 }, { "epoch": 0.38597004375220895, "grad_norm": 1.8113633394241333, "learning_rate": 3.23181526619628e-06, "loss": 0.8679, "step": 31670 }, { "epoch": 0.38603097997635677, "grad_norm": 1.9777708053588867, "learning_rate": 3.231494547787043e-06, "loss": 0.9101, "step": 31675 }, { "epoch": 0.38609191620050454, "grad_norm": 1.8553630113601685, "learning_rate": 3.2311738293778067e-06, "loss": 0.8399, "step": 31680 }, { "epoch": 0.38615285242465236, "grad_norm": 1.9208847284317017, "learning_rate": 3.2308531109685697e-06, "loss": 0.8905, "step": 31685 }, { "epoch": 0.3862137886488002, "grad_norm": 1.9336788654327393, "learning_rate": 3.230532392559333e-06, "loss": 0.8273, "step": 31690 }, { "epoch": 0.38627472487294795, "grad_norm": 1.9272211790084839, "learning_rate": 3.2302116741500966e-06, "loss": 0.8821, "step": 31695 }, { "epoch": 0.3863356610970958, "grad_norm": 2.078458070755005, "learning_rate": 3.2298909557408596e-06, "loss": 0.8826, "step": 31700 }, { "epoch": 0.3863965973212436, "grad_norm": 1.8723281621932983, "learning_rate": 3.229570237331623e-06, "loss": 0.8079, "step": 31705 }, { "epoch": 0.3864575335453914, "grad_norm": 2.0103602409362793, "learning_rate": 3.2292495189223865e-06, "loss": 0.8269, "step": 31710 }, { "epoch": 0.3865184697695392, "grad_norm": 1.9250783920288086, "learning_rate": 3.22892880051315e-06, "loss": 0.8621, "step": 31715 }, { "epoch": 0.386579405993687, "grad_norm": 1.7781145572662354, "learning_rate": 3.228608082103913e-06, "loss": 0.8632, "step": 31720 }, { "epoch": 0.38664034221783483, "grad_norm": 1.995879054069519, "learning_rate": 3.2282873636946764e-06, "loss": 0.7744, "step": 31725 }, { "epoch": 0.3867012784419826, "grad_norm": 1.9395945072174072, "learning_rate": 3.22796664528544e-06, "loss": 0.9604, "step": 31730 }, { "epoch": 0.3867622146661304, "grad_norm": 1.7769579887390137, "learning_rate": 3.227645926876203e-06, "loss": 0.8277, "step": 31735 }, { "epoch": 0.38682315089027824, "grad_norm": 1.657047152519226, "learning_rate": 3.2273252084669667e-06, "loss": 0.8346, "step": 31740 }, { "epoch": 0.38688408711442607, "grad_norm": 1.8268827199935913, "learning_rate": 3.2270044900577297e-06, "loss": 0.8744, "step": 31745 }, { "epoch": 0.38694502333857383, "grad_norm": 1.9530119895935059, "learning_rate": 3.2266837716484927e-06, "loss": 0.8503, "step": 31750 }, { "epoch": 0.38700595956272166, "grad_norm": 1.7484487295150757, "learning_rate": 3.2263630532392566e-06, "loss": 0.9032, "step": 31755 }, { "epoch": 0.3870668957868695, "grad_norm": 2.2184507846832275, "learning_rate": 3.2260423348300196e-06, "loss": 0.8712, "step": 31760 }, { "epoch": 0.38712783201101725, "grad_norm": 2.001107692718506, "learning_rate": 3.2257216164207826e-06, "loss": 0.8224, "step": 31765 }, { "epoch": 0.38718876823516507, "grad_norm": 1.9789029359817505, "learning_rate": 3.225400898011546e-06, "loss": 0.91, "step": 31770 }, { "epoch": 0.3872497044593129, "grad_norm": 1.8678643703460693, "learning_rate": 3.2250801796023095e-06, "loss": 0.8363, "step": 31775 }, { "epoch": 0.3873106406834607, "grad_norm": 2.088054895401001, "learning_rate": 3.2247594611930725e-06, "loss": 0.897, "step": 31780 }, { "epoch": 0.3873715769076085, "grad_norm": 1.6877567768096924, "learning_rate": 3.224438742783836e-06, "loss": 0.8508, "step": 31785 }, { "epoch": 0.3874325131317563, "grad_norm": 2.0193777084350586, "learning_rate": 3.2241180243745994e-06, "loss": 0.9028, "step": 31790 }, { "epoch": 0.38749344935590413, "grad_norm": 1.7336243391036987, "learning_rate": 3.223797305965363e-06, "loss": 0.8625, "step": 31795 }, { "epoch": 0.3875543855800519, "grad_norm": 1.9129387140274048, "learning_rate": 3.223476587556126e-06, "loss": 0.7857, "step": 31800 }, { "epoch": 0.3876153218041997, "grad_norm": 1.8364158868789673, "learning_rate": 3.2231558691468893e-06, "loss": 0.8728, "step": 31805 }, { "epoch": 0.38767625802834754, "grad_norm": 2.2026891708374023, "learning_rate": 3.2228351507376527e-06, "loss": 0.8072, "step": 31810 }, { "epoch": 0.38773719425249537, "grad_norm": 2.4752554893493652, "learning_rate": 3.2225144323284157e-06, "loss": 0.9012, "step": 31815 }, { "epoch": 0.38779813047664313, "grad_norm": 1.8581228256225586, "learning_rate": 3.2221937139191796e-06, "loss": 0.8617, "step": 31820 }, { "epoch": 0.38785906670079096, "grad_norm": 1.6731282472610474, "learning_rate": 3.2218729955099426e-06, "loss": 0.8588, "step": 31825 }, { "epoch": 0.3879200029249388, "grad_norm": 1.8615460395812988, "learning_rate": 3.2215522771007056e-06, "loss": 0.85, "step": 31830 }, { "epoch": 0.38798093914908655, "grad_norm": 1.9760175943374634, "learning_rate": 3.2212315586914695e-06, "loss": 0.9164, "step": 31835 }, { "epoch": 0.38804187537323437, "grad_norm": 1.7991375923156738, "learning_rate": 3.2209108402822325e-06, "loss": 0.895, "step": 31840 }, { "epoch": 0.3881028115973822, "grad_norm": 1.8253040313720703, "learning_rate": 3.2205901218729955e-06, "loss": 0.8756, "step": 31845 }, { "epoch": 0.38816374782153, "grad_norm": 2.2289044857025146, "learning_rate": 3.220269403463759e-06, "loss": 0.8165, "step": 31850 }, { "epoch": 0.3882246840456778, "grad_norm": 2.348956823348999, "learning_rate": 3.2199486850545224e-06, "loss": 0.9128, "step": 31855 }, { "epoch": 0.3882856202698256, "grad_norm": 2.1352899074554443, "learning_rate": 3.2196279666452854e-06, "loss": 0.8003, "step": 31860 }, { "epoch": 0.3883465564939734, "grad_norm": 2.037425994873047, "learning_rate": 3.219307248236049e-06, "loss": 0.8894, "step": 31865 }, { "epoch": 0.3884074927181212, "grad_norm": 1.7585316896438599, "learning_rate": 3.2189865298268123e-06, "loss": 0.8565, "step": 31870 }, { "epoch": 0.388468428942269, "grad_norm": 1.7330501079559326, "learning_rate": 3.2186658114175758e-06, "loss": 0.8511, "step": 31875 }, { "epoch": 0.38852936516641684, "grad_norm": 1.6692770719528198, "learning_rate": 3.2183450930083388e-06, "loss": 0.8952, "step": 31880 }, { "epoch": 0.3885903013905646, "grad_norm": 1.953798532485962, "learning_rate": 3.2180243745991026e-06, "loss": 0.8522, "step": 31885 }, { "epoch": 0.38865123761471243, "grad_norm": 1.874168872833252, "learning_rate": 3.2177036561898657e-06, "loss": 0.7796, "step": 31890 }, { "epoch": 0.38871217383886025, "grad_norm": 2.1374433040618896, "learning_rate": 3.2173829377806287e-06, "loss": 0.8443, "step": 31895 }, { "epoch": 0.3887731100630081, "grad_norm": 2.217763662338257, "learning_rate": 3.2170622193713925e-06, "loss": 0.7905, "step": 31900 }, { "epoch": 0.38883404628715584, "grad_norm": 2.2408645153045654, "learning_rate": 3.2167415009621556e-06, "loss": 0.8472, "step": 31905 }, { "epoch": 0.38889498251130367, "grad_norm": 2.837494134902954, "learning_rate": 3.2164207825529186e-06, "loss": 0.8469, "step": 31910 }, { "epoch": 0.3889559187354515, "grad_norm": 2.056445360183716, "learning_rate": 3.2161000641436824e-06, "loss": 0.8967, "step": 31915 }, { "epoch": 0.38901685495959926, "grad_norm": 1.923869252204895, "learning_rate": 3.2157793457344455e-06, "loss": 0.8378, "step": 31920 }, { "epoch": 0.3890777911837471, "grad_norm": 2.191725254058838, "learning_rate": 3.2154586273252085e-06, "loss": 0.9019, "step": 31925 }, { "epoch": 0.3891387274078949, "grad_norm": 1.6715089082717896, "learning_rate": 3.215137908915972e-06, "loss": 0.839, "step": 31930 }, { "epoch": 0.3891996636320427, "grad_norm": 2.0368356704711914, "learning_rate": 3.2148171905067354e-06, "loss": 0.882, "step": 31935 }, { "epoch": 0.3892605998561905, "grad_norm": 1.7770792245864868, "learning_rate": 3.214496472097499e-06, "loss": 0.8831, "step": 31940 }, { "epoch": 0.3893215360803383, "grad_norm": 2.1243436336517334, "learning_rate": 3.214175753688262e-06, "loss": 0.7914, "step": 31945 }, { "epoch": 0.38938247230448614, "grad_norm": 1.7547452449798584, "learning_rate": 3.2138550352790252e-06, "loss": 0.8156, "step": 31950 }, { "epoch": 0.3894434085286339, "grad_norm": 2.286548376083374, "learning_rate": 3.2135343168697887e-06, "loss": 0.78, "step": 31955 }, { "epoch": 0.38950434475278173, "grad_norm": 1.9630110263824463, "learning_rate": 3.2132135984605517e-06, "loss": 0.9232, "step": 31960 }, { "epoch": 0.38956528097692955, "grad_norm": 1.9283857345581055, "learning_rate": 3.2128928800513156e-06, "loss": 0.8679, "step": 31965 }, { "epoch": 0.3896262172010774, "grad_norm": 1.9639078378677368, "learning_rate": 3.2125721616420786e-06, "loss": 0.8475, "step": 31970 }, { "epoch": 0.38968715342522514, "grad_norm": 1.952209234237671, "learning_rate": 3.2122514432328416e-06, "loss": 0.8814, "step": 31975 }, { "epoch": 0.38974808964937296, "grad_norm": 1.9981666803359985, "learning_rate": 3.2119307248236055e-06, "loss": 0.8413, "step": 31980 }, { "epoch": 0.3898090258735208, "grad_norm": 2.102315664291382, "learning_rate": 3.2116100064143685e-06, "loss": 0.8532, "step": 31985 }, { "epoch": 0.38986996209766855, "grad_norm": 1.879172682762146, "learning_rate": 3.2112892880051315e-06, "loss": 0.8006, "step": 31990 }, { "epoch": 0.3899308983218164, "grad_norm": 1.8997774124145508, "learning_rate": 3.2109685695958954e-06, "loss": 0.8373, "step": 31995 }, { "epoch": 0.3899918345459642, "grad_norm": 2.0409109592437744, "learning_rate": 3.2106478511866584e-06, "loss": 0.8355, "step": 32000 }, { "epoch": 0.390052770770112, "grad_norm": 1.9836238622665405, "learning_rate": 3.2103271327774214e-06, "loss": 0.8228, "step": 32005 }, { "epoch": 0.3901137069942598, "grad_norm": 1.842825174331665, "learning_rate": 3.210006414368185e-06, "loss": 0.8487, "step": 32010 }, { "epoch": 0.3901746432184076, "grad_norm": 1.972307562828064, "learning_rate": 3.2096856959589483e-06, "loss": 0.9113, "step": 32015 }, { "epoch": 0.39023557944255544, "grad_norm": 1.9671120643615723, "learning_rate": 3.2093649775497117e-06, "loss": 0.8988, "step": 32020 }, { "epoch": 0.3902965156667032, "grad_norm": 2.5576984882354736, "learning_rate": 3.2090442591404747e-06, "loss": 0.8982, "step": 32025 }, { "epoch": 0.390357451890851, "grad_norm": 2.1069908142089844, "learning_rate": 3.208723540731238e-06, "loss": 0.8603, "step": 32030 }, { "epoch": 0.39041838811499885, "grad_norm": 1.6807364225387573, "learning_rate": 3.2084028223220016e-06, "loss": 0.8894, "step": 32035 }, { "epoch": 0.39047932433914667, "grad_norm": 1.7426795959472656, "learning_rate": 3.2080821039127646e-06, "loss": 0.8149, "step": 32040 }, { "epoch": 0.39054026056329444, "grad_norm": 2.2534191608428955, "learning_rate": 3.2077613855035285e-06, "loss": 0.8113, "step": 32045 }, { "epoch": 0.39060119678744226, "grad_norm": 1.8525428771972656, "learning_rate": 3.2074406670942915e-06, "loss": 0.9069, "step": 32050 }, { "epoch": 0.3906621330115901, "grad_norm": 1.723910927772522, "learning_rate": 3.2071199486850545e-06, "loss": 0.914, "step": 32055 }, { "epoch": 0.39072306923573785, "grad_norm": 1.746387243270874, "learning_rate": 3.2067992302758184e-06, "loss": 0.8443, "step": 32060 }, { "epoch": 0.3907840054598857, "grad_norm": 2.126589059829712, "learning_rate": 3.2064785118665814e-06, "loss": 0.8822, "step": 32065 }, { "epoch": 0.3908449416840335, "grad_norm": 2.095574140548706, "learning_rate": 3.2061577934573444e-06, "loss": 0.8707, "step": 32070 }, { "epoch": 0.3909058779081813, "grad_norm": 1.830994963645935, "learning_rate": 3.2058370750481083e-06, "loss": 0.9075, "step": 32075 }, { "epoch": 0.3909668141323291, "grad_norm": 1.8523651361465454, "learning_rate": 3.2055163566388713e-06, "loss": 0.7751, "step": 32080 }, { "epoch": 0.3910277503564769, "grad_norm": 1.7526624202728271, "learning_rate": 3.2051956382296343e-06, "loss": 0.8137, "step": 32085 }, { "epoch": 0.39108868658062473, "grad_norm": 2.0024003982543945, "learning_rate": 3.204874919820398e-06, "loss": 0.8547, "step": 32090 }, { "epoch": 0.3911496228047725, "grad_norm": 2.1583356857299805, "learning_rate": 3.204554201411161e-06, "loss": 0.8105, "step": 32095 }, { "epoch": 0.3912105590289203, "grad_norm": 2.238006830215454, "learning_rate": 3.2042334830019246e-06, "loss": 0.8819, "step": 32100 }, { "epoch": 0.39127149525306815, "grad_norm": 1.7401765584945679, "learning_rate": 3.2039127645926877e-06, "loss": 0.9219, "step": 32105 }, { "epoch": 0.39133243147721597, "grad_norm": 1.6253776550292969, "learning_rate": 3.2035920461834515e-06, "loss": 0.8318, "step": 32110 }, { "epoch": 0.39139336770136374, "grad_norm": 1.8310338258743286, "learning_rate": 3.2032713277742145e-06, "loss": 0.8623, "step": 32115 }, { "epoch": 0.39145430392551156, "grad_norm": 1.9613369703292847, "learning_rate": 3.2029506093649776e-06, "loss": 0.9229, "step": 32120 }, { "epoch": 0.3915152401496594, "grad_norm": 2.3568480014801025, "learning_rate": 3.2026298909557414e-06, "loss": 0.8436, "step": 32125 }, { "epoch": 0.39157617637380715, "grad_norm": 1.7501633167266846, "learning_rate": 3.2023091725465044e-06, "loss": 0.8188, "step": 32130 }, { "epoch": 0.391637112597955, "grad_norm": 1.888069748878479, "learning_rate": 3.2019884541372675e-06, "loss": 0.887, "step": 32135 }, { "epoch": 0.3916980488221028, "grad_norm": 1.7820497751235962, "learning_rate": 3.2016677357280313e-06, "loss": 0.9031, "step": 32140 }, { "epoch": 0.3917589850462506, "grad_norm": 2.103283643722534, "learning_rate": 3.2013470173187943e-06, "loss": 0.8601, "step": 32145 }, { "epoch": 0.3918199212703984, "grad_norm": 2.943735361099243, "learning_rate": 3.2010262989095574e-06, "loss": 0.8951, "step": 32150 }, { "epoch": 0.3918808574945462, "grad_norm": 2.0767805576324463, "learning_rate": 3.2007055805003212e-06, "loss": 0.8704, "step": 32155 }, { "epoch": 0.39194179371869403, "grad_norm": 2.007690668106079, "learning_rate": 3.2003848620910842e-06, "loss": 0.8275, "step": 32160 }, { "epoch": 0.3920027299428418, "grad_norm": 2.1131980419158936, "learning_rate": 3.2000641436818477e-06, "loss": 0.7872, "step": 32165 }, { "epoch": 0.3920636661669896, "grad_norm": 1.9689818620681763, "learning_rate": 3.199743425272611e-06, "loss": 0.8221, "step": 32170 }, { "epoch": 0.39212460239113744, "grad_norm": 2.329192876815796, "learning_rate": 3.199422706863374e-06, "loss": 0.807, "step": 32175 }, { "epoch": 0.39218553861528527, "grad_norm": 1.8231337070465088, "learning_rate": 3.1991019884541376e-06, "loss": 0.8138, "step": 32180 }, { "epoch": 0.39224647483943303, "grad_norm": 1.8927817344665527, "learning_rate": 3.1987812700449006e-06, "loss": 0.8307, "step": 32185 }, { "epoch": 0.39230741106358086, "grad_norm": 1.7845300436019897, "learning_rate": 3.1984605516356645e-06, "loss": 0.8375, "step": 32190 }, { "epoch": 0.3923683472877287, "grad_norm": 1.9770528078079224, "learning_rate": 3.1981398332264275e-06, "loss": 0.7899, "step": 32195 }, { "epoch": 0.39242928351187645, "grad_norm": 1.7915140390396118, "learning_rate": 3.1978191148171905e-06, "loss": 0.7423, "step": 32200 }, { "epoch": 0.39249021973602427, "grad_norm": 2.0436453819274902, "learning_rate": 3.1974983964079544e-06, "loss": 0.9155, "step": 32205 }, { "epoch": 0.3925511559601721, "grad_norm": 2.0182087421417236, "learning_rate": 3.1971776779987174e-06, "loss": 0.8377, "step": 32210 }, { "epoch": 0.3926120921843199, "grad_norm": 1.379451870918274, "learning_rate": 3.1968569595894804e-06, "loss": 0.8193, "step": 32215 }, { "epoch": 0.3926730284084677, "grad_norm": 1.784355878829956, "learning_rate": 3.1965362411802442e-06, "loss": 0.81, "step": 32220 }, { "epoch": 0.3927339646326155, "grad_norm": 2.4330527782440186, "learning_rate": 3.1962155227710073e-06, "loss": 0.9665, "step": 32225 }, { "epoch": 0.39279490085676333, "grad_norm": 1.9031492471694946, "learning_rate": 3.1958948043617703e-06, "loss": 0.8312, "step": 32230 }, { "epoch": 0.3928558370809111, "grad_norm": 1.9202654361724854, "learning_rate": 3.195574085952534e-06, "loss": 0.8416, "step": 32235 }, { "epoch": 0.3929167733050589, "grad_norm": 1.9858980178833008, "learning_rate": 3.195253367543297e-06, "loss": 0.8946, "step": 32240 }, { "epoch": 0.39297770952920674, "grad_norm": 2.0176236629486084, "learning_rate": 3.1949326491340606e-06, "loss": 0.8641, "step": 32245 }, { "epoch": 0.39303864575335457, "grad_norm": 2.6867122650146484, "learning_rate": 3.194611930724824e-06, "loss": 0.8614, "step": 32250 }, { "epoch": 0.39309958197750233, "grad_norm": 2.352623462677002, "learning_rate": 3.194291212315587e-06, "loss": 0.9182, "step": 32255 }, { "epoch": 0.39316051820165016, "grad_norm": 2.4745633602142334, "learning_rate": 3.1939704939063505e-06, "loss": 0.7961, "step": 32260 }, { "epoch": 0.393221454425798, "grad_norm": 2.0061445236206055, "learning_rate": 3.1936497754971135e-06, "loss": 0.8285, "step": 32265 }, { "epoch": 0.39328239064994575, "grad_norm": 1.8397111892700195, "learning_rate": 3.1933290570878774e-06, "loss": 0.8335, "step": 32270 }, { "epoch": 0.39334332687409357, "grad_norm": 1.9159618616104126, "learning_rate": 3.1930083386786404e-06, "loss": 0.8501, "step": 32275 }, { "epoch": 0.3934042630982414, "grad_norm": 1.9719059467315674, "learning_rate": 3.1926876202694034e-06, "loss": 0.8648, "step": 32280 }, { "epoch": 0.3934651993223892, "grad_norm": 1.6894093751907349, "learning_rate": 3.1923669018601673e-06, "loss": 0.8751, "step": 32285 }, { "epoch": 0.393526135546537, "grad_norm": 1.8429359197616577, "learning_rate": 3.1920461834509303e-06, "loss": 0.8005, "step": 32290 }, { "epoch": 0.3935870717706848, "grad_norm": 2.1542677879333496, "learning_rate": 3.1917254650416933e-06, "loss": 0.8911, "step": 32295 }, { "epoch": 0.3936480079948326, "grad_norm": 1.6331946849822998, "learning_rate": 3.191404746632457e-06, "loss": 0.8874, "step": 32300 }, { "epoch": 0.3937089442189804, "grad_norm": 1.8805009126663208, "learning_rate": 3.19108402822322e-06, "loss": 0.8108, "step": 32305 }, { "epoch": 0.3937698804431282, "grad_norm": 1.6836613416671753, "learning_rate": 3.190763309813983e-06, "loss": 0.8186, "step": 32310 }, { "epoch": 0.39383081666727604, "grad_norm": 1.9685732126235962, "learning_rate": 3.190442591404747e-06, "loss": 0.8082, "step": 32315 }, { "epoch": 0.39389175289142386, "grad_norm": 2.156960964202881, "learning_rate": 3.19012187299551e-06, "loss": 0.8986, "step": 32320 }, { "epoch": 0.39395268911557163, "grad_norm": 2.3642303943634033, "learning_rate": 3.1898011545862735e-06, "loss": 0.8475, "step": 32325 }, { "epoch": 0.39401362533971945, "grad_norm": 1.7550904750823975, "learning_rate": 3.189480436177037e-06, "loss": 0.8251, "step": 32330 }, { "epoch": 0.3940745615638673, "grad_norm": 2.0371816158294678, "learning_rate": 3.1891597177678004e-06, "loss": 0.8245, "step": 32335 }, { "epoch": 0.39413549778801504, "grad_norm": 2.210106372833252, "learning_rate": 3.1888389993585634e-06, "loss": 0.9513, "step": 32340 }, { "epoch": 0.39419643401216287, "grad_norm": 1.9675236940383911, "learning_rate": 3.188518280949327e-06, "loss": 0.8937, "step": 32345 }, { "epoch": 0.3942573702363107, "grad_norm": 1.8136619329452515, "learning_rate": 3.1881975625400903e-06, "loss": 0.813, "step": 32350 }, { "epoch": 0.39431830646045846, "grad_norm": 1.954408049583435, "learning_rate": 3.1878768441308533e-06, "loss": 0.8316, "step": 32355 }, { "epoch": 0.3943792426846063, "grad_norm": 1.726799726486206, "learning_rate": 3.1875561257216163e-06, "loss": 0.9347, "step": 32360 }, { "epoch": 0.3944401789087541, "grad_norm": 2.0007951259613037, "learning_rate": 3.18723540731238e-06, "loss": 0.8124, "step": 32365 }, { "epoch": 0.3945011151329019, "grad_norm": 1.6252641677856445, "learning_rate": 3.1869146889031432e-06, "loss": 0.8181, "step": 32370 }, { "epoch": 0.3945620513570497, "grad_norm": 1.9252527952194214, "learning_rate": 3.1865939704939062e-06, "loss": 0.8475, "step": 32375 }, { "epoch": 0.3946229875811975, "grad_norm": 1.919615387916565, "learning_rate": 3.18627325208467e-06, "loss": 0.8477, "step": 32380 }, { "epoch": 0.39468392380534534, "grad_norm": 1.88115656375885, "learning_rate": 3.185952533675433e-06, "loss": 0.8677, "step": 32385 }, { "epoch": 0.3947448600294931, "grad_norm": 2.1692283153533936, "learning_rate": 3.1856318152661966e-06, "loss": 0.8327, "step": 32390 }, { "epoch": 0.39480579625364093, "grad_norm": 2.0155887603759766, "learning_rate": 3.18531109685696e-06, "loss": 0.8643, "step": 32395 }, { "epoch": 0.39486673247778875, "grad_norm": 2.0790648460388184, "learning_rate": 3.184990378447723e-06, "loss": 0.8273, "step": 32400 }, { "epoch": 0.3949276687019366, "grad_norm": 2.1041083335876465, "learning_rate": 3.1846696600384865e-06, "loss": 0.8809, "step": 32405 }, { "epoch": 0.39498860492608434, "grad_norm": 1.7711777687072754, "learning_rate": 3.18434894162925e-06, "loss": 0.8329, "step": 32410 }, { "epoch": 0.39504954115023216, "grad_norm": 2.479137659072876, "learning_rate": 3.1840282232200133e-06, "loss": 0.8975, "step": 32415 }, { "epoch": 0.39511047737438, "grad_norm": 1.9448394775390625, "learning_rate": 3.1837075048107764e-06, "loss": 0.8317, "step": 32420 }, { "epoch": 0.39517141359852775, "grad_norm": 2.365497350692749, "learning_rate": 3.18338678640154e-06, "loss": 0.8822, "step": 32425 }, { "epoch": 0.3952323498226756, "grad_norm": 1.9751390218734741, "learning_rate": 3.1830660679923032e-06, "loss": 0.864, "step": 32430 }, { "epoch": 0.3952932860468234, "grad_norm": 2.0090084075927734, "learning_rate": 3.1827453495830663e-06, "loss": 0.8466, "step": 32435 }, { "epoch": 0.3953542222709712, "grad_norm": 1.882038950920105, "learning_rate": 3.1824246311738293e-06, "loss": 0.7947, "step": 32440 }, { "epoch": 0.395415158495119, "grad_norm": 1.9142463207244873, "learning_rate": 3.182103912764593e-06, "loss": 0.8295, "step": 32445 }, { "epoch": 0.3954760947192668, "grad_norm": 1.6882851123809814, "learning_rate": 3.181783194355356e-06, "loss": 0.8578, "step": 32450 }, { "epoch": 0.39553703094341464, "grad_norm": 2.1453516483306885, "learning_rate": 3.181462475946119e-06, "loss": 0.8636, "step": 32455 }, { "epoch": 0.3955979671675624, "grad_norm": 2.03861927986145, "learning_rate": 3.181141757536883e-06, "loss": 0.8729, "step": 32460 }, { "epoch": 0.3956589033917102, "grad_norm": 1.829268217086792, "learning_rate": 3.180821039127646e-06, "loss": 0.8323, "step": 32465 }, { "epoch": 0.39571983961585805, "grad_norm": 2.175480842590332, "learning_rate": 3.1805003207184095e-06, "loss": 0.8359, "step": 32470 }, { "epoch": 0.39578077584000587, "grad_norm": 2.314093828201294, "learning_rate": 3.180179602309173e-06, "loss": 0.8168, "step": 32475 }, { "epoch": 0.39584171206415364, "grad_norm": 2.010152578353882, "learning_rate": 3.179858883899936e-06, "loss": 0.8741, "step": 32480 }, { "epoch": 0.39590264828830146, "grad_norm": 1.9544960260391235, "learning_rate": 3.1795381654906994e-06, "loss": 0.8591, "step": 32485 }, { "epoch": 0.3959635845124493, "grad_norm": 1.8545517921447754, "learning_rate": 3.179217447081463e-06, "loss": 0.836, "step": 32490 }, { "epoch": 0.39602452073659705, "grad_norm": 2.0022480487823486, "learning_rate": 3.1788967286722263e-06, "loss": 0.8069, "step": 32495 }, { "epoch": 0.3960854569607449, "grad_norm": 2.0352938175201416, "learning_rate": 3.1785760102629893e-06, "loss": 0.8307, "step": 32500 }, { "epoch": 0.3961463931848927, "grad_norm": 2.224903106689453, "learning_rate": 3.1782552918537527e-06, "loss": 0.8465, "step": 32505 }, { "epoch": 0.3962073294090405, "grad_norm": 2.0025153160095215, "learning_rate": 3.177934573444516e-06, "loss": 0.7655, "step": 32510 }, { "epoch": 0.3962682656331883, "grad_norm": 1.887286901473999, "learning_rate": 3.177613855035279e-06, "loss": 0.7908, "step": 32515 }, { "epoch": 0.3963292018573361, "grad_norm": 2.143465280532837, "learning_rate": 3.177293136626042e-06, "loss": 0.8679, "step": 32520 }, { "epoch": 0.39639013808148393, "grad_norm": 1.8826210498809814, "learning_rate": 3.176972418216806e-06, "loss": 0.8799, "step": 32525 }, { "epoch": 0.3964510743056317, "grad_norm": 1.8319429159164429, "learning_rate": 3.176651699807569e-06, "loss": 0.7569, "step": 32530 }, { "epoch": 0.3965120105297795, "grad_norm": 2.083456516265869, "learning_rate": 3.176330981398332e-06, "loss": 0.8044, "step": 32535 }, { "epoch": 0.39657294675392735, "grad_norm": 1.7496129274368286, "learning_rate": 3.176010262989096e-06, "loss": 0.8614, "step": 32540 }, { "epoch": 0.39663388297807517, "grad_norm": 1.618876576423645, "learning_rate": 3.175689544579859e-06, "loss": 0.7952, "step": 32545 }, { "epoch": 0.39669481920222294, "grad_norm": 1.8407493829727173, "learning_rate": 3.1753688261706224e-06, "loss": 0.8167, "step": 32550 }, { "epoch": 0.39675575542637076, "grad_norm": 1.5817300081253052, "learning_rate": 3.175048107761386e-06, "loss": 0.847, "step": 32555 }, { "epoch": 0.3968166916505186, "grad_norm": 1.7647217512130737, "learning_rate": 3.174727389352149e-06, "loss": 0.8752, "step": 32560 }, { "epoch": 0.39687762787466635, "grad_norm": 1.7280545234680176, "learning_rate": 3.1744066709429123e-06, "loss": 0.9137, "step": 32565 }, { "epoch": 0.3969385640988142, "grad_norm": 2.232309341430664, "learning_rate": 3.1740859525336758e-06, "loss": 0.874, "step": 32570 }, { "epoch": 0.396999500322962, "grad_norm": 1.6673576831817627, "learning_rate": 3.173765234124439e-06, "loss": 0.8246, "step": 32575 }, { "epoch": 0.3970604365471098, "grad_norm": 2.0685245990753174, "learning_rate": 3.173444515715202e-06, "loss": 0.8643, "step": 32580 }, { "epoch": 0.3971213727712576, "grad_norm": 1.8976364135742188, "learning_rate": 3.173123797305966e-06, "loss": 0.8189, "step": 32585 }, { "epoch": 0.3971823089954054, "grad_norm": 2.021592855453491, "learning_rate": 3.172803078896729e-06, "loss": 0.8063, "step": 32590 }, { "epoch": 0.39724324521955323, "grad_norm": 2.094808340072632, "learning_rate": 3.172482360487492e-06, "loss": 0.86, "step": 32595 }, { "epoch": 0.397304181443701, "grad_norm": 2.103717088699341, "learning_rate": 3.172161642078255e-06, "loss": 0.8194, "step": 32600 }, { "epoch": 0.3973651176678488, "grad_norm": 1.7893353700637817, "learning_rate": 3.171840923669019e-06, "loss": 0.835, "step": 32605 }, { "epoch": 0.39742605389199664, "grad_norm": 1.8217347860336304, "learning_rate": 3.171520205259782e-06, "loss": 0.8544, "step": 32610 }, { "epoch": 0.39748699011614447, "grad_norm": 2.0094072818756104, "learning_rate": 3.1711994868505454e-06, "loss": 0.7631, "step": 32615 }, { "epoch": 0.39754792634029223, "grad_norm": 1.8341470956802368, "learning_rate": 3.170878768441309e-06, "loss": 0.8661, "step": 32620 }, { "epoch": 0.39760886256444006, "grad_norm": 2.2138805389404297, "learning_rate": 3.170558050032072e-06, "loss": 0.7536, "step": 32625 }, { "epoch": 0.3976697987885879, "grad_norm": 1.883283257484436, "learning_rate": 3.1702373316228353e-06, "loss": 0.8552, "step": 32630 }, { "epoch": 0.39773073501273565, "grad_norm": 1.8065588474273682, "learning_rate": 3.1699166132135988e-06, "loss": 0.7705, "step": 32635 }, { "epoch": 0.39779167123688347, "grad_norm": 2.4892923831939697, "learning_rate": 3.1695958948043622e-06, "loss": 0.8873, "step": 32640 }, { "epoch": 0.3978526074610313, "grad_norm": 2.838059425354004, "learning_rate": 3.1692751763951252e-06, "loss": 0.9041, "step": 32645 }, { "epoch": 0.3979135436851791, "grad_norm": 2.6509246826171875, "learning_rate": 3.1689544579858887e-06, "loss": 0.8135, "step": 32650 }, { "epoch": 0.3979744799093269, "grad_norm": 1.7458486557006836, "learning_rate": 3.168633739576652e-06, "loss": 0.8813, "step": 32655 }, { "epoch": 0.3980354161334747, "grad_norm": 1.6095389127731323, "learning_rate": 3.168313021167415e-06, "loss": 0.8305, "step": 32660 }, { "epoch": 0.39809635235762253, "grad_norm": 2.0981266498565674, "learning_rate": 3.167992302758179e-06, "loss": 0.7458, "step": 32665 }, { "epoch": 0.3981572885817703, "grad_norm": 2.085468292236328, "learning_rate": 3.167671584348942e-06, "loss": 0.9062, "step": 32670 }, { "epoch": 0.3982182248059181, "grad_norm": 1.7502151727676392, "learning_rate": 3.167350865939705e-06, "loss": 0.8755, "step": 32675 }, { "epoch": 0.39827916103006594, "grad_norm": 2.18761944770813, "learning_rate": 3.167030147530469e-06, "loss": 0.8399, "step": 32680 }, { "epoch": 0.39834009725421377, "grad_norm": 1.8781522512435913, "learning_rate": 3.166709429121232e-06, "loss": 0.9423, "step": 32685 }, { "epoch": 0.39840103347836153, "grad_norm": 1.8951998949050903, "learning_rate": 3.166388710711995e-06, "loss": 0.8355, "step": 32690 }, { "epoch": 0.39846196970250936, "grad_norm": 2.0237460136413574, "learning_rate": 3.1660679923027584e-06, "loss": 0.8196, "step": 32695 }, { "epoch": 0.3985229059266572, "grad_norm": 2.330817699432373, "learning_rate": 3.165747273893522e-06, "loss": 0.9507, "step": 32700 }, { "epoch": 0.39858384215080495, "grad_norm": 1.9148738384246826, "learning_rate": 3.165426555484285e-06, "loss": 0.8108, "step": 32705 }, { "epoch": 0.39864477837495277, "grad_norm": 2.0673859119415283, "learning_rate": 3.1651058370750483e-06, "loss": 0.85, "step": 32710 }, { "epoch": 0.3987057145991006, "grad_norm": 2.267038583755493, "learning_rate": 3.1647851186658117e-06, "loss": 0.7772, "step": 32715 }, { "epoch": 0.3987666508232484, "grad_norm": 2.166264772415161, "learning_rate": 3.164464400256575e-06, "loss": 0.8345, "step": 32720 }, { "epoch": 0.3988275870473962, "grad_norm": 2.2393863201141357, "learning_rate": 3.164143681847338e-06, "loss": 0.7988, "step": 32725 }, { "epoch": 0.398888523271544, "grad_norm": 1.7352620363235474, "learning_rate": 3.1638229634381016e-06, "loss": 0.8646, "step": 32730 }, { "epoch": 0.3989494594956918, "grad_norm": 2.236422538757324, "learning_rate": 3.163502245028865e-06, "loss": 0.8263, "step": 32735 }, { "epoch": 0.3990103957198396, "grad_norm": 1.981824517250061, "learning_rate": 3.163181526619628e-06, "loss": 0.8232, "step": 32740 }, { "epoch": 0.3990713319439874, "grad_norm": 1.9002532958984375, "learning_rate": 3.162860808210392e-06, "loss": 0.8469, "step": 32745 }, { "epoch": 0.39913226816813524, "grad_norm": 2.277311086654663, "learning_rate": 3.162540089801155e-06, "loss": 0.8903, "step": 32750 }, { "epoch": 0.39919320439228306, "grad_norm": 2.2222142219543457, "learning_rate": 3.162219371391918e-06, "loss": 0.8895, "step": 32755 }, { "epoch": 0.39925414061643083, "grad_norm": 1.9765331745147705, "learning_rate": 3.161898652982682e-06, "loss": 0.8076, "step": 32760 }, { "epoch": 0.39931507684057865, "grad_norm": 1.9348790645599365, "learning_rate": 3.161577934573445e-06, "loss": 0.8529, "step": 32765 }, { "epoch": 0.3993760130647265, "grad_norm": 2.071493148803711, "learning_rate": 3.161257216164208e-06, "loss": 0.8416, "step": 32770 }, { "epoch": 0.39943694928887424, "grad_norm": 1.6492806673049927, "learning_rate": 3.1609364977549713e-06, "loss": 0.8968, "step": 32775 }, { "epoch": 0.39949788551302207, "grad_norm": 1.8338830471038818, "learning_rate": 3.1606157793457347e-06, "loss": 0.8598, "step": 32780 }, { "epoch": 0.3995588217371699, "grad_norm": 2.0580461025238037, "learning_rate": 3.1602950609364978e-06, "loss": 0.8717, "step": 32785 }, { "epoch": 0.3996197579613177, "grad_norm": 1.5043622255325317, "learning_rate": 3.159974342527261e-06, "loss": 0.8867, "step": 32790 }, { "epoch": 0.3996806941854655, "grad_norm": 1.842415690422058, "learning_rate": 3.1596536241180246e-06, "loss": 0.8301, "step": 32795 }, { "epoch": 0.3997416304096133, "grad_norm": 2.003654956817627, "learning_rate": 3.159332905708788e-06, "loss": 0.8813, "step": 32800 }, { "epoch": 0.3998025666337611, "grad_norm": 1.8224766254425049, "learning_rate": 3.159012187299551e-06, "loss": 0.8837, "step": 32805 }, { "epoch": 0.3998635028579089, "grad_norm": 2.7862532138824463, "learning_rate": 3.158691468890315e-06, "loss": 0.7866, "step": 32810 }, { "epoch": 0.3999244390820567, "grad_norm": 2.163536548614502, "learning_rate": 3.158370750481078e-06, "loss": 0.8776, "step": 32815 }, { "epoch": 0.39998537530620454, "grad_norm": 1.9537416696548462, "learning_rate": 3.158050032071841e-06, "loss": 0.8338, "step": 32820 }, { "epoch": 0.40004631153035236, "grad_norm": 1.5832358598709106, "learning_rate": 3.157729313662605e-06, "loss": 0.8714, "step": 32825 }, { "epoch": 0.40010724775450013, "grad_norm": 1.643754482269287, "learning_rate": 3.157408595253368e-06, "loss": 0.7741, "step": 32830 }, { "epoch": 0.40016818397864795, "grad_norm": 1.9923088550567627, "learning_rate": 3.157087876844131e-06, "loss": 0.864, "step": 32835 }, { "epoch": 0.4002291202027958, "grad_norm": 1.8557915687561035, "learning_rate": 3.1567671584348948e-06, "loss": 0.8508, "step": 32840 }, { "epoch": 0.40029005642694354, "grad_norm": 2.3038463592529297, "learning_rate": 3.1564464400256578e-06, "loss": 0.8966, "step": 32845 }, { "epoch": 0.40035099265109136, "grad_norm": 2.0060696601867676, "learning_rate": 3.1561257216164208e-06, "loss": 0.8504, "step": 32850 }, { "epoch": 0.4004119288752392, "grad_norm": 2.009122371673584, "learning_rate": 3.1558050032071842e-06, "loss": 0.8069, "step": 32855 }, { "epoch": 0.40047286509938695, "grad_norm": 2.1627302169799805, "learning_rate": 3.1554842847979477e-06, "loss": 0.8938, "step": 32860 }, { "epoch": 0.4005338013235348, "grad_norm": 2.689312696456909, "learning_rate": 3.155163566388711e-06, "loss": 0.8745, "step": 32865 }, { "epoch": 0.4005947375476826, "grad_norm": 1.8920531272888184, "learning_rate": 3.154842847979474e-06, "loss": 0.8674, "step": 32870 }, { "epoch": 0.4006556737718304, "grad_norm": 1.9106664657592773, "learning_rate": 3.1545221295702376e-06, "loss": 0.9555, "step": 32875 }, { "epoch": 0.4007166099959782, "grad_norm": 2.0950725078582764, "learning_rate": 3.154201411161001e-06, "loss": 0.9257, "step": 32880 }, { "epoch": 0.400777546220126, "grad_norm": 1.9058200120925903, "learning_rate": 3.153880692751764e-06, "loss": 0.8296, "step": 32885 }, { "epoch": 0.40083848244427384, "grad_norm": 1.6252281665802002, "learning_rate": 3.153559974342528e-06, "loss": 0.845, "step": 32890 }, { "epoch": 0.4008994186684216, "grad_norm": 1.7153066396713257, "learning_rate": 3.153239255933291e-06, "loss": 0.8381, "step": 32895 }, { "epoch": 0.4009603548925694, "grad_norm": 1.8947151899337769, "learning_rate": 3.152918537524054e-06, "loss": 0.8382, "step": 32900 }, { "epoch": 0.40102129111671725, "grad_norm": 2.123364210128784, "learning_rate": 3.1525978191148178e-06, "loss": 0.789, "step": 32905 }, { "epoch": 0.40108222734086507, "grad_norm": 1.837538242340088, "learning_rate": 3.152277100705581e-06, "loss": 0.8479, "step": 32910 }, { "epoch": 0.40114316356501284, "grad_norm": 2.072882890701294, "learning_rate": 3.151956382296344e-06, "loss": 0.9612, "step": 32915 }, { "epoch": 0.40120409978916066, "grad_norm": 1.7107057571411133, "learning_rate": 3.1516356638871077e-06, "loss": 0.8358, "step": 32920 }, { "epoch": 0.4012650360133085, "grad_norm": 1.8938089609146118, "learning_rate": 3.1513149454778707e-06, "loss": 0.9035, "step": 32925 }, { "epoch": 0.40132597223745625, "grad_norm": 2.066054105758667, "learning_rate": 3.1509942270686337e-06, "loss": 0.8999, "step": 32930 }, { "epoch": 0.4013869084616041, "grad_norm": 2.219785213470459, "learning_rate": 3.1506735086593976e-06, "loss": 0.8767, "step": 32935 }, { "epoch": 0.4014478446857519, "grad_norm": 3.0316009521484375, "learning_rate": 3.1503527902501606e-06, "loss": 0.8464, "step": 32940 }, { "epoch": 0.4015087809098997, "grad_norm": 1.718691349029541, "learning_rate": 3.150032071840924e-06, "loss": 0.808, "step": 32945 }, { "epoch": 0.4015697171340475, "grad_norm": 1.872550368309021, "learning_rate": 3.149711353431687e-06, "loss": 0.8427, "step": 32950 }, { "epoch": 0.4016306533581953, "grad_norm": 1.6546156406402588, "learning_rate": 3.1493906350224505e-06, "loss": 0.8819, "step": 32955 }, { "epoch": 0.40169158958234313, "grad_norm": 2.006767749786377, "learning_rate": 3.149069916613214e-06, "loss": 0.8263, "step": 32960 }, { "epoch": 0.4017525258064909, "grad_norm": 2.0220189094543457, "learning_rate": 3.148749198203977e-06, "loss": 0.8232, "step": 32965 }, { "epoch": 0.4018134620306387, "grad_norm": 2.7033469676971436, "learning_rate": 3.148428479794741e-06, "loss": 0.8295, "step": 32970 }, { "epoch": 0.40187439825478655, "grad_norm": 1.8942055702209473, "learning_rate": 3.148107761385504e-06, "loss": 0.8761, "step": 32975 }, { "epoch": 0.40193533447893437, "grad_norm": 2.1468632221221924, "learning_rate": 3.147787042976267e-06, "loss": 0.8681, "step": 32980 }, { "epoch": 0.40199627070308214, "grad_norm": 1.9055148363113403, "learning_rate": 3.1474663245670307e-06, "loss": 0.8503, "step": 32985 }, { "epoch": 0.40205720692722996, "grad_norm": 1.8499889373779297, "learning_rate": 3.1471456061577937e-06, "loss": 0.7876, "step": 32990 }, { "epoch": 0.4021181431513778, "grad_norm": 1.7811988592147827, "learning_rate": 3.1468248877485567e-06, "loss": 0.9065, "step": 32995 }, { "epoch": 0.40217907937552555, "grad_norm": 1.7733440399169922, "learning_rate": 3.1465041693393206e-06, "loss": 0.8643, "step": 33000 }, { "epoch": 0.4022400155996734, "grad_norm": 1.8379216194152832, "learning_rate": 3.1461834509300836e-06, "loss": 0.9011, "step": 33005 }, { "epoch": 0.4023009518238212, "grad_norm": 2.010918140411377, "learning_rate": 3.1458627325208466e-06, "loss": 0.8699, "step": 33010 }, { "epoch": 0.402361888047969, "grad_norm": 1.9510301351547241, "learning_rate": 3.1455420141116105e-06, "loss": 0.8149, "step": 33015 }, { "epoch": 0.4024228242721168, "grad_norm": 2.030468463897705, "learning_rate": 3.1452212957023735e-06, "loss": 0.9677, "step": 33020 }, { "epoch": 0.4024837604962646, "grad_norm": 2.095499038696289, "learning_rate": 3.144900577293137e-06, "loss": 0.9037, "step": 33025 }, { "epoch": 0.40254469672041243, "grad_norm": 1.97451913356781, "learning_rate": 3.1445798588839e-06, "loss": 0.7741, "step": 33030 }, { "epoch": 0.4026056329445602, "grad_norm": 2.053774833679199, "learning_rate": 3.144259140474664e-06, "loss": 0.8291, "step": 33035 }, { "epoch": 0.402666569168708, "grad_norm": 1.880306363105774, "learning_rate": 3.143938422065427e-06, "loss": 0.8062, "step": 33040 }, { "epoch": 0.40272750539285584, "grad_norm": 2.010668992996216, "learning_rate": 3.14361770365619e-06, "loss": 0.8226, "step": 33045 }, { "epoch": 0.40278844161700367, "grad_norm": 2.2955944538116455, "learning_rate": 3.1432969852469537e-06, "loss": 0.8511, "step": 33050 }, { "epoch": 0.40284937784115143, "grad_norm": 1.8517634868621826, "learning_rate": 3.1429762668377168e-06, "loss": 0.8788, "step": 33055 }, { "epoch": 0.40291031406529926, "grad_norm": 1.7854655981063843, "learning_rate": 3.1426555484284798e-06, "loss": 0.8394, "step": 33060 }, { "epoch": 0.4029712502894471, "grad_norm": 2.119103193283081, "learning_rate": 3.1423348300192436e-06, "loss": 0.8711, "step": 33065 }, { "epoch": 0.40303218651359485, "grad_norm": 2.0269792079925537, "learning_rate": 3.1420141116100067e-06, "loss": 0.8545, "step": 33070 }, { "epoch": 0.40309312273774267, "grad_norm": 2.1697678565979004, "learning_rate": 3.1416933932007697e-06, "loss": 0.8692, "step": 33075 }, { "epoch": 0.4031540589618905, "grad_norm": 1.8827838897705078, "learning_rate": 3.1413726747915335e-06, "loss": 0.7929, "step": 33080 }, { "epoch": 0.4032149951860383, "grad_norm": 2.4354658126831055, "learning_rate": 3.1410519563822965e-06, "loss": 0.8579, "step": 33085 }, { "epoch": 0.4032759314101861, "grad_norm": 2.4065287113189697, "learning_rate": 3.14073123797306e-06, "loss": 0.7826, "step": 33090 }, { "epoch": 0.4033368676343339, "grad_norm": 1.9670090675354004, "learning_rate": 3.1404105195638234e-06, "loss": 0.8776, "step": 33095 }, { "epoch": 0.40339780385848173, "grad_norm": 2.077653646469116, "learning_rate": 3.1400898011545864e-06, "loss": 0.8577, "step": 33100 }, { "epoch": 0.4034587400826295, "grad_norm": 1.7703701257705688, "learning_rate": 3.13976908274535e-06, "loss": 0.8264, "step": 33105 }, { "epoch": 0.4035196763067773, "grad_norm": 2.1012048721313477, "learning_rate": 3.139448364336113e-06, "loss": 0.8866, "step": 33110 }, { "epoch": 0.40358061253092514, "grad_norm": 1.741450309753418, "learning_rate": 3.1391276459268768e-06, "loss": 0.8274, "step": 33115 }, { "epoch": 0.40364154875507297, "grad_norm": 2.215758800506592, "learning_rate": 3.1388069275176398e-06, "loss": 0.8156, "step": 33120 }, { "epoch": 0.40370248497922073, "grad_norm": 2.0768537521362305, "learning_rate": 3.138486209108403e-06, "loss": 0.8614, "step": 33125 }, { "epoch": 0.40376342120336856, "grad_norm": 2.20542311668396, "learning_rate": 3.1381654906991667e-06, "loss": 0.8647, "step": 33130 }, { "epoch": 0.4038243574275164, "grad_norm": 1.7045594453811646, "learning_rate": 3.1378447722899297e-06, "loss": 0.8502, "step": 33135 }, { "epoch": 0.40388529365166415, "grad_norm": 1.685859203338623, "learning_rate": 3.1375240538806927e-06, "loss": 0.8135, "step": 33140 }, { "epoch": 0.40394622987581197, "grad_norm": 1.5563679933547974, "learning_rate": 3.1372033354714566e-06, "loss": 0.8334, "step": 33145 }, { "epoch": 0.4040071660999598, "grad_norm": 2.441063642501831, "learning_rate": 3.1368826170622196e-06, "loss": 0.87, "step": 33150 }, { "epoch": 0.4040681023241076, "grad_norm": 1.9451189041137695, "learning_rate": 3.1365618986529826e-06, "loss": 0.8342, "step": 33155 }, { "epoch": 0.4041290385482554, "grad_norm": 1.8522173166275024, "learning_rate": 3.1362411802437465e-06, "loss": 0.8248, "step": 33160 }, { "epoch": 0.4041899747724032, "grad_norm": 2.5683255195617676, "learning_rate": 3.1359204618345095e-06, "loss": 0.883, "step": 33165 }, { "epoch": 0.404250910996551, "grad_norm": 1.797874093055725, "learning_rate": 3.135599743425273e-06, "loss": 0.8295, "step": 33170 }, { "epoch": 0.4043118472206988, "grad_norm": 1.7706540822982788, "learning_rate": 3.1352790250160364e-06, "loss": 0.8093, "step": 33175 }, { "epoch": 0.4043727834448466, "grad_norm": 2.1047511100769043, "learning_rate": 3.1349583066067994e-06, "loss": 0.8765, "step": 33180 }, { "epoch": 0.40443371966899444, "grad_norm": 1.9294251203536987, "learning_rate": 3.134637588197563e-06, "loss": 0.856, "step": 33185 }, { "epoch": 0.40449465589314226, "grad_norm": 1.9173316955566406, "learning_rate": 3.134316869788326e-06, "loss": 0.8456, "step": 33190 }, { "epoch": 0.40455559211729003, "grad_norm": 2.134737968444824, "learning_rate": 3.1339961513790897e-06, "loss": 0.8139, "step": 33195 }, { "epoch": 0.40461652834143785, "grad_norm": 1.688866138458252, "learning_rate": 3.1336754329698527e-06, "loss": 0.8669, "step": 33200 }, { "epoch": 0.4046774645655857, "grad_norm": 1.9206711053848267, "learning_rate": 3.1333547145606157e-06, "loss": 0.8812, "step": 33205 }, { "epoch": 0.40473840078973344, "grad_norm": 1.9870905876159668, "learning_rate": 3.1330339961513796e-06, "loss": 0.7951, "step": 33210 }, { "epoch": 0.40479933701388127, "grad_norm": 2.1071033477783203, "learning_rate": 3.1327132777421426e-06, "loss": 0.8378, "step": 33215 }, { "epoch": 0.4048602732380291, "grad_norm": 1.8504996299743652, "learning_rate": 3.1323925593329056e-06, "loss": 0.8236, "step": 33220 }, { "epoch": 0.4049212094621769, "grad_norm": 2.0103328227996826, "learning_rate": 3.1320718409236695e-06, "loss": 0.878, "step": 33225 }, { "epoch": 0.4049821456863247, "grad_norm": 2.104149580001831, "learning_rate": 3.1317511225144325e-06, "loss": 0.8346, "step": 33230 }, { "epoch": 0.4050430819104725, "grad_norm": 1.8116812705993652, "learning_rate": 3.1314304041051955e-06, "loss": 0.8251, "step": 33235 }, { "epoch": 0.4051040181346203, "grad_norm": 1.832524299621582, "learning_rate": 3.1311096856959594e-06, "loss": 0.8078, "step": 33240 }, { "epoch": 0.4051649543587681, "grad_norm": 2.5456292629241943, "learning_rate": 3.1307889672867224e-06, "loss": 0.8862, "step": 33245 }, { "epoch": 0.4052258905829159, "grad_norm": 2.2494094371795654, "learning_rate": 3.130468248877486e-06, "loss": 0.886, "step": 33250 }, { "epoch": 0.40528682680706374, "grad_norm": 1.916765570640564, "learning_rate": 3.1301475304682493e-06, "loss": 0.8654, "step": 33255 }, { "epoch": 0.40534776303121156, "grad_norm": 1.8265297412872314, "learning_rate": 3.1298268120590123e-06, "loss": 0.8288, "step": 33260 }, { "epoch": 0.40540869925535933, "grad_norm": 2.213156223297119, "learning_rate": 3.1295060936497757e-06, "loss": 0.8498, "step": 33265 }, { "epoch": 0.40546963547950715, "grad_norm": 2.1180195808410645, "learning_rate": 3.129185375240539e-06, "loss": 0.8594, "step": 33270 }, { "epoch": 0.405530571703655, "grad_norm": 2.149423122406006, "learning_rate": 3.1288646568313026e-06, "loss": 0.8746, "step": 33275 }, { "epoch": 0.40559150792780274, "grad_norm": 2.108656883239746, "learning_rate": 3.1285439384220656e-06, "loss": 0.8049, "step": 33280 }, { "epoch": 0.40565244415195056, "grad_norm": 2.1248950958251953, "learning_rate": 3.1282232200128287e-06, "loss": 0.8617, "step": 33285 }, { "epoch": 0.4057133803760984, "grad_norm": 1.6048023700714111, "learning_rate": 3.1279025016035925e-06, "loss": 0.8412, "step": 33290 }, { "epoch": 0.4057743166002462, "grad_norm": 1.8730419874191284, "learning_rate": 3.1275817831943555e-06, "loss": 0.8984, "step": 33295 }, { "epoch": 0.405835252824394, "grad_norm": 2.148263931274414, "learning_rate": 3.1272610647851186e-06, "loss": 0.8318, "step": 33300 }, { "epoch": 0.4058961890485418, "grad_norm": 2.101910352706909, "learning_rate": 3.1269403463758824e-06, "loss": 0.8535, "step": 33305 }, { "epoch": 0.4059571252726896, "grad_norm": 2.096116781234741, "learning_rate": 3.1266196279666454e-06, "loss": 0.8918, "step": 33310 }, { "epoch": 0.4060180614968374, "grad_norm": 1.9261306524276733, "learning_rate": 3.126298909557409e-06, "loss": 0.8121, "step": 33315 }, { "epoch": 0.4060789977209852, "grad_norm": 2.045929431915283, "learning_rate": 3.1259781911481723e-06, "loss": 0.8715, "step": 33320 }, { "epoch": 0.40613993394513304, "grad_norm": 1.8724348545074463, "learning_rate": 3.1256574727389353e-06, "loss": 0.8693, "step": 33325 }, { "epoch": 0.4062008701692808, "grad_norm": 1.73966646194458, "learning_rate": 3.1253367543296988e-06, "loss": 0.8268, "step": 33330 }, { "epoch": 0.4062618063934286, "grad_norm": 1.7726067304611206, "learning_rate": 3.125016035920462e-06, "loss": 0.8521, "step": 33335 }, { "epoch": 0.40632274261757645, "grad_norm": 1.8683723211288452, "learning_rate": 3.1246953175112257e-06, "loss": 0.8214, "step": 33340 }, { "epoch": 0.40638367884172427, "grad_norm": 1.9359931945800781, "learning_rate": 3.1243745991019887e-06, "loss": 0.8519, "step": 33345 }, { "epoch": 0.40644461506587204, "grad_norm": 1.7915915250778198, "learning_rate": 3.124053880692752e-06, "loss": 0.852, "step": 33350 }, { "epoch": 0.40650555129001986, "grad_norm": 1.837931513786316, "learning_rate": 3.1237331622835155e-06, "loss": 0.8144, "step": 33355 }, { "epoch": 0.4065664875141677, "grad_norm": 2.0228238105773926, "learning_rate": 3.1234124438742786e-06, "loss": 0.9134, "step": 33360 }, { "epoch": 0.40662742373831545, "grad_norm": 2.0294909477233887, "learning_rate": 3.1230917254650416e-06, "loss": 0.8592, "step": 33365 }, { "epoch": 0.4066883599624633, "grad_norm": 1.7339402437210083, "learning_rate": 3.1227710070558054e-06, "loss": 0.8962, "step": 33370 }, { "epoch": 0.4067492961866111, "grad_norm": 1.860897183418274, "learning_rate": 3.1224502886465685e-06, "loss": 0.8455, "step": 33375 }, { "epoch": 0.4068102324107589, "grad_norm": 1.7370320558547974, "learning_rate": 3.1221295702373315e-06, "loss": 0.8899, "step": 33380 }, { "epoch": 0.4068711686349067, "grad_norm": 1.9674769639968872, "learning_rate": 3.1218088518280953e-06, "loss": 0.9024, "step": 33385 }, { "epoch": 0.4069321048590545, "grad_norm": 2.04146671295166, "learning_rate": 3.1214881334188584e-06, "loss": 0.8431, "step": 33390 }, { "epoch": 0.40699304108320233, "grad_norm": 1.9363585710525513, "learning_rate": 3.121167415009622e-06, "loss": 0.8381, "step": 33395 }, { "epoch": 0.4070539773073501, "grad_norm": 1.812267780303955, "learning_rate": 3.1208466966003852e-06, "loss": 0.853, "step": 33400 }, { "epoch": 0.4071149135314979, "grad_norm": 2.213883876800537, "learning_rate": 3.1205259781911483e-06, "loss": 0.8496, "step": 33405 }, { "epoch": 0.40717584975564575, "grad_norm": 2.120302438735962, "learning_rate": 3.1202052597819117e-06, "loss": 0.8812, "step": 33410 }, { "epoch": 0.40723678597979357, "grad_norm": 2.1035478115081787, "learning_rate": 3.119884541372675e-06, "loss": 0.8701, "step": 33415 }, { "epoch": 0.40729772220394134, "grad_norm": 1.678080677986145, "learning_rate": 3.1195638229634386e-06, "loss": 0.8121, "step": 33420 }, { "epoch": 0.40735865842808916, "grad_norm": 1.8187984228134155, "learning_rate": 3.1192431045542016e-06, "loss": 0.9231, "step": 33425 }, { "epoch": 0.407419594652237, "grad_norm": 1.802229642868042, "learning_rate": 3.118922386144965e-06, "loss": 0.8553, "step": 33430 }, { "epoch": 0.40748053087638475, "grad_norm": 1.7680541276931763, "learning_rate": 3.1186016677357285e-06, "loss": 0.8134, "step": 33435 }, { "epoch": 0.4075414671005326, "grad_norm": 1.9165048599243164, "learning_rate": 3.1182809493264915e-06, "loss": 0.9558, "step": 33440 }, { "epoch": 0.4076024033246804, "grad_norm": 1.6475722789764404, "learning_rate": 3.1179602309172545e-06, "loss": 0.8043, "step": 33445 }, { "epoch": 0.4076633395488282, "grad_norm": 2.152364730834961, "learning_rate": 3.1176395125080184e-06, "loss": 0.8698, "step": 33450 }, { "epoch": 0.407724275772976, "grad_norm": 1.7918986082077026, "learning_rate": 3.1173187940987814e-06, "loss": 0.8672, "step": 33455 }, { "epoch": 0.4077852119971238, "grad_norm": 1.6494455337524414, "learning_rate": 3.1169980756895444e-06, "loss": 0.9143, "step": 33460 }, { "epoch": 0.40784614822127163, "grad_norm": 1.8191198110580444, "learning_rate": 3.1166773572803083e-06, "loss": 0.9107, "step": 33465 }, { "epoch": 0.4079070844454194, "grad_norm": 2.0414464473724365, "learning_rate": 3.1163566388710713e-06, "loss": 0.9095, "step": 33470 }, { "epoch": 0.4079680206695672, "grad_norm": 1.7886558771133423, "learning_rate": 3.1160359204618347e-06, "loss": 0.8102, "step": 33475 }, { "epoch": 0.40802895689371504, "grad_norm": 2.3275201320648193, "learning_rate": 3.115715202052598e-06, "loss": 0.8328, "step": 33480 }, { "epoch": 0.40808989311786287, "grad_norm": 1.9625113010406494, "learning_rate": 3.115394483643361e-06, "loss": 0.9894, "step": 33485 }, { "epoch": 0.40815082934201063, "grad_norm": 2.120138645172119, "learning_rate": 3.1150737652341246e-06, "loss": 0.8611, "step": 33490 }, { "epoch": 0.40821176556615846, "grad_norm": 1.8569175004959106, "learning_rate": 3.114753046824888e-06, "loss": 0.8117, "step": 33495 }, { "epoch": 0.4082727017903063, "grad_norm": 1.8305503129959106, "learning_rate": 3.1144323284156515e-06, "loss": 0.882, "step": 33500 }, { "epoch": 0.40833363801445405, "grad_norm": 1.969081163406372, "learning_rate": 3.1141116100064145e-06, "loss": 0.8007, "step": 33505 }, { "epoch": 0.40839457423860187, "grad_norm": 1.9468159675598145, "learning_rate": 3.1137908915971784e-06, "loss": 0.8406, "step": 33510 }, { "epoch": 0.4084555104627497, "grad_norm": 2.040681838989258, "learning_rate": 3.1134701731879414e-06, "loss": 0.8064, "step": 33515 }, { "epoch": 0.4085164466868975, "grad_norm": 2.0877511501312256, "learning_rate": 3.1131494547787044e-06, "loss": 0.8929, "step": 33520 }, { "epoch": 0.4085773829110453, "grad_norm": 2.294623851776123, "learning_rate": 3.1128287363694674e-06, "loss": 0.874, "step": 33525 }, { "epoch": 0.4086383191351931, "grad_norm": 1.7905725240707397, "learning_rate": 3.1125080179602313e-06, "loss": 0.858, "step": 33530 }, { "epoch": 0.40869925535934093, "grad_norm": 1.815568447113037, "learning_rate": 3.1121872995509943e-06, "loss": 0.8271, "step": 33535 }, { "epoch": 0.4087601915834887, "grad_norm": 2.2839245796203613, "learning_rate": 3.1118665811417578e-06, "loss": 0.8723, "step": 33540 }, { "epoch": 0.4088211278076365, "grad_norm": 2.031324625015259, "learning_rate": 3.111545862732521e-06, "loss": 0.8248, "step": 33545 }, { "epoch": 0.40888206403178434, "grad_norm": 2.4529290199279785, "learning_rate": 3.1112251443232842e-06, "loss": 0.8751, "step": 33550 }, { "epoch": 0.40894300025593217, "grad_norm": 1.8751696348190308, "learning_rate": 3.1109044259140477e-06, "loss": 0.7865, "step": 33555 }, { "epoch": 0.40900393648007993, "grad_norm": 1.7455142736434937, "learning_rate": 3.110583707504811e-06, "loss": 0.8227, "step": 33560 }, { "epoch": 0.40906487270422776, "grad_norm": 1.727982521057129, "learning_rate": 3.1102629890955745e-06, "loss": 0.8285, "step": 33565 }, { "epoch": 0.4091258089283756, "grad_norm": 1.9296149015426636, "learning_rate": 3.1099422706863376e-06, "loss": 0.8991, "step": 33570 }, { "epoch": 0.40918674515252335, "grad_norm": 1.794331431388855, "learning_rate": 3.109621552277101e-06, "loss": 0.853, "step": 33575 }, { "epoch": 0.40924768137667117, "grad_norm": 1.7156926393508911, "learning_rate": 3.1093008338678644e-06, "loss": 0.84, "step": 33580 }, { "epoch": 0.409308617600819, "grad_norm": 1.8651931285858154, "learning_rate": 3.1089801154586274e-06, "loss": 0.8374, "step": 33585 }, { "epoch": 0.4093695538249668, "grad_norm": 2.454693078994751, "learning_rate": 3.1086593970493913e-06, "loss": 0.9328, "step": 33590 }, { "epoch": 0.4094304900491146, "grad_norm": 1.9073361158370972, "learning_rate": 3.1083386786401543e-06, "loss": 0.7981, "step": 33595 }, { "epoch": 0.4094914262732624, "grad_norm": 1.8251335620880127, "learning_rate": 3.1080179602309173e-06, "loss": 0.8388, "step": 33600 }, { "epoch": 0.4095523624974102, "grad_norm": 1.7879893779754639, "learning_rate": 3.107697241821681e-06, "loss": 0.8593, "step": 33605 }, { "epoch": 0.409613298721558, "grad_norm": 1.8567043542861938, "learning_rate": 3.1073765234124442e-06, "loss": 0.8386, "step": 33610 }, { "epoch": 0.4096742349457058, "grad_norm": 1.7035975456237793, "learning_rate": 3.1070558050032072e-06, "loss": 0.8354, "step": 33615 }, { "epoch": 0.40973517116985364, "grad_norm": 2.340510368347168, "learning_rate": 3.1067350865939707e-06, "loss": 0.8457, "step": 33620 }, { "epoch": 0.40979610739400146, "grad_norm": 2.1044416427612305, "learning_rate": 3.106414368184734e-06, "loss": 0.8806, "step": 33625 }, { "epoch": 0.40985704361814923, "grad_norm": 2.6672310829162598, "learning_rate": 3.106093649775497e-06, "loss": 0.9251, "step": 33630 }, { "epoch": 0.40991797984229705, "grad_norm": 1.9897468090057373, "learning_rate": 3.1057729313662606e-06, "loss": 0.9478, "step": 33635 }, { "epoch": 0.4099789160664449, "grad_norm": 2.9205522537231445, "learning_rate": 3.105452212957024e-06, "loss": 0.8244, "step": 33640 }, { "epoch": 0.41003985229059264, "grad_norm": 1.9532877206802368, "learning_rate": 3.1051314945477875e-06, "loss": 0.8282, "step": 33645 }, { "epoch": 0.41010078851474047, "grad_norm": 2.0420966148376465, "learning_rate": 3.1048107761385505e-06, "loss": 0.8559, "step": 33650 }, { "epoch": 0.4101617247388883, "grad_norm": 2.000455379486084, "learning_rate": 3.104490057729314e-06, "loss": 0.9854, "step": 33655 }, { "epoch": 0.4102226609630361, "grad_norm": 2.0362465381622314, "learning_rate": 3.1041693393200774e-06, "loss": 0.801, "step": 33660 }, { "epoch": 0.4102835971871839, "grad_norm": 2.0700929164886475, "learning_rate": 3.1038486209108404e-06, "loss": 0.812, "step": 33665 }, { "epoch": 0.4103445334113317, "grad_norm": 1.6451389789581299, "learning_rate": 3.1035279025016042e-06, "loss": 0.8361, "step": 33670 }, { "epoch": 0.4104054696354795, "grad_norm": 1.9332115650177002, "learning_rate": 3.1032071840923673e-06, "loss": 0.8027, "step": 33675 }, { "epoch": 0.4104664058596273, "grad_norm": 1.9477044343948364, "learning_rate": 3.1028864656831303e-06, "loss": 0.9155, "step": 33680 }, { "epoch": 0.4105273420837751, "grad_norm": 1.7392257452011108, "learning_rate": 3.102565747273894e-06, "loss": 0.8023, "step": 33685 }, { "epoch": 0.41058827830792294, "grad_norm": 1.8356114625930786, "learning_rate": 3.102245028864657e-06, "loss": 0.9351, "step": 33690 }, { "epoch": 0.41064921453207076, "grad_norm": 1.975977897644043, "learning_rate": 3.10192431045542e-06, "loss": 0.7954, "step": 33695 }, { "epoch": 0.41071015075621853, "grad_norm": 1.9801814556121826, "learning_rate": 3.1016035920461836e-06, "loss": 0.8738, "step": 33700 }, { "epoch": 0.41077108698036635, "grad_norm": 1.903133511543274, "learning_rate": 3.101282873636947e-06, "loss": 0.856, "step": 33705 }, { "epoch": 0.4108320232045142, "grad_norm": 1.7403531074523926, "learning_rate": 3.10096215522771e-06, "loss": 0.8112, "step": 33710 }, { "epoch": 0.41089295942866194, "grad_norm": 2.4757213592529297, "learning_rate": 3.1006414368184735e-06, "loss": 0.8728, "step": 33715 }, { "epoch": 0.41095389565280976, "grad_norm": 2.6977591514587402, "learning_rate": 3.100320718409237e-06, "loss": 0.8414, "step": 33720 }, { "epoch": 0.4110148318769576, "grad_norm": 2.0170600414276123, "learning_rate": 3.1000000000000004e-06, "loss": 0.8288, "step": 33725 }, { "epoch": 0.4110757681011054, "grad_norm": 2.1123206615448, "learning_rate": 3.0996792815907634e-06, "loss": 0.8117, "step": 33730 }, { "epoch": 0.4111367043252532, "grad_norm": 1.8459712266921997, "learning_rate": 3.0993585631815273e-06, "loss": 0.8461, "step": 33735 }, { "epoch": 0.411197640549401, "grad_norm": 1.9879721403121948, "learning_rate": 3.0990378447722903e-06, "loss": 0.8561, "step": 33740 }, { "epoch": 0.4112585767735488, "grad_norm": 2.2645251750946045, "learning_rate": 3.0987171263630533e-06, "loss": 0.8788, "step": 33745 }, { "epoch": 0.4113195129976966, "grad_norm": 1.699779987335205, "learning_rate": 3.098396407953817e-06, "loss": 0.8329, "step": 33750 }, { "epoch": 0.4113804492218444, "grad_norm": 1.7944118976593018, "learning_rate": 3.09807568954458e-06, "loss": 0.8224, "step": 33755 }, { "epoch": 0.41144138544599224, "grad_norm": 1.8218395709991455, "learning_rate": 3.097754971135343e-06, "loss": 0.7943, "step": 33760 }, { "epoch": 0.41150232167014006, "grad_norm": 1.9142204523086548, "learning_rate": 3.097434252726107e-06, "loss": 0.8181, "step": 33765 }, { "epoch": 0.4115632578942878, "grad_norm": 1.9554989337921143, "learning_rate": 3.09711353431687e-06, "loss": 0.8226, "step": 33770 }, { "epoch": 0.41162419411843565, "grad_norm": 1.5747547149658203, "learning_rate": 3.096792815907633e-06, "loss": 0.7945, "step": 33775 }, { "epoch": 0.41168513034258347, "grad_norm": 1.9527989625930786, "learning_rate": 3.0964720974983965e-06, "loss": 0.835, "step": 33780 }, { "epoch": 0.41174606656673124, "grad_norm": 1.8679620027542114, "learning_rate": 3.09615137908916e-06, "loss": 0.8382, "step": 33785 }, { "epoch": 0.41180700279087906, "grad_norm": 2.1353375911712646, "learning_rate": 3.0958306606799234e-06, "loss": 0.9133, "step": 33790 }, { "epoch": 0.4118679390150269, "grad_norm": 2.149850845336914, "learning_rate": 3.0955099422706864e-06, "loss": 0.8517, "step": 33795 }, { "epoch": 0.41192887523917465, "grad_norm": 1.9887158870697021, "learning_rate": 3.09518922386145e-06, "loss": 0.8896, "step": 33800 }, { "epoch": 0.4119898114633225, "grad_norm": 1.7219600677490234, "learning_rate": 3.0948685054522133e-06, "loss": 0.8267, "step": 33805 }, { "epoch": 0.4120507476874703, "grad_norm": 2.0108413696289062, "learning_rate": 3.0945477870429763e-06, "loss": 0.9083, "step": 33810 }, { "epoch": 0.4121116839116181, "grad_norm": 2.1436784267425537, "learning_rate": 3.09422706863374e-06, "loss": 0.773, "step": 33815 }, { "epoch": 0.4121726201357659, "grad_norm": 2.523651361465454, "learning_rate": 3.0939063502245032e-06, "loss": 0.883, "step": 33820 }, { "epoch": 0.4122335563599137, "grad_norm": 2.1444311141967773, "learning_rate": 3.0935856318152662e-06, "loss": 0.9199, "step": 33825 }, { "epoch": 0.41229449258406153, "grad_norm": 1.9331332445144653, "learning_rate": 3.09326491340603e-06, "loss": 0.8654, "step": 33830 }, { "epoch": 0.4123554288082093, "grad_norm": 1.9739582538604736, "learning_rate": 3.092944194996793e-06, "loss": 0.9046, "step": 33835 }, { "epoch": 0.4124163650323571, "grad_norm": 2.107633590698242, "learning_rate": 3.092623476587556e-06, "loss": 0.7594, "step": 33840 }, { "epoch": 0.41247730125650495, "grad_norm": 1.736116886138916, "learning_rate": 3.09230275817832e-06, "loss": 0.8415, "step": 33845 }, { "epoch": 0.41253823748065277, "grad_norm": 2.4555976390838623, "learning_rate": 3.091982039769083e-06, "loss": 0.8657, "step": 33850 }, { "epoch": 0.41259917370480054, "grad_norm": 1.9899741411209106, "learning_rate": 3.091661321359846e-06, "loss": 0.828, "step": 33855 }, { "epoch": 0.41266010992894836, "grad_norm": 1.8664065599441528, "learning_rate": 3.09134060295061e-06, "loss": 0.8062, "step": 33860 }, { "epoch": 0.4127210461530962, "grad_norm": 1.9619951248168945, "learning_rate": 3.091019884541373e-06, "loss": 0.8483, "step": 33865 }, { "epoch": 0.41278198237724395, "grad_norm": 1.8586888313293457, "learning_rate": 3.0906991661321363e-06, "loss": 0.8559, "step": 33870 }, { "epoch": 0.4128429186013918, "grad_norm": 2.0335004329681396, "learning_rate": 3.0903784477228994e-06, "loss": 0.8486, "step": 33875 }, { "epoch": 0.4129038548255396, "grad_norm": 1.6853364706039429, "learning_rate": 3.090057729313663e-06, "loss": 0.8336, "step": 33880 }, { "epoch": 0.4129647910496874, "grad_norm": 2.042001724243164, "learning_rate": 3.0897370109044262e-06, "loss": 0.8378, "step": 33885 }, { "epoch": 0.4130257272738352, "grad_norm": 1.8611326217651367, "learning_rate": 3.0894162924951893e-06, "loss": 0.8609, "step": 33890 }, { "epoch": 0.413086663497983, "grad_norm": 2.221898078918457, "learning_rate": 3.089095574085953e-06, "loss": 0.8091, "step": 33895 }, { "epoch": 0.41314759972213083, "grad_norm": 1.9337393045425415, "learning_rate": 3.088774855676716e-06, "loss": 0.9037, "step": 33900 }, { "epoch": 0.4132085359462786, "grad_norm": 1.939247488975525, "learning_rate": 3.088454137267479e-06, "loss": 0.941, "step": 33905 }, { "epoch": 0.4132694721704264, "grad_norm": 2.470318078994751, "learning_rate": 3.088133418858243e-06, "loss": 0.8986, "step": 33910 }, { "epoch": 0.41333040839457424, "grad_norm": 1.9108659029006958, "learning_rate": 3.087812700449006e-06, "loss": 0.8022, "step": 33915 }, { "epoch": 0.41339134461872207, "grad_norm": 1.7415931224822998, "learning_rate": 3.087491982039769e-06, "loss": 0.8414, "step": 33920 }, { "epoch": 0.41345228084286983, "grad_norm": 2.019580602645874, "learning_rate": 3.087171263630533e-06, "loss": 0.8209, "step": 33925 }, { "epoch": 0.41351321706701766, "grad_norm": 2.1340954303741455, "learning_rate": 3.086850545221296e-06, "loss": 0.8598, "step": 33930 }, { "epoch": 0.4135741532911655, "grad_norm": 2.0531091690063477, "learning_rate": 3.086529826812059e-06, "loss": 0.8749, "step": 33935 }, { "epoch": 0.41363508951531325, "grad_norm": 2.215867042541504, "learning_rate": 3.086209108402823e-06, "loss": 0.8583, "step": 33940 }, { "epoch": 0.41369602573946107, "grad_norm": 1.964531660079956, "learning_rate": 3.085888389993586e-06, "loss": 0.8378, "step": 33945 }, { "epoch": 0.4137569619636089, "grad_norm": 1.885358452796936, "learning_rate": 3.0855676715843493e-06, "loss": 0.814, "step": 33950 }, { "epoch": 0.4138178981877567, "grad_norm": 1.909737229347229, "learning_rate": 3.0852469531751123e-06, "loss": 0.8576, "step": 33955 }, { "epoch": 0.4138788344119045, "grad_norm": 1.9256260395050049, "learning_rate": 3.0849262347658757e-06, "loss": 0.7772, "step": 33960 }, { "epoch": 0.4139397706360523, "grad_norm": 2.560581684112549, "learning_rate": 3.084605516356639e-06, "loss": 0.8585, "step": 33965 }, { "epoch": 0.41400070686020013, "grad_norm": 1.9857035875320435, "learning_rate": 3.084284797947402e-06, "loss": 0.8175, "step": 33970 }, { "epoch": 0.4140616430843479, "grad_norm": 2.0662949085235596, "learning_rate": 3.083964079538166e-06, "loss": 0.8168, "step": 33975 }, { "epoch": 0.4141225793084957, "grad_norm": 1.6579238176345825, "learning_rate": 3.083643361128929e-06, "loss": 0.8333, "step": 33980 }, { "epoch": 0.41418351553264354, "grad_norm": 2.0440406799316406, "learning_rate": 3.083322642719692e-06, "loss": 0.8405, "step": 33985 }, { "epoch": 0.41424445175679137, "grad_norm": 1.999243974685669, "learning_rate": 3.083001924310456e-06, "loss": 0.8186, "step": 33990 }, { "epoch": 0.41430538798093913, "grad_norm": 2.015739679336548, "learning_rate": 3.082681205901219e-06, "loss": 0.8126, "step": 33995 }, { "epoch": 0.41436632420508696, "grad_norm": 1.9962990283966064, "learning_rate": 3.082360487491982e-06, "loss": 0.8475, "step": 34000 }, { "epoch": 0.4144272604292348, "grad_norm": 1.790648102760315, "learning_rate": 3.082039769082746e-06, "loss": 0.8615, "step": 34005 }, { "epoch": 0.41448819665338255, "grad_norm": 1.9233694076538086, "learning_rate": 3.081719050673509e-06, "loss": 0.9001, "step": 34010 }, { "epoch": 0.41454913287753037, "grad_norm": 1.9492970705032349, "learning_rate": 3.0813983322642723e-06, "loss": 0.8742, "step": 34015 }, { "epoch": 0.4146100691016782, "grad_norm": 1.9028326272964478, "learning_rate": 3.0810776138550357e-06, "loss": 0.8348, "step": 34020 }, { "epoch": 0.414671005325826, "grad_norm": 1.9412552118301392, "learning_rate": 3.0807568954457988e-06, "loss": 0.8709, "step": 34025 }, { "epoch": 0.4147319415499738, "grad_norm": 1.7712739706039429, "learning_rate": 3.080436177036562e-06, "loss": 0.869, "step": 34030 }, { "epoch": 0.4147928777741216, "grad_norm": 2.072063684463501, "learning_rate": 3.0801154586273252e-06, "loss": 0.8253, "step": 34035 }, { "epoch": 0.4148538139982694, "grad_norm": 1.7119635343551636, "learning_rate": 3.079794740218089e-06, "loss": 0.7922, "step": 34040 }, { "epoch": 0.4149147502224172, "grad_norm": 2.143864631652832, "learning_rate": 3.079474021808852e-06, "loss": 0.8776, "step": 34045 }, { "epoch": 0.414975686446565, "grad_norm": 1.9646501541137695, "learning_rate": 3.079153303399615e-06, "loss": 0.8323, "step": 34050 }, { "epoch": 0.41503662267071284, "grad_norm": 2.0649161338806152, "learning_rate": 3.078832584990379e-06, "loss": 0.9056, "step": 34055 }, { "epoch": 0.41509755889486066, "grad_norm": 1.790977120399475, "learning_rate": 3.078511866581142e-06, "loss": 0.8559, "step": 34060 }, { "epoch": 0.41515849511900843, "grad_norm": 2.2804486751556396, "learning_rate": 3.078191148171905e-06, "loss": 0.8238, "step": 34065 }, { "epoch": 0.41521943134315625, "grad_norm": 1.7038044929504395, "learning_rate": 3.077870429762669e-06, "loss": 0.8571, "step": 34070 }, { "epoch": 0.4152803675673041, "grad_norm": 1.560355305671692, "learning_rate": 3.077549711353432e-06, "loss": 0.762, "step": 34075 }, { "epoch": 0.41534130379145184, "grad_norm": 1.9133245944976807, "learning_rate": 3.077228992944195e-06, "loss": 0.8516, "step": 34080 }, { "epoch": 0.41540224001559967, "grad_norm": 1.841841220855713, "learning_rate": 3.0769082745349588e-06, "loss": 0.7866, "step": 34085 }, { "epoch": 0.4154631762397475, "grad_norm": 1.7541598081588745, "learning_rate": 3.076587556125722e-06, "loss": 0.8495, "step": 34090 }, { "epoch": 0.4155241124638953, "grad_norm": 1.9666540622711182, "learning_rate": 3.0762668377164852e-06, "loss": 0.8032, "step": 34095 }, { "epoch": 0.4155850486880431, "grad_norm": 2.081482172012329, "learning_rate": 3.0759461193072487e-06, "loss": 0.8997, "step": 34100 }, { "epoch": 0.4156459849121909, "grad_norm": 2.1469860076904297, "learning_rate": 3.0756254008980117e-06, "loss": 0.8296, "step": 34105 }, { "epoch": 0.4157069211363387, "grad_norm": 1.747714638710022, "learning_rate": 3.075304682488775e-06, "loss": 0.9163, "step": 34110 }, { "epoch": 0.4157678573604865, "grad_norm": 1.8440494537353516, "learning_rate": 3.074983964079538e-06, "loss": 0.8098, "step": 34115 }, { "epoch": 0.4158287935846343, "grad_norm": 1.9777344465255737, "learning_rate": 3.074663245670302e-06, "loss": 0.8168, "step": 34120 }, { "epoch": 0.41588972980878214, "grad_norm": 1.8806411027908325, "learning_rate": 3.074342527261065e-06, "loss": 0.8187, "step": 34125 }, { "epoch": 0.41595066603292996, "grad_norm": 1.9746869802474976, "learning_rate": 3.074021808851828e-06, "loss": 0.8811, "step": 34130 }, { "epoch": 0.41601160225707773, "grad_norm": 1.8272253274917603, "learning_rate": 3.073701090442592e-06, "loss": 0.8699, "step": 34135 }, { "epoch": 0.41607253848122555, "grad_norm": 2.3135814666748047, "learning_rate": 3.073380372033355e-06, "loss": 0.8898, "step": 34140 }, { "epoch": 0.4161334747053734, "grad_norm": 2.4141905307769775, "learning_rate": 3.073059653624118e-06, "loss": 0.8373, "step": 34145 }, { "epoch": 0.41619441092952114, "grad_norm": 1.979730248451233, "learning_rate": 3.072738935214882e-06, "loss": 0.8818, "step": 34150 }, { "epoch": 0.41625534715366896, "grad_norm": 2.179405450820923, "learning_rate": 3.072418216805645e-06, "loss": 0.8751, "step": 34155 }, { "epoch": 0.4163162833778168, "grad_norm": 1.7951337099075317, "learning_rate": 3.072097498396408e-06, "loss": 0.8431, "step": 34160 }, { "epoch": 0.4163772196019646, "grad_norm": 1.9378056526184082, "learning_rate": 3.0717767799871717e-06, "loss": 0.8343, "step": 34165 }, { "epoch": 0.4164381558261124, "grad_norm": 1.6888067722320557, "learning_rate": 3.0714560615779347e-06, "loss": 0.9101, "step": 34170 }, { "epoch": 0.4164990920502602, "grad_norm": 1.6432934999465942, "learning_rate": 3.071135343168698e-06, "loss": 0.8031, "step": 34175 }, { "epoch": 0.416560028274408, "grad_norm": 2.015598773956299, "learning_rate": 3.0708146247594616e-06, "loss": 0.8397, "step": 34180 }, { "epoch": 0.4166209644985558, "grad_norm": 2.0468931198120117, "learning_rate": 3.0704939063502246e-06, "loss": 0.8896, "step": 34185 }, { "epoch": 0.4166819007227036, "grad_norm": 2.16628098487854, "learning_rate": 3.070173187940988e-06, "loss": 0.8069, "step": 34190 }, { "epoch": 0.41674283694685144, "grad_norm": 2.1131298542022705, "learning_rate": 3.0698524695317515e-06, "loss": 0.8045, "step": 34195 }, { "epoch": 0.41680377317099926, "grad_norm": 1.9878039360046387, "learning_rate": 3.069531751122515e-06, "loss": 0.7941, "step": 34200 }, { "epoch": 0.416864709395147, "grad_norm": 1.856148600578308, "learning_rate": 3.069211032713278e-06, "loss": 0.8826, "step": 34205 }, { "epoch": 0.41692564561929485, "grad_norm": 1.7658071517944336, "learning_rate": 3.068890314304041e-06, "loss": 0.853, "step": 34210 }, { "epoch": 0.41698658184344267, "grad_norm": 2.351440191268921, "learning_rate": 3.068569595894805e-06, "loss": 0.9312, "step": 34215 }, { "epoch": 0.41704751806759044, "grad_norm": 1.9195255041122437, "learning_rate": 3.068248877485568e-06, "loss": 0.8415, "step": 34220 }, { "epoch": 0.41710845429173826, "grad_norm": 1.934295654296875, "learning_rate": 3.067928159076331e-06, "loss": 0.8876, "step": 34225 }, { "epoch": 0.4171693905158861, "grad_norm": 2.07140851020813, "learning_rate": 3.0676074406670947e-06, "loss": 0.8607, "step": 34230 }, { "epoch": 0.4172303267400339, "grad_norm": 1.9307516813278198, "learning_rate": 3.0672867222578577e-06, "loss": 0.8392, "step": 34235 }, { "epoch": 0.4172912629641817, "grad_norm": 1.9900226593017578, "learning_rate": 3.066966003848621e-06, "loss": 0.865, "step": 34240 }, { "epoch": 0.4173521991883295, "grad_norm": 1.953157663345337, "learning_rate": 3.0666452854393846e-06, "loss": 0.9034, "step": 34245 }, { "epoch": 0.4174131354124773, "grad_norm": 1.9168007373809814, "learning_rate": 3.0663245670301476e-06, "loss": 0.9211, "step": 34250 }, { "epoch": 0.4174740716366251, "grad_norm": 2.0383710861206055, "learning_rate": 3.066003848620911e-06, "loss": 0.7649, "step": 34255 }, { "epoch": 0.4175350078607729, "grad_norm": 1.708028793334961, "learning_rate": 3.0656831302116745e-06, "loss": 0.8472, "step": 34260 }, { "epoch": 0.41759594408492073, "grad_norm": 1.7895896434783936, "learning_rate": 3.065362411802438e-06, "loss": 0.7801, "step": 34265 }, { "epoch": 0.4176568803090685, "grad_norm": 2.0527031421661377, "learning_rate": 3.065041693393201e-06, "loss": 0.8654, "step": 34270 }, { "epoch": 0.4177178165332163, "grad_norm": 1.8819156885147095, "learning_rate": 3.0647209749839644e-06, "loss": 0.8712, "step": 34275 }, { "epoch": 0.41777875275736415, "grad_norm": 1.6890802383422852, "learning_rate": 3.064400256574728e-06, "loss": 0.9246, "step": 34280 }, { "epoch": 0.41783968898151197, "grad_norm": 2.1999573707580566, "learning_rate": 3.064079538165491e-06, "loss": 0.8942, "step": 34285 }, { "epoch": 0.41790062520565974, "grad_norm": 1.8410495519638062, "learning_rate": 3.063758819756254e-06, "loss": 0.9139, "step": 34290 }, { "epoch": 0.41796156142980756, "grad_norm": 2.1603333950042725, "learning_rate": 3.0634381013470178e-06, "loss": 0.8673, "step": 34295 }, { "epoch": 0.4180224976539554, "grad_norm": 1.8717089891433716, "learning_rate": 3.0631173829377808e-06, "loss": 0.8073, "step": 34300 }, { "epoch": 0.41808343387810315, "grad_norm": 2.350905418395996, "learning_rate": 3.062796664528544e-06, "loss": 0.8499, "step": 34305 }, { "epoch": 0.418144370102251, "grad_norm": 2.2565689086914062, "learning_rate": 3.0624759461193077e-06, "loss": 0.9234, "step": 34310 }, { "epoch": 0.4182053063263988, "grad_norm": 1.9832507371902466, "learning_rate": 3.0621552277100707e-06, "loss": 0.8713, "step": 34315 }, { "epoch": 0.4182662425505466, "grad_norm": 2.4986701011657715, "learning_rate": 3.061834509300834e-06, "loss": 0.7632, "step": 34320 }, { "epoch": 0.4183271787746944, "grad_norm": 1.967621922492981, "learning_rate": 3.0615137908915976e-06, "loss": 0.9009, "step": 34325 }, { "epoch": 0.4183881149988422, "grad_norm": 1.9687551259994507, "learning_rate": 3.0611930724823606e-06, "loss": 0.7981, "step": 34330 }, { "epoch": 0.41844905122299003, "grad_norm": 1.9801260232925415, "learning_rate": 3.060872354073124e-06, "loss": 0.8752, "step": 34335 }, { "epoch": 0.4185099874471378, "grad_norm": 1.9543335437774658, "learning_rate": 3.0605516356638875e-06, "loss": 0.8525, "step": 34340 }, { "epoch": 0.4185709236712856, "grad_norm": 1.9555186033248901, "learning_rate": 3.060230917254651e-06, "loss": 0.8463, "step": 34345 }, { "epoch": 0.41863185989543344, "grad_norm": 1.9063457250595093, "learning_rate": 3.059910198845414e-06, "loss": 0.8286, "step": 34350 }, { "epoch": 0.41869279611958127, "grad_norm": 1.7640972137451172, "learning_rate": 3.0595894804361773e-06, "loss": 0.8299, "step": 34355 }, { "epoch": 0.41875373234372903, "grad_norm": 1.8854022026062012, "learning_rate": 3.059268762026941e-06, "loss": 0.8141, "step": 34360 }, { "epoch": 0.41881466856787686, "grad_norm": 1.8711159229278564, "learning_rate": 3.058948043617704e-06, "loss": 0.8942, "step": 34365 }, { "epoch": 0.4188756047920247, "grad_norm": 1.8442281484603882, "learning_rate": 3.058627325208467e-06, "loss": 0.9244, "step": 34370 }, { "epoch": 0.41893654101617245, "grad_norm": 1.9676860570907593, "learning_rate": 3.0583066067992307e-06, "loss": 0.8902, "step": 34375 }, { "epoch": 0.41899747724032027, "grad_norm": 2.2880632877349854, "learning_rate": 3.0579858883899937e-06, "loss": 0.8245, "step": 34380 }, { "epoch": 0.4190584134644681, "grad_norm": 2.3693323135375977, "learning_rate": 3.0576651699807567e-06, "loss": 0.7787, "step": 34385 }, { "epoch": 0.4191193496886159, "grad_norm": 1.8030569553375244, "learning_rate": 3.0573444515715206e-06, "loss": 0.8411, "step": 34390 }, { "epoch": 0.4191802859127637, "grad_norm": 1.8387463092803955, "learning_rate": 3.0570237331622836e-06, "loss": 0.8561, "step": 34395 }, { "epoch": 0.4192412221369115, "grad_norm": 1.6887507438659668, "learning_rate": 3.056703014753047e-06, "loss": 0.886, "step": 34400 }, { "epoch": 0.41930215836105933, "grad_norm": 1.7529970407485962, "learning_rate": 3.0563822963438105e-06, "loss": 0.842, "step": 34405 }, { "epoch": 0.4193630945852071, "grad_norm": 1.946715235710144, "learning_rate": 3.0560615779345735e-06, "loss": 0.8827, "step": 34410 }, { "epoch": 0.4194240308093549, "grad_norm": 1.9829127788543701, "learning_rate": 3.055740859525337e-06, "loss": 0.8172, "step": 34415 }, { "epoch": 0.41948496703350274, "grad_norm": 1.9929841756820679, "learning_rate": 3.0554201411161004e-06, "loss": 0.9003, "step": 34420 }, { "epoch": 0.41954590325765057, "grad_norm": 1.9449992179870605, "learning_rate": 3.055099422706864e-06, "loss": 0.8592, "step": 34425 }, { "epoch": 0.41960683948179833, "grad_norm": 1.7445982694625854, "learning_rate": 3.054778704297627e-06, "loss": 0.8472, "step": 34430 }, { "epoch": 0.41966777570594616, "grad_norm": 1.8454735279083252, "learning_rate": 3.0544579858883903e-06, "loss": 0.8559, "step": 34435 }, { "epoch": 0.419728711930094, "grad_norm": 1.8819553852081299, "learning_rate": 3.0541372674791537e-06, "loss": 0.8136, "step": 34440 }, { "epoch": 0.41978964815424175, "grad_norm": 1.997478723526001, "learning_rate": 3.0538165490699167e-06, "loss": 0.8672, "step": 34445 }, { "epoch": 0.41985058437838957, "grad_norm": 1.885210633277893, "learning_rate": 3.0534958306606806e-06, "loss": 0.7623, "step": 34450 }, { "epoch": 0.4199115206025374, "grad_norm": 1.893836498260498, "learning_rate": 3.0531751122514436e-06, "loss": 0.8298, "step": 34455 }, { "epoch": 0.4199724568266852, "grad_norm": 1.8610364198684692, "learning_rate": 3.0528543938422066e-06, "loss": 0.8472, "step": 34460 }, { "epoch": 0.420033393050833, "grad_norm": 2.4186980724334717, "learning_rate": 3.0525336754329696e-06, "loss": 0.9147, "step": 34465 }, { "epoch": 0.4200943292749808, "grad_norm": 2.080662250518799, "learning_rate": 3.0522129570237335e-06, "loss": 0.8731, "step": 34470 }, { "epoch": 0.4201552654991286, "grad_norm": 1.8330446481704712, "learning_rate": 3.0518922386144965e-06, "loss": 0.8061, "step": 34475 }, { "epoch": 0.4202162017232764, "grad_norm": 1.7657750844955444, "learning_rate": 3.05157152020526e-06, "loss": 0.9487, "step": 34480 }, { "epoch": 0.4202771379474242, "grad_norm": 1.9099197387695312, "learning_rate": 3.0512508017960234e-06, "loss": 0.8038, "step": 34485 }, { "epoch": 0.42033807417157204, "grad_norm": 1.8102079629898071, "learning_rate": 3.050930083386787e-06, "loss": 0.8357, "step": 34490 }, { "epoch": 0.42039901039571986, "grad_norm": 2.350797414779663, "learning_rate": 3.05060936497755e-06, "loss": 0.8367, "step": 34495 }, { "epoch": 0.42045994661986763, "grad_norm": 2.1943414211273193, "learning_rate": 3.0502886465683133e-06, "loss": 0.8999, "step": 34500 }, { "epoch": 0.42052088284401545, "grad_norm": 1.9607664346694946, "learning_rate": 3.0499679281590767e-06, "loss": 0.8081, "step": 34505 }, { "epoch": 0.4205818190681633, "grad_norm": 1.793870449066162, "learning_rate": 3.0496472097498398e-06, "loss": 0.8941, "step": 34510 }, { "epoch": 0.42064275529231104, "grad_norm": 2.076045274734497, "learning_rate": 3.0493264913406036e-06, "loss": 0.8905, "step": 34515 }, { "epoch": 0.42070369151645887, "grad_norm": 2.0315823554992676, "learning_rate": 3.0490057729313666e-06, "loss": 0.8781, "step": 34520 }, { "epoch": 0.4207646277406067, "grad_norm": 2.535731792449951, "learning_rate": 3.0486850545221297e-06, "loss": 0.9177, "step": 34525 }, { "epoch": 0.4208255639647545, "grad_norm": 1.8415035009384155, "learning_rate": 3.0483643361128935e-06, "loss": 0.7983, "step": 34530 }, { "epoch": 0.4208865001889023, "grad_norm": 2.2732198238372803, "learning_rate": 3.0480436177036565e-06, "loss": 0.9114, "step": 34535 }, { "epoch": 0.4209474364130501, "grad_norm": 1.7990775108337402, "learning_rate": 3.0477228992944196e-06, "loss": 0.9203, "step": 34540 }, { "epoch": 0.4210083726371979, "grad_norm": 2.053912401199341, "learning_rate": 3.047402180885183e-06, "loss": 0.8476, "step": 34545 }, { "epoch": 0.4210693088613457, "grad_norm": 1.8246808052062988, "learning_rate": 3.0470814624759464e-06, "loss": 0.8688, "step": 34550 }, { "epoch": 0.4211302450854935, "grad_norm": 2.0623772144317627, "learning_rate": 3.0467607440667095e-06, "loss": 0.8126, "step": 34555 }, { "epoch": 0.42119118130964134, "grad_norm": 1.9479713439941406, "learning_rate": 3.046440025657473e-06, "loss": 0.8528, "step": 34560 }, { "epoch": 0.42125211753378916, "grad_norm": 1.8521583080291748, "learning_rate": 3.0461193072482363e-06, "loss": 0.864, "step": 34565 }, { "epoch": 0.42131305375793693, "grad_norm": 1.6791514158248901, "learning_rate": 3.0457985888389998e-06, "loss": 0.8295, "step": 34570 }, { "epoch": 0.42137398998208475, "grad_norm": 1.7901960611343384, "learning_rate": 3.045477870429763e-06, "loss": 0.9308, "step": 34575 }, { "epoch": 0.4214349262062326, "grad_norm": 2.0329325199127197, "learning_rate": 3.0451571520205262e-06, "loss": 0.8133, "step": 34580 }, { "epoch": 0.42149586243038034, "grad_norm": 1.6820108890533447, "learning_rate": 3.0448364336112897e-06, "loss": 0.7612, "step": 34585 }, { "epoch": 0.42155679865452816, "grad_norm": 1.6516882181167603, "learning_rate": 3.0445157152020527e-06, "loss": 0.8539, "step": 34590 }, { "epoch": 0.421617734878676, "grad_norm": 2.4524712562561035, "learning_rate": 3.0441949967928166e-06, "loss": 0.8782, "step": 34595 }, { "epoch": 0.4216786711028238, "grad_norm": 1.9687784910202026, "learning_rate": 3.0438742783835796e-06, "loss": 0.8504, "step": 34600 }, { "epoch": 0.4217396073269716, "grad_norm": 2.2507247924804688, "learning_rate": 3.0435535599743426e-06, "loss": 0.8122, "step": 34605 }, { "epoch": 0.4218005435511194, "grad_norm": 1.6985161304473877, "learning_rate": 3.0432328415651065e-06, "loss": 0.8399, "step": 34610 }, { "epoch": 0.4218614797752672, "grad_norm": 2.0490829944610596, "learning_rate": 3.0429121231558695e-06, "loss": 0.86, "step": 34615 }, { "epoch": 0.421922415999415, "grad_norm": 1.8396950960159302, "learning_rate": 3.0425914047466325e-06, "loss": 0.7829, "step": 34620 }, { "epoch": 0.4219833522235628, "grad_norm": 1.7483346462249756, "learning_rate": 3.042270686337396e-06, "loss": 0.8322, "step": 34625 }, { "epoch": 0.42204428844771064, "grad_norm": 2.212695598602295, "learning_rate": 3.0419499679281594e-06, "loss": 0.8719, "step": 34630 }, { "epoch": 0.42210522467185846, "grad_norm": 2.1747188568115234, "learning_rate": 3.0416292495189224e-06, "loss": 0.9084, "step": 34635 }, { "epoch": 0.4221661608960062, "grad_norm": 2.1865551471710205, "learning_rate": 3.041308531109686e-06, "loss": 0.8294, "step": 34640 }, { "epoch": 0.42222709712015405, "grad_norm": 2.153310537338257, "learning_rate": 3.0409878127004493e-06, "loss": 0.9031, "step": 34645 }, { "epoch": 0.42228803334430187, "grad_norm": 1.788417100906372, "learning_rate": 3.0406670942912127e-06, "loss": 0.85, "step": 34650 }, { "epoch": 0.42234896956844964, "grad_norm": 1.7389031648635864, "learning_rate": 3.0403463758819757e-06, "loss": 0.857, "step": 34655 }, { "epoch": 0.42240990579259746, "grad_norm": 2.012298107147217, "learning_rate": 3.040025657472739e-06, "loss": 0.823, "step": 34660 }, { "epoch": 0.4224708420167453, "grad_norm": 1.937637209892273, "learning_rate": 3.0397049390635026e-06, "loss": 0.8634, "step": 34665 }, { "epoch": 0.4225317782408931, "grad_norm": 2.1356823444366455, "learning_rate": 3.0393842206542656e-06, "loss": 0.8299, "step": 34670 }, { "epoch": 0.4225927144650409, "grad_norm": 1.974013328552246, "learning_rate": 3.0390635022450295e-06, "loss": 0.8423, "step": 34675 }, { "epoch": 0.4226536506891887, "grad_norm": 2.173398971557617, "learning_rate": 3.0387427838357925e-06, "loss": 0.8832, "step": 34680 }, { "epoch": 0.4227145869133365, "grad_norm": 1.8287780284881592, "learning_rate": 3.0384220654265555e-06, "loss": 0.8175, "step": 34685 }, { "epoch": 0.4227755231374843, "grad_norm": 2.1448967456817627, "learning_rate": 3.0381013470173194e-06, "loss": 0.8167, "step": 34690 }, { "epoch": 0.4228364593616321, "grad_norm": 2.0372257232666016, "learning_rate": 3.0377806286080824e-06, "loss": 0.8978, "step": 34695 }, { "epoch": 0.42289739558577993, "grad_norm": 1.7761993408203125, "learning_rate": 3.0374599101988454e-06, "loss": 0.9053, "step": 34700 }, { "epoch": 0.42295833180992776, "grad_norm": 1.9683170318603516, "learning_rate": 3.037139191789609e-06, "loss": 0.8457, "step": 34705 }, { "epoch": 0.4230192680340755, "grad_norm": 1.9848929643630981, "learning_rate": 3.0368184733803723e-06, "loss": 0.8358, "step": 34710 }, { "epoch": 0.42308020425822335, "grad_norm": 2.216160774230957, "learning_rate": 3.0364977549711357e-06, "loss": 0.8674, "step": 34715 }, { "epoch": 0.42314114048237117, "grad_norm": 2.3453197479248047, "learning_rate": 3.0361770365618987e-06, "loss": 0.8632, "step": 34720 }, { "epoch": 0.42320207670651894, "grad_norm": 1.7588250637054443, "learning_rate": 3.035856318152662e-06, "loss": 0.8197, "step": 34725 }, { "epoch": 0.42326301293066676, "grad_norm": 1.885666847229004, "learning_rate": 3.0355355997434256e-06, "loss": 0.8089, "step": 34730 }, { "epoch": 0.4233239491548146, "grad_norm": 1.9016751050949097, "learning_rate": 3.0352148813341886e-06, "loss": 0.9392, "step": 34735 }, { "epoch": 0.4233848853789624, "grad_norm": 2.074561357498169, "learning_rate": 3.0348941629249525e-06, "loss": 0.7357, "step": 34740 }, { "epoch": 0.4234458216031102, "grad_norm": 2.1935157775878906, "learning_rate": 3.0345734445157155e-06, "loss": 0.8276, "step": 34745 }, { "epoch": 0.423506757827258, "grad_norm": 2.0073564052581787, "learning_rate": 3.0342527261064785e-06, "loss": 0.8593, "step": 34750 }, { "epoch": 0.4235676940514058, "grad_norm": 2.337772846221924, "learning_rate": 3.0339320076972424e-06, "loss": 0.8546, "step": 34755 }, { "epoch": 0.4236286302755536, "grad_norm": 1.978991985321045, "learning_rate": 3.0336112892880054e-06, "loss": 0.936, "step": 34760 }, { "epoch": 0.4236895664997014, "grad_norm": 1.7737751007080078, "learning_rate": 3.0332905708787684e-06, "loss": 0.7946, "step": 34765 }, { "epoch": 0.42375050272384923, "grad_norm": 2.2423009872436523, "learning_rate": 3.0329698524695323e-06, "loss": 0.8705, "step": 34770 }, { "epoch": 0.423811438947997, "grad_norm": 2.4037506580352783, "learning_rate": 3.0326491340602953e-06, "loss": 0.9146, "step": 34775 }, { "epoch": 0.4238723751721448, "grad_norm": 1.7108477354049683, "learning_rate": 3.0323284156510583e-06, "loss": 0.8574, "step": 34780 }, { "epoch": 0.42393331139629264, "grad_norm": 1.9261281490325928, "learning_rate": 3.032007697241822e-06, "loss": 0.8904, "step": 34785 }, { "epoch": 0.42399424762044047, "grad_norm": 1.9177676439285278, "learning_rate": 3.0316869788325852e-06, "loss": 0.8488, "step": 34790 }, { "epoch": 0.42405518384458823, "grad_norm": 1.8742775917053223, "learning_rate": 3.0313662604233487e-06, "loss": 0.8887, "step": 34795 }, { "epoch": 0.42411612006873606, "grad_norm": 2.0598208904266357, "learning_rate": 3.0310455420141117e-06, "loss": 0.7997, "step": 34800 }, { "epoch": 0.4241770562928839, "grad_norm": 1.941321849822998, "learning_rate": 3.030724823604875e-06, "loss": 0.8263, "step": 34805 }, { "epoch": 0.42423799251703165, "grad_norm": 2.0616157054901123, "learning_rate": 3.0304041051956386e-06, "loss": 0.8243, "step": 34810 }, { "epoch": 0.42429892874117947, "grad_norm": 2.053004264831543, "learning_rate": 3.0300833867864016e-06, "loss": 0.876, "step": 34815 }, { "epoch": 0.4243598649653273, "grad_norm": 2.143709659576416, "learning_rate": 3.0297626683771654e-06, "loss": 0.8323, "step": 34820 }, { "epoch": 0.4244208011894751, "grad_norm": 2.080841541290283, "learning_rate": 3.0294419499679285e-06, "loss": 0.8419, "step": 34825 }, { "epoch": 0.4244817374136229, "grad_norm": 1.6754494905471802, "learning_rate": 3.0291212315586915e-06, "loss": 0.844, "step": 34830 }, { "epoch": 0.4245426736377707, "grad_norm": 1.864526391029358, "learning_rate": 3.0288005131494553e-06, "loss": 0.7657, "step": 34835 }, { "epoch": 0.42460360986191853, "grad_norm": 2.061359167098999, "learning_rate": 3.0284797947402184e-06, "loss": 0.8265, "step": 34840 }, { "epoch": 0.4246645460860663, "grad_norm": 1.9720426797866821, "learning_rate": 3.0281590763309814e-06, "loss": 0.8465, "step": 34845 }, { "epoch": 0.4247254823102141, "grad_norm": 1.7925461530685425, "learning_rate": 3.0278383579217452e-06, "loss": 0.8293, "step": 34850 }, { "epoch": 0.42478641853436194, "grad_norm": 2.011965751647949, "learning_rate": 3.0275176395125082e-06, "loss": 0.8772, "step": 34855 }, { "epoch": 0.42484735475850977, "grad_norm": 1.9164131879806519, "learning_rate": 3.0271969211032713e-06, "loss": 0.8603, "step": 34860 }, { "epoch": 0.42490829098265753, "grad_norm": 2.3936705589294434, "learning_rate": 3.026876202694035e-06, "loss": 0.8697, "step": 34865 }, { "epoch": 0.42496922720680536, "grad_norm": 1.6707756519317627, "learning_rate": 3.026555484284798e-06, "loss": 0.7891, "step": 34870 }, { "epoch": 0.4250301634309532, "grad_norm": 2.1843504905700684, "learning_rate": 3.0262347658755616e-06, "loss": 0.8043, "step": 34875 }, { "epoch": 0.42509109965510095, "grad_norm": 2.0916714668273926, "learning_rate": 3.0259140474663246e-06, "loss": 0.9406, "step": 34880 }, { "epoch": 0.42515203587924877, "grad_norm": 1.6595025062561035, "learning_rate": 3.025593329057088e-06, "loss": 0.8604, "step": 34885 }, { "epoch": 0.4252129721033966, "grad_norm": 1.757330060005188, "learning_rate": 3.0252726106478515e-06, "loss": 0.8358, "step": 34890 }, { "epoch": 0.4252739083275444, "grad_norm": 1.8417532444000244, "learning_rate": 3.0249518922386145e-06, "loss": 0.857, "step": 34895 }, { "epoch": 0.4253348445516922, "grad_norm": 1.7665138244628906, "learning_rate": 3.0246311738293784e-06, "loss": 0.8197, "step": 34900 }, { "epoch": 0.42539578077584, "grad_norm": 2.092958688735962, "learning_rate": 3.0243104554201414e-06, "loss": 0.8814, "step": 34905 }, { "epoch": 0.4254567169999878, "grad_norm": 1.7529715299606323, "learning_rate": 3.0239897370109044e-06, "loss": 0.8765, "step": 34910 }, { "epoch": 0.4255176532241356, "grad_norm": 1.8643863201141357, "learning_rate": 3.0236690186016683e-06, "loss": 0.7656, "step": 34915 }, { "epoch": 0.4255785894482834, "grad_norm": 2.092787981033325, "learning_rate": 3.0233483001924313e-06, "loss": 0.8705, "step": 34920 }, { "epoch": 0.42563952567243124, "grad_norm": 2.080392599105835, "learning_rate": 3.0230275817831943e-06, "loss": 0.8915, "step": 34925 }, { "epoch": 0.42570046189657906, "grad_norm": 1.9226069450378418, "learning_rate": 3.022706863373958e-06, "loss": 0.8501, "step": 34930 }, { "epoch": 0.42576139812072683, "grad_norm": 2.309701919555664, "learning_rate": 3.022386144964721e-06, "loss": 0.8383, "step": 34935 }, { "epoch": 0.42582233434487465, "grad_norm": 1.9204561710357666, "learning_rate": 3.0220654265554846e-06, "loss": 0.8144, "step": 34940 }, { "epoch": 0.4258832705690225, "grad_norm": 1.8696556091308594, "learning_rate": 3.021744708146248e-06, "loss": 0.7751, "step": 34945 }, { "epoch": 0.42594420679317024, "grad_norm": 1.916461706161499, "learning_rate": 3.021423989737011e-06, "loss": 0.7869, "step": 34950 }, { "epoch": 0.42600514301731807, "grad_norm": 1.920570969581604, "learning_rate": 3.0211032713277745e-06, "loss": 0.8694, "step": 34955 }, { "epoch": 0.4260660792414659, "grad_norm": 1.8829983472824097, "learning_rate": 3.0207825529185375e-06, "loss": 0.8624, "step": 34960 }, { "epoch": 0.4261270154656137, "grad_norm": 2.064875602722168, "learning_rate": 3.0204618345093014e-06, "loss": 0.8814, "step": 34965 }, { "epoch": 0.4261879516897615, "grad_norm": 1.8374468088150024, "learning_rate": 3.0201411161000644e-06, "loss": 0.8484, "step": 34970 }, { "epoch": 0.4262488879139093, "grad_norm": 1.93013334274292, "learning_rate": 3.0198203976908274e-06, "loss": 0.8961, "step": 34975 }, { "epoch": 0.4263098241380571, "grad_norm": 2.425889730453491, "learning_rate": 3.0194996792815913e-06, "loss": 0.8178, "step": 34980 }, { "epoch": 0.4263707603622049, "grad_norm": 1.69874906539917, "learning_rate": 3.0191789608723543e-06, "loss": 0.8492, "step": 34985 }, { "epoch": 0.4264316965863527, "grad_norm": 2.032130241394043, "learning_rate": 3.0188582424631173e-06, "loss": 0.884, "step": 34990 }, { "epoch": 0.42649263281050054, "grad_norm": 2.016599655151367, "learning_rate": 3.018537524053881e-06, "loss": 0.8445, "step": 34995 }, { "epoch": 0.42655356903464836, "grad_norm": 1.9799062013626099, "learning_rate": 3.018216805644644e-06, "loss": 0.8032, "step": 35000 }, { "epoch": 0.42661450525879613, "grad_norm": 1.7831919193267822, "learning_rate": 3.0178960872354072e-06, "loss": 0.8184, "step": 35005 }, { "epoch": 0.42667544148294395, "grad_norm": 1.8593477010726929, "learning_rate": 3.017575368826171e-06, "loss": 0.9205, "step": 35010 }, { "epoch": 0.4267363777070918, "grad_norm": 1.9726014137268066, "learning_rate": 3.017254650416934e-06, "loss": 0.8534, "step": 35015 }, { "epoch": 0.42679731393123954, "grad_norm": 1.8180644512176514, "learning_rate": 3.0169339320076975e-06, "loss": 0.7822, "step": 35020 }, { "epoch": 0.42685825015538736, "grad_norm": 2.1710197925567627, "learning_rate": 3.016613213598461e-06, "loss": 0.8133, "step": 35025 }, { "epoch": 0.4269191863795352, "grad_norm": 2.0024971961975098, "learning_rate": 3.016292495189224e-06, "loss": 0.8472, "step": 35030 }, { "epoch": 0.426980122603683, "grad_norm": 2.3519272804260254, "learning_rate": 3.0159717767799874e-06, "loss": 0.869, "step": 35035 }, { "epoch": 0.4270410588278308, "grad_norm": 1.9289915561676025, "learning_rate": 3.0156510583707505e-06, "loss": 0.7739, "step": 35040 }, { "epoch": 0.4271019950519786, "grad_norm": 1.7505207061767578, "learning_rate": 3.0153303399615143e-06, "loss": 0.8303, "step": 35045 }, { "epoch": 0.4271629312761264, "grad_norm": 1.8477953672409058, "learning_rate": 3.0150096215522773e-06, "loss": 0.8566, "step": 35050 }, { "epoch": 0.4272238675002742, "grad_norm": 1.85166335105896, "learning_rate": 3.0146889031430404e-06, "loss": 0.8999, "step": 35055 }, { "epoch": 0.427284803724422, "grad_norm": 2.1460416316986084, "learning_rate": 3.0143681847338042e-06, "loss": 0.8719, "step": 35060 }, { "epoch": 0.42734573994856984, "grad_norm": 2.206510305404663, "learning_rate": 3.0140474663245672e-06, "loss": 0.8437, "step": 35065 }, { "epoch": 0.42740667617271766, "grad_norm": 2.0815892219543457, "learning_rate": 3.0137267479153303e-06, "loss": 0.9257, "step": 35070 }, { "epoch": 0.4274676123968654, "grad_norm": 1.743653416633606, "learning_rate": 3.013406029506094e-06, "loss": 0.872, "step": 35075 }, { "epoch": 0.42752854862101325, "grad_norm": 1.8498305082321167, "learning_rate": 3.013085311096857e-06, "loss": 0.824, "step": 35080 }, { "epoch": 0.42758948484516107, "grad_norm": 1.9911352396011353, "learning_rate": 3.01276459268762e-06, "loss": 0.7831, "step": 35085 }, { "epoch": 0.42765042106930884, "grad_norm": 1.8065134286880493, "learning_rate": 3.012443874278384e-06, "loss": 0.8567, "step": 35090 }, { "epoch": 0.42771135729345666, "grad_norm": 1.9145338535308838, "learning_rate": 3.012123155869147e-06, "loss": 0.885, "step": 35095 }, { "epoch": 0.4277722935176045, "grad_norm": 2.053370237350464, "learning_rate": 3.0118024374599105e-06, "loss": 0.9032, "step": 35100 }, { "epoch": 0.4278332297417523, "grad_norm": 2.190919876098633, "learning_rate": 3.011481719050674e-06, "loss": 0.8171, "step": 35105 }, { "epoch": 0.4278941659659001, "grad_norm": 2.0936689376831055, "learning_rate": 3.011161000641437e-06, "loss": 0.924, "step": 35110 }, { "epoch": 0.4279551021900479, "grad_norm": 1.8472199440002441, "learning_rate": 3.0108402822322004e-06, "loss": 0.889, "step": 35115 }, { "epoch": 0.4280160384141957, "grad_norm": 2.155844211578369, "learning_rate": 3.010519563822964e-06, "loss": 0.8436, "step": 35120 }, { "epoch": 0.4280769746383435, "grad_norm": 1.569496512413025, "learning_rate": 3.0101988454137272e-06, "loss": 0.837, "step": 35125 }, { "epoch": 0.4281379108624913, "grad_norm": 1.7930097579956055, "learning_rate": 3.0098781270044903e-06, "loss": 0.9142, "step": 35130 }, { "epoch": 0.42819884708663913, "grad_norm": 1.8340154886245728, "learning_rate": 3.0095574085952533e-06, "loss": 0.8249, "step": 35135 }, { "epoch": 0.42825978331078696, "grad_norm": 2.1317241191864014, "learning_rate": 3.009236690186017e-06, "loss": 0.8572, "step": 35140 }, { "epoch": 0.4283207195349347, "grad_norm": 2.139770269393921, "learning_rate": 3.00891597177678e-06, "loss": 0.8854, "step": 35145 }, { "epoch": 0.42838165575908255, "grad_norm": 1.992035984992981, "learning_rate": 3.008595253367543e-06, "loss": 0.875, "step": 35150 }, { "epoch": 0.42844259198323037, "grad_norm": 1.9923350811004639, "learning_rate": 3.008274534958307e-06, "loss": 0.849, "step": 35155 }, { "epoch": 0.42850352820737814, "grad_norm": 1.7789418697357178, "learning_rate": 3.00795381654907e-06, "loss": 0.8264, "step": 35160 }, { "epoch": 0.42856446443152596, "grad_norm": 2.1549487113952637, "learning_rate": 3.007633098139833e-06, "loss": 0.8219, "step": 35165 }, { "epoch": 0.4286254006556738, "grad_norm": 1.728714942932129, "learning_rate": 3.007312379730597e-06, "loss": 0.8245, "step": 35170 }, { "epoch": 0.4286863368798216, "grad_norm": 2.261526584625244, "learning_rate": 3.00699166132136e-06, "loss": 0.7934, "step": 35175 }, { "epoch": 0.4287472731039694, "grad_norm": 2.2644269466400146, "learning_rate": 3.0066709429121234e-06, "loss": 0.8126, "step": 35180 }, { "epoch": 0.4288082093281172, "grad_norm": 2.059847354888916, "learning_rate": 3.006350224502887e-06, "loss": 0.8421, "step": 35185 }, { "epoch": 0.428869145552265, "grad_norm": 1.8426045179367065, "learning_rate": 3.0060295060936503e-06, "loss": 0.8962, "step": 35190 }, { "epoch": 0.4289300817764128, "grad_norm": 2.443516254425049, "learning_rate": 3.0057087876844133e-06, "loss": 0.8377, "step": 35195 }, { "epoch": 0.4289910180005606, "grad_norm": 1.6294137239456177, "learning_rate": 3.0053880692751767e-06, "loss": 0.8889, "step": 35200 }, { "epoch": 0.42905195422470843, "grad_norm": 1.5880197286605835, "learning_rate": 3.00506735086594e-06, "loss": 0.8598, "step": 35205 }, { "epoch": 0.42911289044885625, "grad_norm": 1.9128425121307373, "learning_rate": 3.004746632456703e-06, "loss": 0.8614, "step": 35210 }, { "epoch": 0.429173826673004, "grad_norm": 1.935895323753357, "learning_rate": 3.004425914047466e-06, "loss": 0.7986, "step": 35215 }, { "epoch": 0.42923476289715184, "grad_norm": 2.191567897796631, "learning_rate": 3.00410519563823e-06, "loss": 0.8412, "step": 35220 }, { "epoch": 0.42929569912129967, "grad_norm": 1.7824937105178833, "learning_rate": 3.003784477228993e-06, "loss": 0.821, "step": 35225 }, { "epoch": 0.42935663534544743, "grad_norm": 1.8095813989639282, "learning_rate": 3.003463758819756e-06, "loss": 0.8898, "step": 35230 }, { "epoch": 0.42941757156959526, "grad_norm": 2.1620635986328125, "learning_rate": 3.00314304041052e-06, "loss": 0.8646, "step": 35235 }, { "epoch": 0.4294785077937431, "grad_norm": 1.8643922805786133, "learning_rate": 3.002822322001283e-06, "loss": 0.7899, "step": 35240 }, { "epoch": 0.42953944401789085, "grad_norm": 1.8140337467193604, "learning_rate": 3.0025016035920464e-06, "loss": 0.7945, "step": 35245 }, { "epoch": 0.42960038024203867, "grad_norm": 1.7034478187561035, "learning_rate": 3.00218088518281e-06, "loss": 0.8986, "step": 35250 }, { "epoch": 0.4296613164661865, "grad_norm": 1.8692207336425781, "learning_rate": 3.001860166773573e-06, "loss": 0.8052, "step": 35255 }, { "epoch": 0.4297222526903343, "grad_norm": 1.831275463104248, "learning_rate": 3.0015394483643363e-06, "loss": 0.8052, "step": 35260 }, { "epoch": 0.4297831889144821, "grad_norm": 2.3250479698181152, "learning_rate": 3.0012187299550998e-06, "loss": 0.8982, "step": 35265 }, { "epoch": 0.4298441251386299, "grad_norm": 1.9519811868667603, "learning_rate": 3.000898011545863e-06, "loss": 0.8131, "step": 35270 }, { "epoch": 0.42990506136277773, "grad_norm": 1.9368349313735962, "learning_rate": 3.0005772931366262e-06, "loss": 0.8576, "step": 35275 }, { "epoch": 0.4299659975869255, "grad_norm": 1.8563733100891113, "learning_rate": 3.0002565747273897e-06, "loss": 0.831, "step": 35280 }, { "epoch": 0.4300269338110733, "grad_norm": 1.7199187278747559, "learning_rate": 2.999935856318153e-06, "loss": 0.8835, "step": 35285 }, { "epoch": 0.43008787003522114, "grad_norm": 1.9144864082336426, "learning_rate": 2.999615137908916e-06, "loss": 0.776, "step": 35290 }, { "epoch": 0.43014880625936897, "grad_norm": 1.867552399635315, "learning_rate": 2.999294419499679e-06, "loss": 0.8544, "step": 35295 }, { "epoch": 0.43020974248351673, "grad_norm": 2.0694286823272705, "learning_rate": 2.998973701090443e-06, "loss": 0.7961, "step": 35300 }, { "epoch": 0.43027067870766456, "grad_norm": 1.8011221885681152, "learning_rate": 2.998652982681206e-06, "loss": 0.8177, "step": 35305 }, { "epoch": 0.4303316149318124, "grad_norm": 1.9728786945343018, "learning_rate": 2.998332264271969e-06, "loss": 0.8188, "step": 35310 }, { "epoch": 0.43039255115596015, "grad_norm": 2.0880515575408936, "learning_rate": 2.998011545862733e-06, "loss": 0.8569, "step": 35315 }, { "epoch": 0.43045348738010797, "grad_norm": 1.9452593326568604, "learning_rate": 2.997690827453496e-06, "loss": 0.8107, "step": 35320 }, { "epoch": 0.4305144236042558, "grad_norm": 1.9147270917892456, "learning_rate": 2.9973701090442594e-06, "loss": 0.8909, "step": 35325 }, { "epoch": 0.4305753598284036, "grad_norm": 1.7402392625808716, "learning_rate": 2.997049390635023e-06, "loss": 0.8043, "step": 35330 }, { "epoch": 0.4306362960525514, "grad_norm": 1.6244233846664429, "learning_rate": 2.996728672225786e-06, "loss": 0.825, "step": 35335 }, { "epoch": 0.4306972322766992, "grad_norm": 1.928637981414795, "learning_rate": 2.9964079538165493e-06, "loss": 0.8015, "step": 35340 }, { "epoch": 0.430758168500847, "grad_norm": 1.9667154550552368, "learning_rate": 2.9960872354073127e-06, "loss": 0.8677, "step": 35345 }, { "epoch": 0.4308191047249948, "grad_norm": 2.3189914226531982, "learning_rate": 2.995766516998076e-06, "loss": 0.8901, "step": 35350 }, { "epoch": 0.4308800409491426, "grad_norm": 2.074470043182373, "learning_rate": 2.995445798588839e-06, "loss": 0.8215, "step": 35355 }, { "epoch": 0.43094097717329044, "grad_norm": 1.789475440979004, "learning_rate": 2.9951250801796026e-06, "loss": 0.8161, "step": 35360 }, { "epoch": 0.43100191339743826, "grad_norm": 2.1387743949890137, "learning_rate": 2.994804361770366e-06, "loss": 0.8371, "step": 35365 }, { "epoch": 0.43106284962158603, "grad_norm": 2.3904764652252197, "learning_rate": 2.994483643361129e-06, "loss": 0.8628, "step": 35370 }, { "epoch": 0.43112378584573385, "grad_norm": 2.083763837814331, "learning_rate": 2.994162924951893e-06, "loss": 0.837, "step": 35375 }, { "epoch": 0.4311847220698817, "grad_norm": 2.4427449703216553, "learning_rate": 2.993842206542656e-06, "loss": 0.8539, "step": 35380 }, { "epoch": 0.43124565829402944, "grad_norm": 1.822725772857666, "learning_rate": 2.993521488133419e-06, "loss": 0.8174, "step": 35385 }, { "epoch": 0.43130659451817727, "grad_norm": 2.21716570854187, "learning_rate": 2.993200769724182e-06, "loss": 0.7811, "step": 35390 }, { "epoch": 0.4313675307423251, "grad_norm": 1.8328114748001099, "learning_rate": 2.992880051314946e-06, "loss": 0.9007, "step": 35395 }, { "epoch": 0.4314284669664729, "grad_norm": 2.26554799079895, "learning_rate": 2.992559332905709e-06, "loss": 0.9009, "step": 35400 }, { "epoch": 0.4314894031906207, "grad_norm": 1.9343595504760742, "learning_rate": 2.9922386144964723e-06, "loss": 0.8014, "step": 35405 }, { "epoch": 0.4315503394147685, "grad_norm": 1.8750841617584229, "learning_rate": 2.9919178960872357e-06, "loss": 0.8764, "step": 35410 }, { "epoch": 0.4316112756389163, "grad_norm": 1.6048027276992798, "learning_rate": 2.991597177677999e-06, "loss": 0.8216, "step": 35415 }, { "epoch": 0.4316722118630641, "grad_norm": 1.7875170707702637, "learning_rate": 2.991276459268762e-06, "loss": 0.7761, "step": 35420 }, { "epoch": 0.4317331480872119, "grad_norm": 1.9400699138641357, "learning_rate": 2.9909557408595256e-06, "loss": 0.868, "step": 35425 }, { "epoch": 0.43179408431135974, "grad_norm": 1.714342474937439, "learning_rate": 2.990635022450289e-06, "loss": 0.8464, "step": 35430 }, { "epoch": 0.43185502053550756, "grad_norm": 1.8881865739822388, "learning_rate": 2.990314304041052e-06, "loss": 0.8391, "step": 35435 }, { "epoch": 0.43191595675965533, "grad_norm": 2.01356840133667, "learning_rate": 2.989993585631816e-06, "loss": 0.8675, "step": 35440 }, { "epoch": 0.43197689298380315, "grad_norm": 2.1757562160491943, "learning_rate": 2.989672867222579e-06, "loss": 0.86, "step": 35445 }, { "epoch": 0.432037829207951, "grad_norm": 1.9708836078643799, "learning_rate": 2.989352148813342e-06, "loss": 0.8685, "step": 35450 }, { "epoch": 0.43209876543209874, "grad_norm": 2.2239346504211426, "learning_rate": 2.989031430404106e-06, "loss": 0.8541, "step": 35455 }, { "epoch": 0.43215970165624656, "grad_norm": 2.6005117893218994, "learning_rate": 2.988710711994869e-06, "loss": 0.8333, "step": 35460 }, { "epoch": 0.4322206378803944, "grad_norm": 2.0823311805725098, "learning_rate": 2.988389993585632e-06, "loss": 0.832, "step": 35465 }, { "epoch": 0.4322815741045422, "grad_norm": 2.036139726638794, "learning_rate": 2.9880692751763953e-06, "loss": 0.8684, "step": 35470 }, { "epoch": 0.43234251032869, "grad_norm": 1.9785876274108887, "learning_rate": 2.9877485567671588e-06, "loss": 0.8489, "step": 35475 }, { "epoch": 0.4324034465528378, "grad_norm": 1.5880775451660156, "learning_rate": 2.9874278383579218e-06, "loss": 0.8441, "step": 35480 }, { "epoch": 0.4324643827769856, "grad_norm": 1.7914977073669434, "learning_rate": 2.987107119948685e-06, "loss": 0.8291, "step": 35485 }, { "epoch": 0.4325253190011334, "grad_norm": 1.8266392946243286, "learning_rate": 2.9867864015394486e-06, "loss": 0.8402, "step": 35490 }, { "epoch": 0.4325862552252812, "grad_norm": 2.1539313793182373, "learning_rate": 2.986465683130212e-06, "loss": 0.8095, "step": 35495 }, { "epoch": 0.43264719144942904, "grad_norm": 1.9365371465682983, "learning_rate": 2.986144964720975e-06, "loss": 0.789, "step": 35500 }, { "epoch": 0.43270812767357686, "grad_norm": 2.2455801963806152, "learning_rate": 2.9858242463117385e-06, "loss": 0.8809, "step": 35505 }, { "epoch": 0.4327690638977246, "grad_norm": 1.8159732818603516, "learning_rate": 2.985503527902502e-06, "loss": 0.8586, "step": 35510 }, { "epoch": 0.43283000012187245, "grad_norm": 1.9126336574554443, "learning_rate": 2.985182809493265e-06, "loss": 0.8737, "step": 35515 }, { "epoch": 0.43289093634602027, "grad_norm": 1.9022778272628784, "learning_rate": 2.984862091084029e-06, "loss": 0.8866, "step": 35520 }, { "epoch": 0.43295187257016804, "grad_norm": 1.8711962699890137, "learning_rate": 2.984541372674792e-06, "loss": 0.8182, "step": 35525 }, { "epoch": 0.43301280879431586, "grad_norm": 1.884233832359314, "learning_rate": 2.984220654265555e-06, "loss": 0.8657, "step": 35530 }, { "epoch": 0.4330737450184637, "grad_norm": 1.852347493171692, "learning_rate": 2.9838999358563188e-06, "loss": 0.853, "step": 35535 }, { "epoch": 0.4331346812426115, "grad_norm": 1.9921622276306152, "learning_rate": 2.9835792174470818e-06, "loss": 0.9032, "step": 35540 }, { "epoch": 0.4331956174667593, "grad_norm": 1.8545500040054321, "learning_rate": 2.983258499037845e-06, "loss": 0.8281, "step": 35545 }, { "epoch": 0.4332565536909071, "grad_norm": 1.8057222366333008, "learning_rate": 2.9829377806286082e-06, "loss": 0.833, "step": 35550 }, { "epoch": 0.4333174899150549, "grad_norm": 1.9442670345306396, "learning_rate": 2.9826170622193717e-06, "loss": 0.8291, "step": 35555 }, { "epoch": 0.4333784261392027, "grad_norm": 2.005725860595703, "learning_rate": 2.9822963438101347e-06, "loss": 0.8657, "step": 35560 }, { "epoch": 0.4334393623633505, "grad_norm": 1.7817927598953247, "learning_rate": 2.981975625400898e-06, "loss": 0.8246, "step": 35565 }, { "epoch": 0.43350029858749833, "grad_norm": 1.9201925992965698, "learning_rate": 2.9816549069916616e-06, "loss": 0.8029, "step": 35570 }, { "epoch": 0.43356123481164616, "grad_norm": 1.9528815746307373, "learning_rate": 2.981334188582425e-06, "loss": 0.8139, "step": 35575 }, { "epoch": 0.4336221710357939, "grad_norm": 1.9334487915039062, "learning_rate": 2.981013470173188e-06, "loss": 0.7614, "step": 35580 }, { "epoch": 0.43368310725994175, "grad_norm": 1.7448370456695557, "learning_rate": 2.9806927517639515e-06, "loss": 0.8319, "step": 35585 }, { "epoch": 0.43374404348408957, "grad_norm": 1.810657262802124, "learning_rate": 2.980372033354715e-06, "loss": 0.9017, "step": 35590 }, { "epoch": 0.43380497970823734, "grad_norm": 1.9428589344024658, "learning_rate": 2.980051314945478e-06, "loss": 0.8458, "step": 35595 }, { "epoch": 0.43386591593238516, "grad_norm": 2.1609842777252197, "learning_rate": 2.979730596536242e-06, "loss": 0.8856, "step": 35600 }, { "epoch": 0.433926852156533, "grad_norm": 2.0080740451812744, "learning_rate": 2.979409878127005e-06, "loss": 0.9309, "step": 35605 }, { "epoch": 0.4339877883806808, "grad_norm": 2.1346704959869385, "learning_rate": 2.979089159717768e-06, "loss": 0.8052, "step": 35610 }, { "epoch": 0.4340487246048286, "grad_norm": 1.71002197265625, "learning_rate": 2.9787684413085317e-06, "loss": 0.8938, "step": 35615 }, { "epoch": 0.4341096608289764, "grad_norm": 2.0844454765319824, "learning_rate": 2.9784477228992947e-06, "loss": 0.8882, "step": 35620 }, { "epoch": 0.4341705970531242, "grad_norm": 1.7947885990142822, "learning_rate": 2.9781270044900577e-06, "loss": 0.8465, "step": 35625 }, { "epoch": 0.434231533277272, "grad_norm": 1.7814356088638306, "learning_rate": 2.977806286080821e-06, "loss": 0.7763, "step": 35630 }, { "epoch": 0.4342924695014198, "grad_norm": 1.8711017370224, "learning_rate": 2.9774855676715846e-06, "loss": 0.8126, "step": 35635 }, { "epoch": 0.43435340572556763, "grad_norm": 1.9642901420593262, "learning_rate": 2.9771648492623476e-06, "loss": 0.8858, "step": 35640 }, { "epoch": 0.43441434194971545, "grad_norm": 2.1925549507141113, "learning_rate": 2.976844130853111e-06, "loss": 0.8899, "step": 35645 }, { "epoch": 0.4344752781738632, "grad_norm": 2.124109983444214, "learning_rate": 2.9765234124438745e-06, "loss": 0.862, "step": 35650 }, { "epoch": 0.43453621439801104, "grad_norm": 1.66398286819458, "learning_rate": 2.976202694034638e-06, "loss": 0.7966, "step": 35655 }, { "epoch": 0.43459715062215887, "grad_norm": 2.0376298427581787, "learning_rate": 2.975881975625401e-06, "loss": 0.8048, "step": 35660 }, { "epoch": 0.43465808684630663, "grad_norm": 2.2171292304992676, "learning_rate": 2.975561257216165e-06, "loss": 0.8275, "step": 35665 }, { "epoch": 0.43471902307045446, "grad_norm": 1.3309258222579956, "learning_rate": 2.975240538806928e-06, "loss": 0.8596, "step": 35670 }, { "epoch": 0.4347799592946023, "grad_norm": 1.9682488441467285, "learning_rate": 2.974919820397691e-06, "loss": 0.7967, "step": 35675 }, { "epoch": 0.4348408955187501, "grad_norm": 1.7791246175765991, "learning_rate": 2.9745991019884547e-06, "loss": 0.8732, "step": 35680 }, { "epoch": 0.43490183174289787, "grad_norm": 1.734655737876892, "learning_rate": 2.9742783835792177e-06, "loss": 0.8172, "step": 35685 }, { "epoch": 0.4349627679670457, "grad_norm": 2.074453830718994, "learning_rate": 2.9739576651699808e-06, "loss": 0.8186, "step": 35690 }, { "epoch": 0.4350237041911935, "grad_norm": 1.8826876878738403, "learning_rate": 2.9736369467607446e-06, "loss": 0.7801, "step": 35695 }, { "epoch": 0.4350846404153413, "grad_norm": 2.2890255451202393, "learning_rate": 2.9733162283515076e-06, "loss": 0.9462, "step": 35700 }, { "epoch": 0.4351455766394891, "grad_norm": 1.7801876068115234, "learning_rate": 2.9729955099422707e-06, "loss": 0.8208, "step": 35705 }, { "epoch": 0.43520651286363693, "grad_norm": 1.964881420135498, "learning_rate": 2.9726747915330345e-06, "loss": 0.9046, "step": 35710 }, { "epoch": 0.4352674490877847, "grad_norm": 1.9428939819335938, "learning_rate": 2.9723540731237975e-06, "loss": 0.9558, "step": 35715 }, { "epoch": 0.4353283853119325, "grad_norm": 1.8957083225250244, "learning_rate": 2.972033354714561e-06, "loss": 0.8265, "step": 35720 }, { "epoch": 0.43538932153608034, "grad_norm": 1.8294750452041626, "learning_rate": 2.971712636305324e-06, "loss": 0.7469, "step": 35725 }, { "epoch": 0.43545025776022817, "grad_norm": 1.9503507614135742, "learning_rate": 2.9713919178960874e-06, "loss": 0.8517, "step": 35730 }, { "epoch": 0.43551119398437593, "grad_norm": 2.922093629837036, "learning_rate": 2.971071199486851e-06, "loss": 0.8669, "step": 35735 }, { "epoch": 0.43557213020852376, "grad_norm": 1.8825536966323853, "learning_rate": 2.970750481077614e-06, "loss": 0.9156, "step": 35740 }, { "epoch": 0.4356330664326716, "grad_norm": 2.128945827484131, "learning_rate": 2.9704297626683778e-06, "loss": 0.7993, "step": 35745 }, { "epoch": 0.43569400265681935, "grad_norm": 1.6892465353012085, "learning_rate": 2.9701090442591408e-06, "loss": 0.7949, "step": 35750 }, { "epoch": 0.43575493888096717, "grad_norm": 1.9713281393051147, "learning_rate": 2.9697883258499038e-06, "loss": 0.8337, "step": 35755 }, { "epoch": 0.435815875105115, "grad_norm": 1.8407124280929565, "learning_rate": 2.9694676074406676e-06, "loss": 0.8303, "step": 35760 }, { "epoch": 0.4358768113292628, "grad_norm": 2.0485293865203857, "learning_rate": 2.9691468890314307e-06, "loss": 0.9071, "step": 35765 }, { "epoch": 0.4359377475534106, "grad_norm": 1.8856263160705566, "learning_rate": 2.9688261706221937e-06, "loss": 0.778, "step": 35770 }, { "epoch": 0.4359986837775584, "grad_norm": 2.5989835262298584, "learning_rate": 2.9685054522129575e-06, "loss": 0.8218, "step": 35775 }, { "epoch": 0.4360596200017062, "grad_norm": 1.8903945684432983, "learning_rate": 2.9681847338037206e-06, "loss": 0.8498, "step": 35780 }, { "epoch": 0.436120556225854, "grad_norm": 1.7273166179656982, "learning_rate": 2.9678640153944836e-06, "loss": 0.8466, "step": 35785 }, { "epoch": 0.4361814924500018, "grad_norm": 1.8626867532730103, "learning_rate": 2.9675432969852474e-06, "loss": 0.8545, "step": 35790 }, { "epoch": 0.43624242867414964, "grad_norm": 1.9173507690429688, "learning_rate": 2.9672225785760105e-06, "loss": 0.8331, "step": 35795 }, { "epoch": 0.43630336489829746, "grad_norm": 2.009636402130127, "learning_rate": 2.966901860166774e-06, "loss": 0.8015, "step": 35800 }, { "epoch": 0.43636430112244523, "grad_norm": 1.7927706241607666, "learning_rate": 2.966581141757537e-06, "loss": 0.8224, "step": 35805 }, { "epoch": 0.43642523734659305, "grad_norm": 1.8503566980361938, "learning_rate": 2.9662604233483004e-06, "loss": 0.8947, "step": 35810 }, { "epoch": 0.4364861735707409, "grad_norm": 2.0974483489990234, "learning_rate": 2.965939704939064e-06, "loss": 0.8656, "step": 35815 }, { "epoch": 0.43654710979488864, "grad_norm": 1.8779444694519043, "learning_rate": 2.965618986529827e-06, "loss": 0.8222, "step": 35820 }, { "epoch": 0.43660804601903647, "grad_norm": 2.0198466777801514, "learning_rate": 2.9652982681205907e-06, "loss": 0.9252, "step": 35825 }, { "epoch": 0.4366689822431843, "grad_norm": 2.0921390056610107, "learning_rate": 2.9649775497113537e-06, "loss": 0.8207, "step": 35830 }, { "epoch": 0.4367299184673321, "grad_norm": 1.9712132215499878, "learning_rate": 2.9646568313021167e-06, "loss": 0.8104, "step": 35835 }, { "epoch": 0.4367908546914799, "grad_norm": 1.8555160760879517, "learning_rate": 2.9643361128928806e-06, "loss": 0.8771, "step": 35840 }, { "epoch": 0.4368517909156277, "grad_norm": 1.8749423027038574, "learning_rate": 2.9640153944836436e-06, "loss": 0.841, "step": 35845 }, { "epoch": 0.4369127271397755, "grad_norm": 1.9552013874053955, "learning_rate": 2.9636946760744066e-06, "loss": 0.8467, "step": 35850 }, { "epoch": 0.4369736633639233, "grad_norm": 2.7117388248443604, "learning_rate": 2.9633739576651705e-06, "loss": 0.8402, "step": 35855 }, { "epoch": 0.4370345995880711, "grad_norm": 2.011530637741089, "learning_rate": 2.9630532392559335e-06, "loss": 0.8279, "step": 35860 }, { "epoch": 0.43709553581221894, "grad_norm": 2.01133394241333, "learning_rate": 2.9627325208466965e-06, "loss": 0.8721, "step": 35865 }, { "epoch": 0.43715647203636676, "grad_norm": 1.850055456161499, "learning_rate": 2.9624118024374604e-06, "loss": 0.8508, "step": 35870 }, { "epoch": 0.43721740826051453, "grad_norm": 1.7554683685302734, "learning_rate": 2.9620910840282234e-06, "loss": 0.9186, "step": 35875 }, { "epoch": 0.43727834448466235, "grad_norm": 1.833083152770996, "learning_rate": 2.961770365618987e-06, "loss": 0.819, "step": 35880 }, { "epoch": 0.4373392807088102, "grad_norm": 1.979455828666687, "learning_rate": 2.96144964720975e-06, "loss": 0.8388, "step": 35885 }, { "epoch": 0.43740021693295794, "grad_norm": 2.286777973175049, "learning_rate": 2.9611289288005137e-06, "loss": 0.884, "step": 35890 }, { "epoch": 0.43746115315710576, "grad_norm": 1.8774884939193726, "learning_rate": 2.9608082103912767e-06, "loss": 0.8452, "step": 35895 }, { "epoch": 0.4375220893812536, "grad_norm": 1.7739609479904175, "learning_rate": 2.9604874919820397e-06, "loss": 0.8795, "step": 35900 }, { "epoch": 0.4375830256054014, "grad_norm": 1.8088693618774414, "learning_rate": 2.9601667735728036e-06, "loss": 0.8683, "step": 35905 }, { "epoch": 0.4376439618295492, "grad_norm": 1.7933868169784546, "learning_rate": 2.9598460551635666e-06, "loss": 0.8706, "step": 35910 }, { "epoch": 0.437704898053697, "grad_norm": 2.269327163696289, "learning_rate": 2.9595253367543296e-06, "loss": 0.7692, "step": 35915 }, { "epoch": 0.4377658342778448, "grad_norm": 2.0130579471588135, "learning_rate": 2.9592046183450935e-06, "loss": 0.7866, "step": 35920 }, { "epoch": 0.4378267705019926, "grad_norm": 1.7645015716552734, "learning_rate": 2.9588838999358565e-06, "loss": 0.8603, "step": 35925 }, { "epoch": 0.4378877067261404, "grad_norm": 2.1752047538757324, "learning_rate": 2.9585631815266195e-06, "loss": 0.7775, "step": 35930 }, { "epoch": 0.43794864295028824, "grad_norm": 1.7513810396194458, "learning_rate": 2.9582424631173834e-06, "loss": 0.8452, "step": 35935 }, { "epoch": 0.43800957917443606, "grad_norm": 1.878499150276184, "learning_rate": 2.9579217447081464e-06, "loss": 0.8475, "step": 35940 }, { "epoch": 0.4380705153985838, "grad_norm": 1.8700412511825562, "learning_rate": 2.95760102629891e-06, "loss": 0.753, "step": 35945 }, { "epoch": 0.43813145162273165, "grad_norm": 2.069265842437744, "learning_rate": 2.9572803078896733e-06, "loss": 0.8503, "step": 35950 }, { "epoch": 0.43819238784687947, "grad_norm": 1.9466758966445923, "learning_rate": 2.9569595894804363e-06, "loss": 0.8377, "step": 35955 }, { "epoch": 0.43825332407102724, "grad_norm": 2.0915863513946533, "learning_rate": 2.9566388710711998e-06, "loss": 0.7877, "step": 35960 }, { "epoch": 0.43831426029517506, "grad_norm": 1.9573180675506592, "learning_rate": 2.956318152661963e-06, "loss": 0.8111, "step": 35965 }, { "epoch": 0.4383751965193229, "grad_norm": 2.035006523132324, "learning_rate": 2.9559974342527266e-06, "loss": 0.8156, "step": 35970 }, { "epoch": 0.4384361327434707, "grad_norm": 2.0066778659820557, "learning_rate": 2.9556767158434897e-06, "loss": 0.7955, "step": 35975 }, { "epoch": 0.4384970689676185, "grad_norm": 1.963664174079895, "learning_rate": 2.9553559974342527e-06, "loss": 0.8649, "step": 35980 }, { "epoch": 0.4385580051917663, "grad_norm": 2.443657398223877, "learning_rate": 2.9550352790250165e-06, "loss": 0.8434, "step": 35985 }, { "epoch": 0.4386189414159141, "grad_norm": 1.9673612117767334, "learning_rate": 2.9547145606157795e-06, "loss": 0.8892, "step": 35990 }, { "epoch": 0.4386798776400619, "grad_norm": 1.8515596389770508, "learning_rate": 2.9543938422065426e-06, "loss": 0.852, "step": 35995 }, { "epoch": 0.4387408138642097, "grad_norm": 1.7523202896118164, "learning_rate": 2.9540731237973064e-06, "loss": 0.8077, "step": 36000 }, { "epoch": 0.43880175008835753, "grad_norm": 1.9802157878875732, "learning_rate": 2.9537524053880694e-06, "loss": 0.7495, "step": 36005 }, { "epoch": 0.43886268631250536, "grad_norm": 1.6881203651428223, "learning_rate": 2.9534316869788325e-06, "loss": 0.8271, "step": 36010 }, { "epoch": 0.4389236225366531, "grad_norm": 2.7523856163024902, "learning_rate": 2.9531109685695963e-06, "loss": 0.8995, "step": 36015 }, { "epoch": 0.43898455876080095, "grad_norm": 2.041679620742798, "learning_rate": 2.9527902501603593e-06, "loss": 0.8138, "step": 36020 }, { "epoch": 0.43904549498494877, "grad_norm": 2.0021703243255615, "learning_rate": 2.9524695317511228e-06, "loss": 0.9128, "step": 36025 }, { "epoch": 0.43910643120909654, "grad_norm": 1.8392771482467651, "learning_rate": 2.9521488133418862e-06, "loss": 0.8448, "step": 36030 }, { "epoch": 0.43916736743324436, "grad_norm": 2.19274640083313, "learning_rate": 2.9518280949326492e-06, "loss": 0.8471, "step": 36035 }, { "epoch": 0.4392283036573922, "grad_norm": 2.16326642036438, "learning_rate": 2.9515073765234127e-06, "loss": 0.8929, "step": 36040 }, { "epoch": 0.43928923988154, "grad_norm": 2.553030252456665, "learning_rate": 2.951186658114176e-06, "loss": 0.854, "step": 36045 }, { "epoch": 0.4393501761056878, "grad_norm": 1.9470642805099487, "learning_rate": 2.9508659397049396e-06, "loss": 0.9018, "step": 36050 }, { "epoch": 0.4394111123298356, "grad_norm": 2.227085590362549, "learning_rate": 2.9505452212957026e-06, "loss": 0.8382, "step": 36055 }, { "epoch": 0.4394720485539834, "grad_norm": 2.0051703453063965, "learning_rate": 2.9502245028864656e-06, "loss": 0.8963, "step": 36060 }, { "epoch": 0.4395329847781312, "grad_norm": 2.6055710315704346, "learning_rate": 2.9499037844772295e-06, "loss": 0.8191, "step": 36065 }, { "epoch": 0.439593921002279, "grad_norm": 1.956125259399414, "learning_rate": 2.9495830660679925e-06, "loss": 0.8627, "step": 36070 }, { "epoch": 0.43965485722642683, "grad_norm": 1.8885393142700195, "learning_rate": 2.9492623476587555e-06, "loss": 0.9076, "step": 36075 }, { "epoch": 0.43971579345057465, "grad_norm": 1.806334137916565, "learning_rate": 2.9489416292495194e-06, "loss": 0.8632, "step": 36080 }, { "epoch": 0.4397767296747224, "grad_norm": 1.8999488353729248, "learning_rate": 2.9486209108402824e-06, "loss": 0.8182, "step": 36085 }, { "epoch": 0.43983766589887024, "grad_norm": 1.9430644512176514, "learning_rate": 2.9483001924310454e-06, "loss": 0.9183, "step": 36090 }, { "epoch": 0.43989860212301807, "grad_norm": 2.0524017810821533, "learning_rate": 2.9479794740218093e-06, "loss": 0.895, "step": 36095 }, { "epoch": 0.43995953834716583, "grad_norm": 1.5636299848556519, "learning_rate": 2.9476587556125723e-06, "loss": 0.8994, "step": 36100 }, { "epoch": 0.44002047457131366, "grad_norm": 2.129185199737549, "learning_rate": 2.9473380372033357e-06, "loss": 0.8798, "step": 36105 }, { "epoch": 0.4400814107954615, "grad_norm": 2.0134825706481934, "learning_rate": 2.947017318794099e-06, "loss": 0.9138, "step": 36110 }, { "epoch": 0.4401423470196093, "grad_norm": 1.9510842561721802, "learning_rate": 2.9466966003848626e-06, "loss": 0.8749, "step": 36115 }, { "epoch": 0.44020328324375707, "grad_norm": 1.752060890197754, "learning_rate": 2.9463758819756256e-06, "loss": 0.8281, "step": 36120 }, { "epoch": 0.4402642194679049, "grad_norm": 2.3248770236968994, "learning_rate": 2.946055163566389e-06, "loss": 0.8587, "step": 36125 }, { "epoch": 0.4403251556920527, "grad_norm": 1.7905811071395874, "learning_rate": 2.9457344451571525e-06, "loss": 0.8878, "step": 36130 }, { "epoch": 0.4403860919162005, "grad_norm": 2.274608850479126, "learning_rate": 2.9454137267479155e-06, "loss": 0.899, "step": 36135 }, { "epoch": 0.4404470281403483, "grad_norm": 1.6790741682052612, "learning_rate": 2.9450930083386785e-06, "loss": 0.8234, "step": 36140 }, { "epoch": 0.44050796436449613, "grad_norm": 1.83406400680542, "learning_rate": 2.9447722899294424e-06, "loss": 0.8605, "step": 36145 }, { "epoch": 0.44056890058864395, "grad_norm": 2.2241909503936768, "learning_rate": 2.9444515715202054e-06, "loss": 0.7925, "step": 36150 }, { "epoch": 0.4406298368127917, "grad_norm": 1.9945014715194702, "learning_rate": 2.9441308531109684e-06, "loss": 0.9123, "step": 36155 }, { "epoch": 0.44069077303693954, "grad_norm": 1.8823641538619995, "learning_rate": 2.9438101347017323e-06, "loss": 0.8648, "step": 36160 }, { "epoch": 0.44075170926108737, "grad_norm": 2.7187156677246094, "learning_rate": 2.9434894162924953e-06, "loss": 0.8843, "step": 36165 }, { "epoch": 0.44081264548523513, "grad_norm": 1.9728107452392578, "learning_rate": 2.9431686978832587e-06, "loss": 0.8602, "step": 36170 }, { "epoch": 0.44087358170938296, "grad_norm": 1.6192129850387573, "learning_rate": 2.942847979474022e-06, "loss": 0.8324, "step": 36175 }, { "epoch": 0.4409345179335308, "grad_norm": 1.6230506896972656, "learning_rate": 2.942527261064785e-06, "loss": 0.8667, "step": 36180 }, { "epoch": 0.4409954541576786, "grad_norm": 2.547865152359009, "learning_rate": 2.9422065426555486e-06, "loss": 0.924, "step": 36185 }, { "epoch": 0.44105639038182637, "grad_norm": 2.047302007675171, "learning_rate": 2.941885824246312e-06, "loss": 0.7828, "step": 36190 }, { "epoch": 0.4411173266059742, "grad_norm": 1.9377996921539307, "learning_rate": 2.9415651058370755e-06, "loss": 0.8173, "step": 36195 }, { "epoch": 0.441178262830122, "grad_norm": 1.7560864686965942, "learning_rate": 2.9412443874278385e-06, "loss": 0.764, "step": 36200 }, { "epoch": 0.4412391990542698, "grad_norm": 2.2826895713806152, "learning_rate": 2.940923669018602e-06, "loss": 0.8365, "step": 36205 }, { "epoch": 0.4413001352784176, "grad_norm": 1.7615662813186646, "learning_rate": 2.9406029506093654e-06, "loss": 0.9046, "step": 36210 }, { "epoch": 0.4413610715025654, "grad_norm": 1.8441405296325684, "learning_rate": 2.9402822322001284e-06, "loss": 0.8832, "step": 36215 }, { "epoch": 0.4414220077267132, "grad_norm": 2.1411798000335693, "learning_rate": 2.9399615137908915e-06, "loss": 0.8274, "step": 36220 }, { "epoch": 0.441482943950861, "grad_norm": 1.889902949333191, "learning_rate": 2.9396407953816553e-06, "loss": 0.9273, "step": 36225 }, { "epoch": 0.44154388017500884, "grad_norm": 1.9721200466156006, "learning_rate": 2.9393200769724183e-06, "loss": 0.8888, "step": 36230 }, { "epoch": 0.44160481639915666, "grad_norm": 2.077974796295166, "learning_rate": 2.9389993585631813e-06, "loss": 0.8849, "step": 36235 }, { "epoch": 0.44166575262330443, "grad_norm": 1.7068195343017578, "learning_rate": 2.938678640153945e-06, "loss": 0.9098, "step": 36240 }, { "epoch": 0.44172668884745225, "grad_norm": 1.8143879175186157, "learning_rate": 2.9383579217447082e-06, "loss": 0.8995, "step": 36245 }, { "epoch": 0.4417876250716001, "grad_norm": 2.0306591987609863, "learning_rate": 2.9380372033354717e-06, "loss": 0.8823, "step": 36250 }, { "epoch": 0.44184856129574784, "grad_norm": 1.8287880420684814, "learning_rate": 2.937716484926235e-06, "loss": 0.9038, "step": 36255 }, { "epoch": 0.44190949751989567, "grad_norm": 2.0848209857940674, "learning_rate": 2.937395766516998e-06, "loss": 0.8912, "step": 36260 }, { "epoch": 0.4419704337440435, "grad_norm": 1.7975026369094849, "learning_rate": 2.9370750481077616e-06, "loss": 0.8942, "step": 36265 }, { "epoch": 0.4420313699681913, "grad_norm": 2.0250680446624756, "learning_rate": 2.936754329698525e-06, "loss": 0.8713, "step": 36270 }, { "epoch": 0.4420923061923391, "grad_norm": 1.9673936367034912, "learning_rate": 2.9364336112892884e-06, "loss": 0.817, "step": 36275 }, { "epoch": 0.4421532424164869, "grad_norm": 2.073638677597046, "learning_rate": 2.9361128928800515e-06, "loss": 0.7903, "step": 36280 }, { "epoch": 0.4422141786406347, "grad_norm": 1.8351824283599854, "learning_rate": 2.935792174470815e-06, "loss": 0.8596, "step": 36285 }, { "epoch": 0.4422751148647825, "grad_norm": 2.144219398498535, "learning_rate": 2.9354714560615783e-06, "loss": 0.8821, "step": 36290 }, { "epoch": 0.4423360510889303, "grad_norm": 1.697034239768982, "learning_rate": 2.9351507376523414e-06, "loss": 0.7661, "step": 36295 }, { "epoch": 0.44239698731307814, "grad_norm": 1.834382176399231, "learning_rate": 2.9348300192431052e-06, "loss": 0.8988, "step": 36300 }, { "epoch": 0.44245792353722596, "grad_norm": 2.1760425567626953, "learning_rate": 2.9345093008338682e-06, "loss": 0.86, "step": 36305 }, { "epoch": 0.44251885976137373, "grad_norm": 1.8803811073303223, "learning_rate": 2.9341885824246313e-06, "loss": 0.8018, "step": 36310 }, { "epoch": 0.44257979598552155, "grad_norm": 2.0052952766418457, "learning_rate": 2.9338678640153943e-06, "loss": 0.8549, "step": 36315 }, { "epoch": 0.4426407322096694, "grad_norm": 1.7647950649261475, "learning_rate": 2.933547145606158e-06, "loss": 0.8045, "step": 36320 }, { "epoch": 0.44270166843381714, "grad_norm": 1.823858380317688, "learning_rate": 2.933226427196921e-06, "loss": 0.8219, "step": 36325 }, { "epoch": 0.44276260465796496, "grad_norm": 1.9065251350402832, "learning_rate": 2.9329057087876846e-06, "loss": 0.7897, "step": 36330 }, { "epoch": 0.4428235408821128, "grad_norm": 1.8937792778015137, "learning_rate": 2.932584990378448e-06, "loss": 0.8072, "step": 36335 }, { "epoch": 0.4428844771062606, "grad_norm": 1.969205617904663, "learning_rate": 2.932264271969211e-06, "loss": 0.8256, "step": 36340 }, { "epoch": 0.4429454133304084, "grad_norm": 2.239342451095581, "learning_rate": 2.9319435535599745e-06, "loss": 0.8373, "step": 36345 }, { "epoch": 0.4430063495545562, "grad_norm": 2.0020318031311035, "learning_rate": 2.931622835150738e-06, "loss": 0.8657, "step": 36350 }, { "epoch": 0.443067285778704, "grad_norm": 1.615033745765686, "learning_rate": 2.9313021167415014e-06, "loss": 0.8752, "step": 36355 }, { "epoch": 0.4431282220028518, "grad_norm": 1.864382028579712, "learning_rate": 2.9309813983322644e-06, "loss": 0.7765, "step": 36360 }, { "epoch": 0.4431891582269996, "grad_norm": 2.043452262878418, "learning_rate": 2.9306606799230283e-06, "loss": 0.8384, "step": 36365 }, { "epoch": 0.44325009445114744, "grad_norm": 2.0045390129089355, "learning_rate": 2.9303399615137913e-06, "loss": 0.8554, "step": 36370 }, { "epoch": 0.44331103067529526, "grad_norm": 1.9800801277160645, "learning_rate": 2.9300192431045543e-06, "loss": 0.903, "step": 36375 }, { "epoch": 0.443371966899443, "grad_norm": 1.8270998001098633, "learning_rate": 2.929698524695318e-06, "loss": 0.9221, "step": 36380 }, { "epoch": 0.44343290312359085, "grad_norm": 2.219562292098999, "learning_rate": 2.929377806286081e-06, "loss": 0.884, "step": 36385 }, { "epoch": 0.44349383934773867, "grad_norm": 2.0105412006378174, "learning_rate": 2.929057087876844e-06, "loss": 0.8681, "step": 36390 }, { "epoch": 0.44355477557188644, "grad_norm": 1.6838340759277344, "learning_rate": 2.9287363694676076e-06, "loss": 0.8673, "step": 36395 }, { "epoch": 0.44361571179603426, "grad_norm": 1.5795629024505615, "learning_rate": 2.928415651058371e-06, "loss": 0.8202, "step": 36400 }, { "epoch": 0.4436766480201821, "grad_norm": 2.1800429821014404, "learning_rate": 2.928094932649134e-06, "loss": 0.9011, "step": 36405 }, { "epoch": 0.4437375842443299, "grad_norm": 2.180323600769043, "learning_rate": 2.9277742142398975e-06, "loss": 0.8596, "step": 36410 }, { "epoch": 0.4437985204684777, "grad_norm": 2.235445022583008, "learning_rate": 2.927453495830661e-06, "loss": 0.7829, "step": 36415 }, { "epoch": 0.4438594566926255, "grad_norm": 1.9798181056976318, "learning_rate": 2.9271327774214244e-06, "loss": 0.8014, "step": 36420 }, { "epoch": 0.4439203929167733, "grad_norm": 1.8281530141830444, "learning_rate": 2.9268120590121874e-06, "loss": 0.8442, "step": 36425 }, { "epoch": 0.4439813291409211, "grad_norm": 2.341567277908325, "learning_rate": 2.926491340602951e-06, "loss": 0.8935, "step": 36430 }, { "epoch": 0.4440422653650689, "grad_norm": 2.1054768562316895, "learning_rate": 2.9261706221937143e-06, "loss": 0.9274, "step": 36435 }, { "epoch": 0.44410320158921673, "grad_norm": 2.278390884399414, "learning_rate": 2.9258499037844773e-06, "loss": 0.8755, "step": 36440 }, { "epoch": 0.44416413781336456, "grad_norm": 1.7032030820846558, "learning_rate": 2.925529185375241e-06, "loss": 0.8443, "step": 36445 }, { "epoch": 0.4442250740375123, "grad_norm": 1.9178704023361206, "learning_rate": 2.925208466966004e-06, "loss": 0.855, "step": 36450 }, { "epoch": 0.44428601026166015, "grad_norm": 1.663511037826538, "learning_rate": 2.9248877485567672e-06, "loss": 0.8446, "step": 36455 }, { "epoch": 0.44434694648580797, "grad_norm": 1.849719524383545, "learning_rate": 2.924567030147531e-06, "loss": 0.8842, "step": 36460 }, { "epoch": 0.44440788270995574, "grad_norm": 2.2099292278289795, "learning_rate": 2.924246311738294e-06, "loss": 0.7817, "step": 36465 }, { "epoch": 0.44446881893410356, "grad_norm": 2.162341594696045, "learning_rate": 2.923925593329057e-06, "loss": 0.866, "step": 36470 }, { "epoch": 0.4445297551582514, "grad_norm": 1.9253937005996704, "learning_rate": 2.9236048749198206e-06, "loss": 0.8339, "step": 36475 }, { "epoch": 0.4445906913823992, "grad_norm": 2.3029122352600098, "learning_rate": 2.923284156510584e-06, "loss": 0.8148, "step": 36480 }, { "epoch": 0.444651627606547, "grad_norm": 1.9665395021438599, "learning_rate": 2.922963438101347e-06, "loss": 0.8342, "step": 36485 }, { "epoch": 0.4447125638306948, "grad_norm": 1.9734790325164795, "learning_rate": 2.9226427196921104e-06, "loss": 0.8962, "step": 36490 }, { "epoch": 0.4447735000548426, "grad_norm": 1.7692162990570068, "learning_rate": 2.922322001282874e-06, "loss": 0.8113, "step": 36495 }, { "epoch": 0.4448344362789904, "grad_norm": 2.0155420303344727, "learning_rate": 2.9220012828736373e-06, "loss": 0.7801, "step": 36500 }, { "epoch": 0.4448953725031382, "grad_norm": 1.8842666149139404, "learning_rate": 2.9216805644644003e-06, "loss": 0.8851, "step": 36505 }, { "epoch": 0.44495630872728603, "grad_norm": 1.9082292318344116, "learning_rate": 2.9213598460551638e-06, "loss": 0.8255, "step": 36510 }, { "epoch": 0.44501724495143385, "grad_norm": 1.7920894622802734, "learning_rate": 2.9210391276459272e-06, "loss": 0.8635, "step": 36515 }, { "epoch": 0.4450781811755816, "grad_norm": 3.42004132270813, "learning_rate": 2.9207184092366902e-06, "loss": 0.8217, "step": 36520 }, { "epoch": 0.44513911739972944, "grad_norm": 1.6810706853866577, "learning_rate": 2.920397690827454e-06, "loss": 0.8124, "step": 36525 }, { "epoch": 0.44520005362387727, "grad_norm": 1.8732744455337524, "learning_rate": 2.920076972418217e-06, "loss": 0.856, "step": 36530 }, { "epoch": 0.44526098984802503, "grad_norm": 2.360271692276001, "learning_rate": 2.91975625400898e-06, "loss": 0.8144, "step": 36535 }, { "epoch": 0.44532192607217286, "grad_norm": 1.7344324588775635, "learning_rate": 2.919435535599744e-06, "loss": 0.8841, "step": 36540 }, { "epoch": 0.4453828622963207, "grad_norm": 2.3178701400756836, "learning_rate": 2.919114817190507e-06, "loss": 0.8827, "step": 36545 }, { "epoch": 0.4454437985204685, "grad_norm": 1.9298614263534546, "learning_rate": 2.91879409878127e-06, "loss": 0.8403, "step": 36550 }, { "epoch": 0.44550473474461627, "grad_norm": 2.3893682956695557, "learning_rate": 2.918473380372034e-06, "loss": 0.8711, "step": 36555 }, { "epoch": 0.4455656709687641, "grad_norm": 1.8473163843154907, "learning_rate": 2.918152661962797e-06, "loss": 0.8588, "step": 36560 }, { "epoch": 0.4456266071929119, "grad_norm": 2.041248083114624, "learning_rate": 2.91783194355356e-06, "loss": 0.8894, "step": 36565 }, { "epoch": 0.4456875434170597, "grad_norm": 2.037637233734131, "learning_rate": 2.9175112251443234e-06, "loss": 0.8383, "step": 36570 }, { "epoch": 0.4457484796412075, "grad_norm": 1.9513276815414429, "learning_rate": 2.917190506735087e-06, "loss": 0.8596, "step": 36575 }, { "epoch": 0.44580941586535533, "grad_norm": 1.8260767459869385, "learning_rate": 2.9168697883258503e-06, "loss": 0.9036, "step": 36580 }, { "epoch": 0.44587035208950315, "grad_norm": 2.073108434677124, "learning_rate": 2.9165490699166133e-06, "loss": 0.8356, "step": 36585 }, { "epoch": 0.4459312883136509, "grad_norm": 1.9117296934127808, "learning_rate": 2.916228351507377e-06, "loss": 0.8333, "step": 36590 }, { "epoch": 0.44599222453779874, "grad_norm": 1.9528772830963135, "learning_rate": 2.91590763309814e-06, "loss": 0.8477, "step": 36595 }, { "epoch": 0.44605316076194657, "grad_norm": 1.8662203550338745, "learning_rate": 2.915586914688903e-06, "loss": 0.8675, "step": 36600 }, { "epoch": 0.44611409698609433, "grad_norm": 1.615133285522461, "learning_rate": 2.915266196279667e-06, "loss": 0.8304, "step": 36605 }, { "epoch": 0.44617503321024216, "grad_norm": 1.9020347595214844, "learning_rate": 2.91494547787043e-06, "loss": 0.8629, "step": 36610 }, { "epoch": 0.44623596943439, "grad_norm": 1.8162826299667358, "learning_rate": 2.914624759461193e-06, "loss": 0.7961, "step": 36615 }, { "epoch": 0.4462969056585378, "grad_norm": 1.7174283266067505, "learning_rate": 2.914304041051957e-06, "loss": 0.8604, "step": 36620 }, { "epoch": 0.44635784188268557, "grad_norm": 2.322213649749756, "learning_rate": 2.91398332264272e-06, "loss": 0.8235, "step": 36625 }, { "epoch": 0.4464187781068334, "grad_norm": 1.962397575378418, "learning_rate": 2.913662604233483e-06, "loss": 0.8568, "step": 36630 }, { "epoch": 0.4464797143309812, "grad_norm": 1.9663996696472168, "learning_rate": 2.913341885824247e-06, "loss": 0.854, "step": 36635 }, { "epoch": 0.446540650555129, "grad_norm": 2.0825247764587402, "learning_rate": 2.91302116741501e-06, "loss": 0.8368, "step": 36640 }, { "epoch": 0.4466015867792768, "grad_norm": 2.727280855178833, "learning_rate": 2.9127004490057733e-06, "loss": 0.8675, "step": 36645 }, { "epoch": 0.4466625230034246, "grad_norm": 1.8420348167419434, "learning_rate": 2.9123797305965363e-06, "loss": 0.8451, "step": 36650 }, { "epoch": 0.44672345922757245, "grad_norm": 1.8824628591537476, "learning_rate": 2.9120590121872997e-06, "loss": 0.8002, "step": 36655 }, { "epoch": 0.4467843954517202, "grad_norm": 2.32039213180542, "learning_rate": 2.911738293778063e-06, "loss": 0.8817, "step": 36660 }, { "epoch": 0.44684533167586804, "grad_norm": 1.9031096696853638, "learning_rate": 2.911417575368826e-06, "loss": 0.7869, "step": 36665 }, { "epoch": 0.44690626790001586, "grad_norm": 1.7890491485595703, "learning_rate": 2.91109685695959e-06, "loss": 0.8373, "step": 36670 }, { "epoch": 0.44696720412416363, "grad_norm": 1.989997148513794, "learning_rate": 2.910776138550353e-06, "loss": 0.7666, "step": 36675 }, { "epoch": 0.44702814034831145, "grad_norm": 1.7877836227416992, "learning_rate": 2.910455420141116e-06, "loss": 0.873, "step": 36680 }, { "epoch": 0.4470890765724593, "grad_norm": 2.108754873275757, "learning_rate": 2.91013470173188e-06, "loss": 0.8319, "step": 36685 }, { "epoch": 0.44715001279660704, "grad_norm": 1.9470871686935425, "learning_rate": 2.909813983322643e-06, "loss": 0.7785, "step": 36690 }, { "epoch": 0.44721094902075487, "grad_norm": 2.167276382446289, "learning_rate": 2.909493264913406e-06, "loss": 0.7572, "step": 36695 }, { "epoch": 0.4472718852449027, "grad_norm": 2.013237237930298, "learning_rate": 2.90917254650417e-06, "loss": 0.8277, "step": 36700 }, { "epoch": 0.4473328214690505, "grad_norm": 2.4444823265075684, "learning_rate": 2.908851828094933e-06, "loss": 0.7617, "step": 36705 }, { "epoch": 0.4473937576931983, "grad_norm": 2.381791591644287, "learning_rate": 2.908531109685696e-06, "loss": 0.866, "step": 36710 }, { "epoch": 0.4474546939173461, "grad_norm": 2.296635866165161, "learning_rate": 2.9082103912764598e-06, "loss": 0.938, "step": 36715 }, { "epoch": 0.4475156301414939, "grad_norm": 1.9649423360824585, "learning_rate": 2.9078896728672228e-06, "loss": 0.8943, "step": 36720 }, { "epoch": 0.4475765663656417, "grad_norm": 2.2316911220550537, "learning_rate": 2.9075689544579862e-06, "loss": 0.7877, "step": 36725 }, { "epoch": 0.4476375025897895, "grad_norm": 1.8596054315567017, "learning_rate": 2.9072482360487492e-06, "loss": 0.8778, "step": 36730 }, { "epoch": 0.44769843881393734, "grad_norm": 1.8776259422302246, "learning_rate": 2.9069275176395127e-06, "loss": 0.8082, "step": 36735 }, { "epoch": 0.44775937503808516, "grad_norm": 1.8119436502456665, "learning_rate": 2.906606799230276e-06, "loss": 0.9014, "step": 36740 }, { "epoch": 0.44782031126223293, "grad_norm": 1.783780813217163, "learning_rate": 2.906286080821039e-06, "loss": 0.8458, "step": 36745 }, { "epoch": 0.44788124748638075, "grad_norm": 1.8263674974441528, "learning_rate": 2.905965362411803e-06, "loss": 0.88, "step": 36750 }, { "epoch": 0.4479421837105286, "grad_norm": 1.9835470914840698, "learning_rate": 2.905644644002566e-06, "loss": 0.8692, "step": 36755 }, { "epoch": 0.44800311993467634, "grad_norm": 1.5248960256576538, "learning_rate": 2.905323925593329e-06, "loss": 0.8707, "step": 36760 }, { "epoch": 0.44806405615882416, "grad_norm": 2.142862558364868, "learning_rate": 2.905003207184093e-06, "loss": 0.8471, "step": 36765 }, { "epoch": 0.448124992382972, "grad_norm": 2.1784162521362305, "learning_rate": 2.904682488774856e-06, "loss": 0.8545, "step": 36770 }, { "epoch": 0.4481859286071198, "grad_norm": 2.019927501678467, "learning_rate": 2.904361770365619e-06, "loss": 0.8442, "step": 36775 }, { "epoch": 0.4482468648312676, "grad_norm": 1.9311842918395996, "learning_rate": 2.9040410519563828e-06, "loss": 0.9084, "step": 36780 }, { "epoch": 0.4483078010554154, "grad_norm": 1.8297019004821777, "learning_rate": 2.903720333547146e-06, "loss": 0.8304, "step": 36785 }, { "epoch": 0.4483687372795632, "grad_norm": 1.9776430130004883, "learning_rate": 2.903399615137909e-06, "loss": 0.8453, "step": 36790 }, { "epoch": 0.448429673503711, "grad_norm": 2.1375231742858887, "learning_rate": 2.9030788967286727e-06, "loss": 0.8687, "step": 36795 }, { "epoch": 0.4484906097278588, "grad_norm": 1.8710778951644897, "learning_rate": 2.9027581783194357e-06, "loss": 0.8218, "step": 36800 }, { "epoch": 0.44855154595200664, "grad_norm": 1.6498055458068848, "learning_rate": 2.902437459910199e-06, "loss": 0.841, "step": 36805 }, { "epoch": 0.44861248217615446, "grad_norm": 2.069620370864868, "learning_rate": 2.902116741500962e-06, "loss": 0.8923, "step": 36810 }, { "epoch": 0.4486734184003022, "grad_norm": 1.9942874908447266, "learning_rate": 2.901796023091726e-06, "loss": 0.8546, "step": 36815 }, { "epoch": 0.44873435462445005, "grad_norm": 1.7061105966567993, "learning_rate": 2.901475304682489e-06, "loss": 0.7835, "step": 36820 }, { "epoch": 0.44879529084859787, "grad_norm": 2.350254535675049, "learning_rate": 2.901154586273252e-06, "loss": 0.8518, "step": 36825 }, { "epoch": 0.44885622707274564, "grad_norm": 1.7913607358932495, "learning_rate": 2.900833867864016e-06, "loss": 0.8754, "step": 36830 }, { "epoch": 0.44891716329689346, "grad_norm": 1.7365857362747192, "learning_rate": 2.900513149454779e-06, "loss": 0.8647, "step": 36835 }, { "epoch": 0.4489780995210413, "grad_norm": 1.7804527282714844, "learning_rate": 2.900192431045542e-06, "loss": 0.8399, "step": 36840 }, { "epoch": 0.4490390357451891, "grad_norm": 1.8810477256774902, "learning_rate": 2.899871712636306e-06, "loss": 0.8378, "step": 36845 }, { "epoch": 0.4490999719693369, "grad_norm": 2.171025276184082, "learning_rate": 2.899550994227069e-06, "loss": 0.847, "step": 36850 }, { "epoch": 0.4491609081934847, "grad_norm": 2.0246593952178955, "learning_rate": 2.899230275817832e-06, "loss": 0.8629, "step": 36855 }, { "epoch": 0.4492218444176325, "grad_norm": 2.0895326137542725, "learning_rate": 2.8989095574085957e-06, "loss": 0.8759, "step": 36860 }, { "epoch": 0.4492827806417803, "grad_norm": 1.863307237625122, "learning_rate": 2.8985888389993587e-06, "loss": 0.8239, "step": 36865 }, { "epoch": 0.4493437168659281, "grad_norm": 2.0366289615631104, "learning_rate": 2.898268120590122e-06, "loss": 0.8705, "step": 36870 }, { "epoch": 0.44940465309007593, "grad_norm": 2.0109007358551025, "learning_rate": 2.8979474021808856e-06, "loss": 0.8393, "step": 36875 }, { "epoch": 0.44946558931422376, "grad_norm": 1.9323118925094604, "learning_rate": 2.8976266837716486e-06, "loss": 0.8515, "step": 36880 }, { "epoch": 0.4495265255383715, "grad_norm": 1.8047025203704834, "learning_rate": 2.897305965362412e-06, "loss": 0.8536, "step": 36885 }, { "epoch": 0.44958746176251935, "grad_norm": 2.2024245262145996, "learning_rate": 2.8969852469531755e-06, "loss": 0.8622, "step": 36890 }, { "epoch": 0.44964839798666717, "grad_norm": 2.142531394958496, "learning_rate": 2.896664528543939e-06, "loss": 0.8178, "step": 36895 }, { "epoch": 0.44970933421081494, "grad_norm": 2.0836782455444336, "learning_rate": 2.896343810134702e-06, "loss": 0.7907, "step": 36900 }, { "epoch": 0.44977027043496276, "grad_norm": 1.9904905557632446, "learning_rate": 2.896023091725465e-06, "loss": 0.8039, "step": 36905 }, { "epoch": 0.4498312066591106, "grad_norm": 1.7115839719772339, "learning_rate": 2.895702373316229e-06, "loss": 0.877, "step": 36910 }, { "epoch": 0.4498921428832584, "grad_norm": 2.062671661376953, "learning_rate": 2.895381654906992e-06, "loss": 0.8195, "step": 36915 }, { "epoch": 0.4499530791074062, "grad_norm": 2.3172762393951416, "learning_rate": 2.895060936497755e-06, "loss": 0.838, "step": 36920 }, { "epoch": 0.450014015331554, "grad_norm": 2.0643386840820312, "learning_rate": 2.8947402180885187e-06, "loss": 0.7967, "step": 36925 }, { "epoch": 0.4500749515557018, "grad_norm": 1.9179052114486694, "learning_rate": 2.8944194996792818e-06, "loss": 0.8929, "step": 36930 }, { "epoch": 0.4501358877798496, "grad_norm": 1.810884952545166, "learning_rate": 2.8940987812700448e-06, "loss": 0.8288, "step": 36935 }, { "epoch": 0.4501968240039974, "grad_norm": 1.8546161651611328, "learning_rate": 2.8937780628608086e-06, "loss": 0.8245, "step": 36940 }, { "epoch": 0.45025776022814523, "grad_norm": 2.8951833248138428, "learning_rate": 2.8934573444515717e-06, "loss": 0.7896, "step": 36945 }, { "epoch": 0.45031869645229305, "grad_norm": 2.0239059925079346, "learning_rate": 2.893136626042335e-06, "loss": 0.8477, "step": 36950 }, { "epoch": 0.4503796326764408, "grad_norm": 1.7553060054779053, "learning_rate": 2.8928159076330985e-06, "loss": 0.8249, "step": 36955 }, { "epoch": 0.45044056890058864, "grad_norm": 1.868148684501648, "learning_rate": 2.8924951892238616e-06, "loss": 0.7634, "step": 36960 }, { "epoch": 0.45050150512473647, "grad_norm": 2.1358933448791504, "learning_rate": 2.892174470814625e-06, "loss": 0.8377, "step": 36965 }, { "epoch": 0.45056244134888424, "grad_norm": 1.801390290260315, "learning_rate": 2.8918537524053884e-06, "loss": 0.8098, "step": 36970 }, { "epoch": 0.45062337757303206, "grad_norm": 1.949028730392456, "learning_rate": 2.891533033996152e-06, "loss": 0.8172, "step": 36975 }, { "epoch": 0.4506843137971799, "grad_norm": 1.9720492362976074, "learning_rate": 2.891212315586915e-06, "loss": 0.8975, "step": 36980 }, { "epoch": 0.4507452500213277, "grad_norm": 1.9694104194641113, "learning_rate": 2.890891597177678e-06, "loss": 0.8816, "step": 36985 }, { "epoch": 0.45080618624547547, "grad_norm": 2.0930569171905518, "learning_rate": 2.8905708787684418e-06, "loss": 0.8393, "step": 36990 }, { "epoch": 0.4508671224696233, "grad_norm": 1.9674367904663086, "learning_rate": 2.890250160359205e-06, "loss": 0.8113, "step": 36995 }, { "epoch": 0.4509280586937711, "grad_norm": 1.7509269714355469, "learning_rate": 2.889929441949968e-06, "loss": 0.8057, "step": 37000 }, { "epoch": 0.4509889949179189, "grad_norm": 2.1079022884368896, "learning_rate": 2.8896087235407317e-06, "loss": 0.8526, "step": 37005 }, { "epoch": 0.4510499311420667, "grad_norm": 1.7558602094650269, "learning_rate": 2.8892880051314947e-06, "loss": 0.8658, "step": 37010 }, { "epoch": 0.45111086736621453, "grad_norm": 2.031822919845581, "learning_rate": 2.8889672867222577e-06, "loss": 0.8107, "step": 37015 }, { "epoch": 0.45117180359036235, "grad_norm": 2.277327299118042, "learning_rate": 2.8886465683130216e-06, "loss": 0.9368, "step": 37020 }, { "epoch": 0.4512327398145101, "grad_norm": 2.11358904838562, "learning_rate": 2.8883258499037846e-06, "loss": 0.8143, "step": 37025 }, { "epoch": 0.45129367603865794, "grad_norm": 1.870788812637329, "learning_rate": 2.888005131494548e-06, "loss": 0.8636, "step": 37030 }, { "epoch": 0.45135461226280577, "grad_norm": 1.8843204975128174, "learning_rate": 2.8876844130853115e-06, "loss": 0.8276, "step": 37035 }, { "epoch": 0.45141554848695353, "grad_norm": 2.296172618865967, "learning_rate": 2.8873636946760745e-06, "loss": 0.9086, "step": 37040 }, { "epoch": 0.45147648471110136, "grad_norm": 1.7210524082183838, "learning_rate": 2.887042976266838e-06, "loss": 0.8017, "step": 37045 }, { "epoch": 0.4515374209352492, "grad_norm": 1.8217216730117798, "learning_rate": 2.8867222578576014e-06, "loss": 0.8476, "step": 37050 }, { "epoch": 0.451598357159397, "grad_norm": 1.8716193437576294, "learning_rate": 2.886401539448365e-06, "loss": 0.8136, "step": 37055 }, { "epoch": 0.45165929338354477, "grad_norm": 1.7567391395568848, "learning_rate": 2.886080821039128e-06, "loss": 0.8806, "step": 37060 }, { "epoch": 0.4517202296076926, "grad_norm": 1.5827358961105347, "learning_rate": 2.885760102629891e-06, "loss": 0.8863, "step": 37065 }, { "epoch": 0.4517811658318404, "grad_norm": 2.0063745975494385, "learning_rate": 2.8854393842206547e-06, "loss": 0.8748, "step": 37070 }, { "epoch": 0.4518421020559882, "grad_norm": 1.955739974975586, "learning_rate": 2.8851186658114177e-06, "loss": 0.9075, "step": 37075 }, { "epoch": 0.451903038280136, "grad_norm": 1.8191062211990356, "learning_rate": 2.8847979474021807e-06, "loss": 0.8308, "step": 37080 }, { "epoch": 0.4519639745042838, "grad_norm": 1.7475824356079102, "learning_rate": 2.8844772289929446e-06, "loss": 0.8124, "step": 37085 }, { "epoch": 0.45202491072843165, "grad_norm": 2.3360133171081543, "learning_rate": 2.8841565105837076e-06, "loss": 0.8461, "step": 37090 }, { "epoch": 0.4520858469525794, "grad_norm": 1.8744109869003296, "learning_rate": 2.883835792174471e-06, "loss": 0.8468, "step": 37095 }, { "epoch": 0.45214678317672724, "grad_norm": 1.7768090963363647, "learning_rate": 2.8835150737652345e-06, "loss": 0.7468, "step": 37100 }, { "epoch": 0.45220771940087506, "grad_norm": 2.0316696166992188, "learning_rate": 2.8831943553559975e-06, "loss": 0.8316, "step": 37105 }, { "epoch": 0.45226865562502283, "grad_norm": 2.0563271045684814, "learning_rate": 2.882873636946761e-06, "loss": 0.8945, "step": 37110 }, { "epoch": 0.45232959184917065, "grad_norm": 1.7934775352478027, "learning_rate": 2.8825529185375244e-06, "loss": 0.8476, "step": 37115 }, { "epoch": 0.4523905280733185, "grad_norm": 1.5986151695251465, "learning_rate": 2.882232200128288e-06, "loss": 0.8319, "step": 37120 }, { "epoch": 0.4524514642974663, "grad_norm": 1.71591055393219, "learning_rate": 2.881911481719051e-06, "loss": 0.8472, "step": 37125 }, { "epoch": 0.45251240052161407, "grad_norm": 1.8808822631835938, "learning_rate": 2.8815907633098143e-06, "loss": 0.8616, "step": 37130 }, { "epoch": 0.4525733367457619, "grad_norm": 1.9774359464645386, "learning_rate": 2.8812700449005777e-06, "loss": 0.8356, "step": 37135 }, { "epoch": 0.4526342729699097, "grad_norm": 2.084224224090576, "learning_rate": 2.8809493264913407e-06, "loss": 0.8393, "step": 37140 }, { "epoch": 0.4526952091940575, "grad_norm": 2.0067062377929688, "learning_rate": 2.8806286080821038e-06, "loss": 0.9172, "step": 37145 }, { "epoch": 0.4527561454182053, "grad_norm": 2.296818256378174, "learning_rate": 2.8803078896728676e-06, "loss": 0.799, "step": 37150 }, { "epoch": 0.4528170816423531, "grad_norm": 1.7277140617370605, "learning_rate": 2.8799871712636306e-06, "loss": 0.8232, "step": 37155 }, { "epoch": 0.4528780178665009, "grad_norm": 1.720111608505249, "learning_rate": 2.8796664528543937e-06, "loss": 0.9015, "step": 37160 }, { "epoch": 0.4529389540906487, "grad_norm": 2.0390233993530273, "learning_rate": 2.8793457344451575e-06, "loss": 0.8765, "step": 37165 }, { "epoch": 0.45299989031479654, "grad_norm": 1.8810890913009644, "learning_rate": 2.8790250160359205e-06, "loss": 0.8851, "step": 37170 }, { "epoch": 0.45306082653894436, "grad_norm": 1.7267605066299438, "learning_rate": 2.878704297626684e-06, "loss": 0.8551, "step": 37175 }, { "epoch": 0.45312176276309213, "grad_norm": 2.0969138145446777, "learning_rate": 2.8783835792174474e-06, "loss": 0.8148, "step": 37180 }, { "epoch": 0.45318269898723995, "grad_norm": 2.2376043796539307, "learning_rate": 2.8780628608082104e-06, "loss": 0.9086, "step": 37185 }, { "epoch": 0.4532436352113878, "grad_norm": 1.7890175580978394, "learning_rate": 2.877742142398974e-06, "loss": 0.8095, "step": 37190 }, { "epoch": 0.45330457143553554, "grad_norm": 1.9502668380737305, "learning_rate": 2.8774214239897373e-06, "loss": 0.9053, "step": 37195 }, { "epoch": 0.45336550765968336, "grad_norm": 2.0765435695648193, "learning_rate": 2.8771007055805008e-06, "loss": 0.7846, "step": 37200 }, { "epoch": 0.4534264438838312, "grad_norm": 1.9147334098815918, "learning_rate": 2.8767799871712638e-06, "loss": 0.8274, "step": 37205 }, { "epoch": 0.453487380107979, "grad_norm": 1.958251953125, "learning_rate": 2.8764592687620272e-06, "loss": 0.9326, "step": 37210 }, { "epoch": 0.4535483163321268, "grad_norm": 1.8929380178451538, "learning_rate": 2.8761385503527907e-06, "loss": 0.8386, "step": 37215 }, { "epoch": 0.4536092525562746, "grad_norm": 2.0233426094055176, "learning_rate": 2.8758178319435537e-06, "loss": 0.8546, "step": 37220 }, { "epoch": 0.4536701887804224, "grad_norm": 1.9647186994552612, "learning_rate": 2.8754971135343175e-06, "loss": 0.7949, "step": 37225 }, { "epoch": 0.4537311250045702, "grad_norm": 1.9234130382537842, "learning_rate": 2.8751763951250806e-06, "loss": 0.8218, "step": 37230 }, { "epoch": 0.453792061228718, "grad_norm": 2.6583411693573, "learning_rate": 2.8748556767158436e-06, "loss": 0.842, "step": 37235 }, { "epoch": 0.45385299745286584, "grad_norm": 1.8830288648605347, "learning_rate": 2.8745349583066066e-06, "loss": 0.8318, "step": 37240 }, { "epoch": 0.45391393367701366, "grad_norm": 2.1859138011932373, "learning_rate": 2.8742142398973705e-06, "loss": 0.8217, "step": 37245 }, { "epoch": 0.4539748699011614, "grad_norm": 1.7621991634368896, "learning_rate": 2.8738935214881335e-06, "loss": 0.8493, "step": 37250 }, { "epoch": 0.45403580612530925, "grad_norm": 1.908872365951538, "learning_rate": 2.873572803078897e-06, "loss": 0.8219, "step": 37255 }, { "epoch": 0.45409674234945707, "grad_norm": 1.9043118953704834, "learning_rate": 2.8732520846696603e-06, "loss": 0.8127, "step": 37260 }, { "epoch": 0.45415767857360484, "grad_norm": 1.8280950784683228, "learning_rate": 2.8729313662604234e-06, "loss": 0.8593, "step": 37265 }, { "epoch": 0.45421861479775266, "grad_norm": 1.8338979482650757, "learning_rate": 2.872610647851187e-06, "loss": 0.859, "step": 37270 }, { "epoch": 0.4542795510219005, "grad_norm": 1.9119104146957397, "learning_rate": 2.8722899294419502e-06, "loss": 0.8081, "step": 37275 }, { "epoch": 0.4543404872460483, "grad_norm": 2.120821475982666, "learning_rate": 2.8719692110327137e-06, "loss": 0.826, "step": 37280 }, { "epoch": 0.4544014234701961, "grad_norm": 1.8206367492675781, "learning_rate": 2.8716484926234767e-06, "loss": 0.8701, "step": 37285 }, { "epoch": 0.4544623596943439, "grad_norm": 1.8388718366622925, "learning_rate": 2.8713277742142406e-06, "loss": 0.8754, "step": 37290 }, { "epoch": 0.4545232959184917, "grad_norm": 1.967469334602356, "learning_rate": 2.8710070558050036e-06, "loss": 0.7851, "step": 37295 }, { "epoch": 0.4545842321426395, "grad_norm": 2.2021420001983643, "learning_rate": 2.8706863373957666e-06, "loss": 0.885, "step": 37300 }, { "epoch": 0.4546451683667873, "grad_norm": 1.8786113262176514, "learning_rate": 2.8703656189865305e-06, "loss": 0.8559, "step": 37305 }, { "epoch": 0.45470610459093513, "grad_norm": 1.9762600660324097, "learning_rate": 2.8700449005772935e-06, "loss": 0.8316, "step": 37310 }, { "epoch": 0.45476704081508296, "grad_norm": 1.7132632732391357, "learning_rate": 2.8697241821680565e-06, "loss": 0.8899, "step": 37315 }, { "epoch": 0.4548279770392307, "grad_norm": 1.9067912101745605, "learning_rate": 2.86940346375882e-06, "loss": 0.8591, "step": 37320 }, { "epoch": 0.45488891326337855, "grad_norm": 2.143535852432251, "learning_rate": 2.8690827453495834e-06, "loss": 0.8755, "step": 37325 }, { "epoch": 0.45494984948752637, "grad_norm": 1.7860829830169678, "learning_rate": 2.8687620269403464e-06, "loss": 0.8654, "step": 37330 }, { "epoch": 0.45501078571167414, "grad_norm": 1.776322841644287, "learning_rate": 2.86844130853111e-06, "loss": 0.825, "step": 37335 }, { "epoch": 0.45507172193582196, "grad_norm": 1.9133864641189575, "learning_rate": 2.8681205901218733e-06, "loss": 0.8668, "step": 37340 }, { "epoch": 0.4551326581599698, "grad_norm": 1.8185678720474243, "learning_rate": 2.8677998717126367e-06, "loss": 0.8535, "step": 37345 }, { "epoch": 0.4551935943841176, "grad_norm": 1.9878594875335693, "learning_rate": 2.8674791533033997e-06, "loss": 0.8165, "step": 37350 }, { "epoch": 0.4552545306082654, "grad_norm": 2.1296677589416504, "learning_rate": 2.867158434894163e-06, "loss": 0.9039, "step": 37355 }, { "epoch": 0.4553154668324132, "grad_norm": 2.302191734313965, "learning_rate": 2.8668377164849266e-06, "loss": 0.8785, "step": 37360 }, { "epoch": 0.455376403056561, "grad_norm": 1.81536865234375, "learning_rate": 2.8665169980756896e-06, "loss": 0.8672, "step": 37365 }, { "epoch": 0.4554373392807088, "grad_norm": 1.962745189666748, "learning_rate": 2.8661962796664535e-06, "loss": 0.8158, "step": 37370 }, { "epoch": 0.4554982755048566, "grad_norm": 1.9259828329086304, "learning_rate": 2.8658755612572165e-06, "loss": 0.8794, "step": 37375 }, { "epoch": 0.45555921172900443, "grad_norm": 1.750797152519226, "learning_rate": 2.8655548428479795e-06, "loss": 0.7502, "step": 37380 }, { "epoch": 0.45562014795315225, "grad_norm": 2.527700901031494, "learning_rate": 2.8652341244387434e-06, "loss": 0.9496, "step": 37385 }, { "epoch": 0.4556810841773, "grad_norm": 1.779285192489624, "learning_rate": 2.8649134060295064e-06, "loss": 0.8713, "step": 37390 }, { "epoch": 0.45574202040144784, "grad_norm": 2.0858044624328613, "learning_rate": 2.8645926876202694e-06, "loss": 0.7888, "step": 37395 }, { "epoch": 0.45580295662559567, "grad_norm": 2.052391529083252, "learning_rate": 2.864271969211033e-06, "loss": 0.8921, "step": 37400 }, { "epoch": 0.45586389284974344, "grad_norm": 1.9366521835327148, "learning_rate": 2.8639512508017963e-06, "loss": 0.8038, "step": 37405 }, { "epoch": 0.45592482907389126, "grad_norm": 1.8932278156280518, "learning_rate": 2.8636305323925593e-06, "loss": 0.8494, "step": 37410 }, { "epoch": 0.4559857652980391, "grad_norm": 1.979375958442688, "learning_rate": 2.8633098139833228e-06, "loss": 0.8279, "step": 37415 }, { "epoch": 0.4560467015221869, "grad_norm": 1.9121119976043701, "learning_rate": 2.862989095574086e-06, "loss": 0.8785, "step": 37420 }, { "epoch": 0.45610763774633467, "grad_norm": 2.4782888889312744, "learning_rate": 2.8626683771648496e-06, "loss": 0.7848, "step": 37425 }, { "epoch": 0.4561685739704825, "grad_norm": 1.815511703491211, "learning_rate": 2.8623476587556127e-06, "loss": 0.8465, "step": 37430 }, { "epoch": 0.4562295101946303, "grad_norm": 1.8194001913070679, "learning_rate": 2.862026940346376e-06, "loss": 0.851, "step": 37435 }, { "epoch": 0.4562904464187781, "grad_norm": 2.3469808101654053, "learning_rate": 2.8617062219371395e-06, "loss": 0.7756, "step": 37440 }, { "epoch": 0.4563513826429259, "grad_norm": 1.923041582107544, "learning_rate": 2.8613855035279026e-06, "loss": 0.7742, "step": 37445 }, { "epoch": 0.45641231886707373, "grad_norm": 1.7788690328598022, "learning_rate": 2.8610647851186664e-06, "loss": 0.7926, "step": 37450 }, { "epoch": 0.45647325509122155, "grad_norm": 2.1651551723480225, "learning_rate": 2.8607440667094294e-06, "loss": 0.8276, "step": 37455 }, { "epoch": 0.4565341913153693, "grad_norm": 2.016921043395996, "learning_rate": 2.8604233483001925e-06, "loss": 0.8382, "step": 37460 }, { "epoch": 0.45659512753951714, "grad_norm": 1.7909126281738281, "learning_rate": 2.8601026298909563e-06, "loss": 0.8538, "step": 37465 }, { "epoch": 0.45665606376366497, "grad_norm": 1.9352390766143799, "learning_rate": 2.8597819114817193e-06, "loss": 0.8086, "step": 37470 }, { "epoch": 0.45671699998781273, "grad_norm": 2.0052123069763184, "learning_rate": 2.8594611930724824e-06, "loss": 0.8137, "step": 37475 }, { "epoch": 0.45677793621196056, "grad_norm": 2.1236441135406494, "learning_rate": 2.8591404746632462e-06, "loss": 0.8403, "step": 37480 }, { "epoch": 0.4568388724361084, "grad_norm": 1.9800087213516235, "learning_rate": 2.8588197562540092e-06, "loss": 0.8673, "step": 37485 }, { "epoch": 0.4568998086602562, "grad_norm": 1.7367631196975708, "learning_rate": 2.8584990378447723e-06, "loss": 0.7191, "step": 37490 }, { "epoch": 0.45696074488440397, "grad_norm": 1.6798574924468994, "learning_rate": 2.8581783194355357e-06, "loss": 0.7956, "step": 37495 }, { "epoch": 0.4570216811085518, "grad_norm": 1.6090500354766846, "learning_rate": 2.857857601026299e-06, "loss": 0.8257, "step": 37500 }, { "epoch": 0.4570826173326996, "grad_norm": 2.0278995037078857, "learning_rate": 2.8575368826170626e-06, "loss": 0.8226, "step": 37505 }, { "epoch": 0.4571435535568474, "grad_norm": 1.6738520860671997, "learning_rate": 2.8572161642078256e-06, "loss": 0.8885, "step": 37510 }, { "epoch": 0.4572044897809952, "grad_norm": 1.8544601202011108, "learning_rate": 2.856895445798589e-06, "loss": 0.8226, "step": 37515 }, { "epoch": 0.457265426005143, "grad_norm": 1.9736881256103516, "learning_rate": 2.8565747273893525e-06, "loss": 0.9328, "step": 37520 }, { "epoch": 0.45732636222929085, "grad_norm": 1.733001708984375, "learning_rate": 2.8562540089801155e-06, "loss": 0.8143, "step": 37525 }, { "epoch": 0.4573872984534386, "grad_norm": 2.1862850189208984, "learning_rate": 2.8559332905708793e-06, "loss": 0.8325, "step": 37530 }, { "epoch": 0.45744823467758644, "grad_norm": 1.729525089263916, "learning_rate": 2.8556125721616424e-06, "loss": 0.8324, "step": 37535 }, { "epoch": 0.45750917090173426, "grad_norm": 1.8489443063735962, "learning_rate": 2.8552918537524054e-06, "loss": 0.8603, "step": 37540 }, { "epoch": 0.45757010712588203, "grad_norm": 1.6011887788772583, "learning_rate": 2.8549711353431692e-06, "loss": 0.7858, "step": 37545 }, { "epoch": 0.45763104335002985, "grad_norm": 2.5258262157440186, "learning_rate": 2.8546504169339323e-06, "loss": 0.8071, "step": 37550 }, { "epoch": 0.4576919795741777, "grad_norm": 2.00502872467041, "learning_rate": 2.8543296985246953e-06, "loss": 0.8902, "step": 37555 }, { "epoch": 0.4577529157983255, "grad_norm": 2.040804862976074, "learning_rate": 2.854008980115459e-06, "loss": 0.8515, "step": 37560 }, { "epoch": 0.45781385202247327, "grad_norm": 2.7665603160858154, "learning_rate": 2.853688261706222e-06, "loss": 0.8524, "step": 37565 }, { "epoch": 0.4578747882466211, "grad_norm": 1.710179328918457, "learning_rate": 2.8533675432969856e-06, "loss": 0.7976, "step": 37570 }, { "epoch": 0.4579357244707689, "grad_norm": 1.8938955068588257, "learning_rate": 2.8530468248877486e-06, "loss": 0.8049, "step": 37575 }, { "epoch": 0.4579966606949167, "grad_norm": 1.7716718912124634, "learning_rate": 2.852726106478512e-06, "loss": 0.8336, "step": 37580 }, { "epoch": 0.4580575969190645, "grad_norm": 2.021212339401245, "learning_rate": 2.8524053880692755e-06, "loss": 0.8877, "step": 37585 }, { "epoch": 0.4581185331432123, "grad_norm": 1.8848943710327148, "learning_rate": 2.8520846696600385e-06, "loss": 0.815, "step": 37590 }, { "epoch": 0.45817946936736015, "grad_norm": 2.5715017318725586, "learning_rate": 2.8517639512508024e-06, "loss": 0.7943, "step": 37595 }, { "epoch": 0.4582404055915079, "grad_norm": 2.272944211959839, "learning_rate": 2.8514432328415654e-06, "loss": 0.8358, "step": 37600 }, { "epoch": 0.45830134181565574, "grad_norm": 2.052436590194702, "learning_rate": 2.8511225144323284e-06, "loss": 0.8787, "step": 37605 }, { "epoch": 0.45836227803980356, "grad_norm": 2.0079076290130615, "learning_rate": 2.8508017960230923e-06, "loss": 0.8217, "step": 37610 }, { "epoch": 0.45842321426395133, "grad_norm": 2.033374786376953, "learning_rate": 2.8504810776138553e-06, "loss": 0.8413, "step": 37615 }, { "epoch": 0.45848415048809915, "grad_norm": 1.9227430820465088, "learning_rate": 2.8501603592046183e-06, "loss": 0.8034, "step": 37620 }, { "epoch": 0.458545086712247, "grad_norm": 1.8894094228744507, "learning_rate": 2.849839640795382e-06, "loss": 0.9152, "step": 37625 }, { "epoch": 0.45860602293639474, "grad_norm": 1.893770456314087, "learning_rate": 2.849518922386145e-06, "loss": 0.8318, "step": 37630 }, { "epoch": 0.45866695916054256, "grad_norm": 2.0312938690185547, "learning_rate": 2.849198203976908e-06, "loss": 0.8722, "step": 37635 }, { "epoch": 0.4587278953846904, "grad_norm": 2.032888174057007, "learning_rate": 2.848877485567672e-06, "loss": 0.8433, "step": 37640 }, { "epoch": 0.4587888316088382, "grad_norm": 1.7894561290740967, "learning_rate": 2.848556767158435e-06, "loss": 0.8553, "step": 37645 }, { "epoch": 0.458849767832986, "grad_norm": 2.7191877365112305, "learning_rate": 2.8482360487491985e-06, "loss": 0.7865, "step": 37650 }, { "epoch": 0.4589107040571338, "grad_norm": 1.7663975954055786, "learning_rate": 2.8479153303399615e-06, "loss": 0.8175, "step": 37655 }, { "epoch": 0.4589716402812816, "grad_norm": 2.414001941680908, "learning_rate": 2.847594611930725e-06, "loss": 0.8747, "step": 37660 }, { "epoch": 0.4590325765054294, "grad_norm": 2.0413153171539307, "learning_rate": 2.8472738935214884e-06, "loss": 0.8579, "step": 37665 }, { "epoch": 0.4590935127295772, "grad_norm": 1.9150457382202148, "learning_rate": 2.8469531751122514e-06, "loss": 0.7606, "step": 37670 }, { "epoch": 0.45915444895372504, "grad_norm": 1.5544227361679077, "learning_rate": 2.8466324567030153e-06, "loss": 0.7998, "step": 37675 }, { "epoch": 0.45921538517787286, "grad_norm": 2.2114059925079346, "learning_rate": 2.8463117382937783e-06, "loss": 0.845, "step": 37680 }, { "epoch": 0.4592763214020206, "grad_norm": 2.133220911026001, "learning_rate": 2.8459910198845413e-06, "loss": 0.865, "step": 37685 }, { "epoch": 0.45933725762616845, "grad_norm": 2.373133420944214, "learning_rate": 2.845670301475305e-06, "loss": 0.8368, "step": 37690 }, { "epoch": 0.45939819385031627, "grad_norm": 2.1985716819763184, "learning_rate": 2.8453495830660682e-06, "loss": 0.8773, "step": 37695 }, { "epoch": 0.45945913007446404, "grad_norm": 1.8834537267684937, "learning_rate": 2.8450288646568312e-06, "loss": 0.8313, "step": 37700 }, { "epoch": 0.45952006629861186, "grad_norm": 1.9720823764801025, "learning_rate": 2.844708146247595e-06, "loss": 0.8641, "step": 37705 }, { "epoch": 0.4595810025227597, "grad_norm": 2.17118501663208, "learning_rate": 2.844387427838358e-06, "loss": 0.9475, "step": 37710 }, { "epoch": 0.4596419387469075, "grad_norm": 1.7708557844161987, "learning_rate": 2.844066709429121e-06, "loss": 0.9212, "step": 37715 }, { "epoch": 0.4597028749710553, "grad_norm": 1.790793776512146, "learning_rate": 2.843745991019885e-06, "loss": 0.8332, "step": 37720 }, { "epoch": 0.4597638111952031, "grad_norm": 1.918790340423584, "learning_rate": 2.843425272610648e-06, "loss": 0.8414, "step": 37725 }, { "epoch": 0.4598247474193509, "grad_norm": 1.9523751735687256, "learning_rate": 2.8431045542014115e-06, "loss": 0.8344, "step": 37730 }, { "epoch": 0.4598856836434987, "grad_norm": 2.125908613204956, "learning_rate": 2.8427838357921745e-06, "loss": 0.8377, "step": 37735 }, { "epoch": 0.4599466198676465, "grad_norm": 1.9392274618148804, "learning_rate": 2.842463117382938e-06, "loss": 0.8003, "step": 37740 }, { "epoch": 0.46000755609179433, "grad_norm": 1.921766996383667, "learning_rate": 2.8421423989737014e-06, "loss": 0.8304, "step": 37745 }, { "epoch": 0.46006849231594216, "grad_norm": 2.168130874633789, "learning_rate": 2.8418216805644644e-06, "loss": 0.838, "step": 37750 }, { "epoch": 0.4601294285400899, "grad_norm": 1.9588830471038818, "learning_rate": 2.8415009621552282e-06, "loss": 0.7441, "step": 37755 }, { "epoch": 0.46019036476423775, "grad_norm": 1.940536379814148, "learning_rate": 2.8411802437459912e-06, "loss": 0.8554, "step": 37760 }, { "epoch": 0.46025130098838557, "grad_norm": 2.096050977706909, "learning_rate": 2.8408595253367543e-06, "loss": 0.8255, "step": 37765 }, { "epoch": 0.46031223721253334, "grad_norm": 2.0608808994293213, "learning_rate": 2.840538806927518e-06, "loss": 0.8745, "step": 37770 }, { "epoch": 0.46037317343668116, "grad_norm": 1.9851466417312622, "learning_rate": 2.840218088518281e-06, "loss": 0.9145, "step": 37775 }, { "epoch": 0.460434109660829, "grad_norm": 1.8882813453674316, "learning_rate": 2.839897370109044e-06, "loss": 0.9107, "step": 37780 }, { "epoch": 0.4604950458849768, "grad_norm": 2.0713207721710205, "learning_rate": 2.839576651699808e-06, "loss": 0.8561, "step": 37785 }, { "epoch": 0.4605559821091246, "grad_norm": 1.7847850322723389, "learning_rate": 2.839255933290571e-06, "loss": 0.803, "step": 37790 }, { "epoch": 0.4606169183332724, "grad_norm": 1.8364310264587402, "learning_rate": 2.8389352148813345e-06, "loss": 0.794, "step": 37795 }, { "epoch": 0.4606778545574202, "grad_norm": 2.03847599029541, "learning_rate": 2.838614496472098e-06, "loss": 0.8496, "step": 37800 }, { "epoch": 0.460738790781568, "grad_norm": 1.7957342863082886, "learning_rate": 2.838293778062861e-06, "loss": 0.8945, "step": 37805 }, { "epoch": 0.4607997270057158, "grad_norm": 1.878063678741455, "learning_rate": 2.8379730596536244e-06, "loss": 0.8102, "step": 37810 }, { "epoch": 0.46086066322986363, "grad_norm": 1.9468305110931396, "learning_rate": 2.837652341244388e-06, "loss": 0.8451, "step": 37815 }, { "epoch": 0.46092159945401145, "grad_norm": 2.044001340866089, "learning_rate": 2.8373316228351513e-06, "loss": 0.8257, "step": 37820 }, { "epoch": 0.4609825356781592, "grad_norm": 1.718163013458252, "learning_rate": 2.8370109044259143e-06, "loss": 0.8149, "step": 37825 }, { "epoch": 0.46104347190230704, "grad_norm": 1.875351905822754, "learning_rate": 2.8366901860166773e-06, "loss": 0.8955, "step": 37830 }, { "epoch": 0.46110440812645487, "grad_norm": 2.43188214302063, "learning_rate": 2.836369467607441e-06, "loss": 0.8514, "step": 37835 }, { "epoch": 0.46116534435060264, "grad_norm": 1.736883521080017, "learning_rate": 2.836048749198204e-06, "loss": 0.88, "step": 37840 }, { "epoch": 0.46122628057475046, "grad_norm": 2.2348480224609375, "learning_rate": 2.835728030788967e-06, "loss": 0.8232, "step": 37845 }, { "epoch": 0.4612872167988983, "grad_norm": 2.3509278297424316, "learning_rate": 2.835407312379731e-06, "loss": 0.8227, "step": 37850 }, { "epoch": 0.4613481530230461, "grad_norm": 2.0117132663726807, "learning_rate": 2.835086593970494e-06, "loss": 0.8322, "step": 37855 }, { "epoch": 0.46140908924719387, "grad_norm": 1.830902338027954, "learning_rate": 2.834765875561257e-06, "loss": 0.8514, "step": 37860 }, { "epoch": 0.4614700254713417, "grad_norm": 2.0685553550720215, "learning_rate": 2.834445157152021e-06, "loss": 0.7649, "step": 37865 }, { "epoch": 0.4615309616954895, "grad_norm": 2.232849597930908, "learning_rate": 2.834124438742784e-06, "loss": 0.9194, "step": 37870 }, { "epoch": 0.4615918979196373, "grad_norm": 1.765592098236084, "learning_rate": 2.8338037203335474e-06, "loss": 0.7925, "step": 37875 }, { "epoch": 0.4616528341437851, "grad_norm": 1.942356824874878, "learning_rate": 2.833483001924311e-06, "loss": 0.8231, "step": 37880 }, { "epoch": 0.46171377036793293, "grad_norm": 1.8142908811569214, "learning_rate": 2.833162283515074e-06, "loss": 0.969, "step": 37885 }, { "epoch": 0.46177470659208075, "grad_norm": 1.8384567499160767, "learning_rate": 2.8328415651058373e-06, "loss": 0.9024, "step": 37890 }, { "epoch": 0.4618356428162285, "grad_norm": 1.8436514139175415, "learning_rate": 2.8325208466966007e-06, "loss": 0.8087, "step": 37895 }, { "epoch": 0.46189657904037634, "grad_norm": 2.045100450515747, "learning_rate": 2.832200128287364e-06, "loss": 0.8263, "step": 37900 }, { "epoch": 0.46195751526452417, "grad_norm": 1.75651216506958, "learning_rate": 2.831879409878127e-06, "loss": 0.8607, "step": 37905 }, { "epoch": 0.46201845148867193, "grad_norm": 1.6773205995559692, "learning_rate": 2.8315586914688902e-06, "loss": 0.8428, "step": 37910 }, { "epoch": 0.46207938771281976, "grad_norm": 2.2728629112243652, "learning_rate": 2.831237973059654e-06, "loss": 0.827, "step": 37915 }, { "epoch": 0.4621403239369676, "grad_norm": 1.9939111471176147, "learning_rate": 2.830917254650417e-06, "loss": 0.85, "step": 37920 }, { "epoch": 0.4622012601611154, "grad_norm": 2.076258420944214, "learning_rate": 2.83059653624118e-06, "loss": 0.8517, "step": 37925 }, { "epoch": 0.46226219638526317, "grad_norm": 1.6503469944000244, "learning_rate": 2.830275817831944e-06, "loss": 0.8888, "step": 37930 }, { "epoch": 0.462323132609411, "grad_norm": 2.097561836242676, "learning_rate": 2.829955099422707e-06, "loss": 0.8568, "step": 37935 }, { "epoch": 0.4623840688335588, "grad_norm": 1.9214997291564941, "learning_rate": 2.82963438101347e-06, "loss": 0.8938, "step": 37940 }, { "epoch": 0.4624450050577066, "grad_norm": 1.9459192752838135, "learning_rate": 2.829313662604234e-06, "loss": 0.8679, "step": 37945 }, { "epoch": 0.4625059412818544, "grad_norm": 1.987894892692566, "learning_rate": 2.828992944194997e-06, "loss": 0.8768, "step": 37950 }, { "epoch": 0.4625668775060022, "grad_norm": 1.8716777563095093, "learning_rate": 2.8286722257857603e-06, "loss": 0.858, "step": 37955 }, { "epoch": 0.46262781373015005, "grad_norm": 2.1731090545654297, "learning_rate": 2.8283515073765238e-06, "loss": 0.8664, "step": 37960 }, { "epoch": 0.4626887499542978, "grad_norm": 1.9094352722167969, "learning_rate": 2.828030788967287e-06, "loss": 0.8116, "step": 37965 }, { "epoch": 0.46274968617844564, "grad_norm": 1.9856371879577637, "learning_rate": 2.8277100705580502e-06, "loss": 0.8249, "step": 37970 }, { "epoch": 0.46281062240259346, "grad_norm": 2.084149122238159, "learning_rate": 2.8273893521488137e-06, "loss": 0.8326, "step": 37975 }, { "epoch": 0.46287155862674123, "grad_norm": 1.742214560508728, "learning_rate": 2.827068633739577e-06, "loss": 0.8292, "step": 37980 }, { "epoch": 0.46293249485088905, "grad_norm": 2.008739709854126, "learning_rate": 2.82674791533034e-06, "loss": 0.8735, "step": 37985 }, { "epoch": 0.4629934310750369, "grad_norm": 2.104567050933838, "learning_rate": 2.826427196921103e-06, "loss": 0.8462, "step": 37990 }, { "epoch": 0.4630543672991847, "grad_norm": 2.180767059326172, "learning_rate": 2.826106478511867e-06, "loss": 0.8713, "step": 37995 }, { "epoch": 0.46311530352333247, "grad_norm": 1.9965664148330688, "learning_rate": 2.82578576010263e-06, "loss": 0.8765, "step": 38000 }, { "epoch": 0.4631762397474803, "grad_norm": 2.0106077194213867, "learning_rate": 2.825465041693393e-06, "loss": 0.8589, "step": 38005 }, { "epoch": 0.4632371759716281, "grad_norm": 2.2468109130859375, "learning_rate": 2.825144323284157e-06, "loss": 0.7892, "step": 38010 }, { "epoch": 0.4632981121957759, "grad_norm": 2.303248405456543, "learning_rate": 2.82482360487492e-06, "loss": 0.9106, "step": 38015 }, { "epoch": 0.4633590484199237, "grad_norm": 2.0445544719696045, "learning_rate": 2.8245028864656834e-06, "loss": 0.8747, "step": 38020 }, { "epoch": 0.4634199846440715, "grad_norm": 1.6266206502914429, "learning_rate": 2.824182168056447e-06, "loss": 0.8361, "step": 38025 }, { "epoch": 0.46348092086821935, "grad_norm": 1.9532593488693237, "learning_rate": 2.82386144964721e-06, "loss": 0.8944, "step": 38030 }, { "epoch": 0.4635418570923671, "grad_norm": 1.8488082885742188, "learning_rate": 2.8235407312379733e-06, "loss": 0.866, "step": 38035 }, { "epoch": 0.46360279331651494, "grad_norm": 2.312127113342285, "learning_rate": 2.8232200128287367e-06, "loss": 0.8817, "step": 38040 }, { "epoch": 0.46366372954066276, "grad_norm": 1.9917433261871338, "learning_rate": 2.8228992944195e-06, "loss": 0.8355, "step": 38045 }, { "epoch": 0.46372466576481053, "grad_norm": 1.9671053886413574, "learning_rate": 2.822578576010263e-06, "loss": 0.7923, "step": 38050 }, { "epoch": 0.46378560198895835, "grad_norm": 2.1322524547576904, "learning_rate": 2.8222578576010266e-06, "loss": 0.8854, "step": 38055 }, { "epoch": 0.4638465382131062, "grad_norm": 2.4324374198913574, "learning_rate": 2.82193713919179e-06, "loss": 0.8786, "step": 38060 }, { "epoch": 0.463907474437254, "grad_norm": 2.0758745670318604, "learning_rate": 2.821616420782553e-06, "loss": 0.9163, "step": 38065 }, { "epoch": 0.46396841066140176, "grad_norm": 1.8051600456237793, "learning_rate": 2.821295702373317e-06, "loss": 0.8424, "step": 38070 }, { "epoch": 0.4640293468855496, "grad_norm": 3.048596143722534, "learning_rate": 2.82097498396408e-06, "loss": 0.8102, "step": 38075 }, { "epoch": 0.4640902831096974, "grad_norm": 2.1766836643218994, "learning_rate": 2.820654265554843e-06, "loss": 0.8641, "step": 38080 }, { "epoch": 0.4641512193338452, "grad_norm": 1.9028782844543457, "learning_rate": 2.820333547145606e-06, "loss": 0.8644, "step": 38085 }, { "epoch": 0.464212155557993, "grad_norm": 1.8930425643920898, "learning_rate": 2.82001282873637e-06, "loss": 0.8379, "step": 38090 }, { "epoch": 0.4642730917821408, "grad_norm": 1.7295852899551392, "learning_rate": 2.819692110327133e-06, "loss": 0.8159, "step": 38095 }, { "epoch": 0.46433402800628865, "grad_norm": 1.8536598682403564, "learning_rate": 2.8193713919178963e-06, "loss": 0.8927, "step": 38100 }, { "epoch": 0.4643949642304364, "grad_norm": 1.9912666082382202, "learning_rate": 2.8190506735086597e-06, "loss": 0.8822, "step": 38105 }, { "epoch": 0.46445590045458424, "grad_norm": 2.072406530380249, "learning_rate": 2.8187299550994228e-06, "loss": 0.8241, "step": 38110 }, { "epoch": 0.46451683667873206, "grad_norm": 1.943324327468872, "learning_rate": 2.818409236690186e-06, "loss": 0.8441, "step": 38115 }, { "epoch": 0.4645777729028798, "grad_norm": 1.9759151935577393, "learning_rate": 2.8180885182809496e-06, "loss": 0.8044, "step": 38120 }, { "epoch": 0.46463870912702765, "grad_norm": 2.0864124298095703, "learning_rate": 2.817767799871713e-06, "loss": 0.8815, "step": 38125 }, { "epoch": 0.46469964535117547, "grad_norm": 1.8373748064041138, "learning_rate": 2.817447081462476e-06, "loss": 0.8023, "step": 38130 }, { "epoch": 0.46476058157532324, "grad_norm": 2.0265283584594727, "learning_rate": 2.8171263630532395e-06, "loss": 0.8231, "step": 38135 }, { "epoch": 0.46482151779947106, "grad_norm": 1.8680710792541504, "learning_rate": 2.816805644644003e-06, "loss": 0.8813, "step": 38140 }, { "epoch": 0.4648824540236189, "grad_norm": 1.7224711179733276, "learning_rate": 2.816484926234766e-06, "loss": 0.8035, "step": 38145 }, { "epoch": 0.4649433902477667, "grad_norm": 1.7919656038284302, "learning_rate": 2.81616420782553e-06, "loss": 0.8826, "step": 38150 }, { "epoch": 0.4650043264719145, "grad_norm": 1.8323081731796265, "learning_rate": 2.815843489416293e-06, "loss": 0.8189, "step": 38155 }, { "epoch": 0.4650652626960623, "grad_norm": 2.25958251953125, "learning_rate": 2.815522771007056e-06, "loss": 0.7856, "step": 38160 }, { "epoch": 0.4651261989202101, "grad_norm": 1.6250941753387451, "learning_rate": 2.815202052597819e-06, "loss": 0.8105, "step": 38165 }, { "epoch": 0.4651871351443579, "grad_norm": 2.3652079105377197, "learning_rate": 2.8148813341885828e-06, "loss": 0.8257, "step": 38170 }, { "epoch": 0.4652480713685057, "grad_norm": 1.802641749382019, "learning_rate": 2.8145606157793458e-06, "loss": 0.7879, "step": 38175 }, { "epoch": 0.46530900759265353, "grad_norm": 1.635651707649231, "learning_rate": 2.8142398973701092e-06, "loss": 0.8209, "step": 38180 }, { "epoch": 0.46536994381680136, "grad_norm": 1.8639756441116333, "learning_rate": 2.8139191789608727e-06, "loss": 0.866, "step": 38185 }, { "epoch": 0.4654308800409491, "grad_norm": 1.8879363536834717, "learning_rate": 2.8135984605516357e-06, "loss": 0.8254, "step": 38190 }, { "epoch": 0.46549181626509695, "grad_norm": 1.7434333562850952, "learning_rate": 2.813277742142399e-06, "loss": 0.8782, "step": 38195 }, { "epoch": 0.46555275248924477, "grad_norm": 1.7027686834335327, "learning_rate": 2.8129570237331626e-06, "loss": 0.8787, "step": 38200 }, { "epoch": 0.46561368871339254, "grad_norm": 2.0181996822357178, "learning_rate": 2.812636305323926e-06, "loss": 0.8784, "step": 38205 }, { "epoch": 0.46567462493754036, "grad_norm": 2.0460684299468994, "learning_rate": 2.812315586914689e-06, "loss": 0.7785, "step": 38210 }, { "epoch": 0.4657355611616882, "grad_norm": 2.0462663173675537, "learning_rate": 2.8119948685054525e-06, "loss": 0.8714, "step": 38215 }, { "epoch": 0.465796497385836, "grad_norm": 1.8959016799926758, "learning_rate": 2.811674150096216e-06, "loss": 0.8816, "step": 38220 }, { "epoch": 0.4658574336099838, "grad_norm": 2.082315683364868, "learning_rate": 2.811353431686979e-06, "loss": 0.8286, "step": 38225 }, { "epoch": 0.4659183698341316, "grad_norm": 2.0336852073669434, "learning_rate": 2.8110327132777428e-06, "loss": 0.8456, "step": 38230 }, { "epoch": 0.4659793060582794, "grad_norm": 2.7688639163970947, "learning_rate": 2.810711994868506e-06, "loss": 0.8343, "step": 38235 }, { "epoch": 0.4660402422824272, "grad_norm": 1.8977512121200562, "learning_rate": 2.810391276459269e-06, "loss": 0.8484, "step": 38240 }, { "epoch": 0.466101178506575, "grad_norm": 1.942737102508545, "learning_rate": 2.810070558050032e-06, "loss": 0.8632, "step": 38245 }, { "epoch": 0.46616211473072283, "grad_norm": 1.900473713874817, "learning_rate": 2.8097498396407957e-06, "loss": 0.8289, "step": 38250 }, { "epoch": 0.46622305095487065, "grad_norm": 1.9847854375839233, "learning_rate": 2.8094291212315587e-06, "loss": 0.8713, "step": 38255 }, { "epoch": 0.4662839871790184, "grad_norm": 1.6125868558883667, "learning_rate": 2.809108402822322e-06, "loss": 0.8458, "step": 38260 }, { "epoch": 0.46634492340316624, "grad_norm": 1.8714449405670166, "learning_rate": 2.8087876844130856e-06, "loss": 0.783, "step": 38265 }, { "epoch": 0.46640585962731407, "grad_norm": 1.9071794748306274, "learning_rate": 2.808466966003849e-06, "loss": 0.8245, "step": 38270 }, { "epoch": 0.46646679585146184, "grad_norm": 2.374051570892334, "learning_rate": 2.808146247594612e-06, "loss": 0.855, "step": 38275 }, { "epoch": 0.46652773207560966, "grad_norm": 1.8254637718200684, "learning_rate": 2.8078255291853755e-06, "loss": 0.8049, "step": 38280 }, { "epoch": 0.4665886682997575, "grad_norm": 2.679316759109497, "learning_rate": 2.807504810776139e-06, "loss": 0.8364, "step": 38285 }, { "epoch": 0.4666496045239053, "grad_norm": 2.731698513031006, "learning_rate": 2.807184092366902e-06, "loss": 0.9153, "step": 38290 }, { "epoch": 0.46671054074805307, "grad_norm": 2.1390719413757324, "learning_rate": 2.806863373957666e-06, "loss": 0.9099, "step": 38295 }, { "epoch": 0.4667714769722009, "grad_norm": 1.7610045671463013, "learning_rate": 2.806542655548429e-06, "loss": 0.819, "step": 38300 }, { "epoch": 0.4668324131963487, "grad_norm": 2.381093978881836, "learning_rate": 2.806221937139192e-06, "loss": 0.8741, "step": 38305 }, { "epoch": 0.4668933494204965, "grad_norm": 1.7828843593597412, "learning_rate": 2.8059012187299557e-06, "loss": 0.8166, "step": 38310 }, { "epoch": 0.4669542856446443, "grad_norm": 2.2653274536132812, "learning_rate": 2.8055805003207187e-06, "loss": 0.8379, "step": 38315 }, { "epoch": 0.46701522186879213, "grad_norm": 1.8579745292663574, "learning_rate": 2.8052597819114817e-06, "loss": 0.8329, "step": 38320 }, { "epoch": 0.46707615809293995, "grad_norm": 1.6749651432037354, "learning_rate": 2.804939063502245e-06, "loss": 0.9262, "step": 38325 }, { "epoch": 0.4671370943170877, "grad_norm": 1.7580771446228027, "learning_rate": 2.8046183450930086e-06, "loss": 0.8494, "step": 38330 }, { "epoch": 0.46719803054123554, "grad_norm": 1.644911527633667, "learning_rate": 2.8042976266837716e-06, "loss": 0.844, "step": 38335 }, { "epoch": 0.46725896676538337, "grad_norm": 1.9026240110397339, "learning_rate": 2.803976908274535e-06, "loss": 0.8473, "step": 38340 }, { "epoch": 0.46731990298953113, "grad_norm": 1.7171109914779663, "learning_rate": 2.8036561898652985e-06, "loss": 0.7762, "step": 38345 }, { "epoch": 0.46738083921367896, "grad_norm": 2.1615447998046875, "learning_rate": 2.803335471456062e-06, "loss": 0.8356, "step": 38350 }, { "epoch": 0.4674417754378268, "grad_norm": 1.9315162897109985, "learning_rate": 2.803014753046825e-06, "loss": 0.8431, "step": 38355 }, { "epoch": 0.4675027116619746, "grad_norm": 1.9227954149246216, "learning_rate": 2.8026940346375884e-06, "loss": 0.8328, "step": 38360 }, { "epoch": 0.46756364788612237, "grad_norm": 1.9237031936645508, "learning_rate": 2.802373316228352e-06, "loss": 0.8401, "step": 38365 }, { "epoch": 0.4676245841102702, "grad_norm": 1.7906156778335571, "learning_rate": 2.802052597819115e-06, "loss": 0.84, "step": 38370 }, { "epoch": 0.467685520334418, "grad_norm": 2.037292003631592, "learning_rate": 2.8017318794098787e-06, "loss": 0.8042, "step": 38375 }, { "epoch": 0.4677464565585658, "grad_norm": 2.565295696258545, "learning_rate": 2.8014111610006418e-06, "loss": 0.8325, "step": 38380 }, { "epoch": 0.4678073927827136, "grad_norm": 2.0966055393218994, "learning_rate": 2.8010904425914048e-06, "loss": 0.7811, "step": 38385 }, { "epoch": 0.4678683290068614, "grad_norm": 1.7934563159942627, "learning_rate": 2.8007697241821686e-06, "loss": 0.9249, "step": 38390 }, { "epoch": 0.46792926523100925, "grad_norm": 1.9211976528167725, "learning_rate": 2.8004490057729316e-06, "loss": 0.8714, "step": 38395 }, { "epoch": 0.467990201455157, "grad_norm": 2.0208277702331543, "learning_rate": 2.8001282873636947e-06, "loss": 0.7941, "step": 38400 }, { "epoch": 0.46805113767930484, "grad_norm": 1.6659339666366577, "learning_rate": 2.7998075689544585e-06, "loss": 0.822, "step": 38405 }, { "epoch": 0.46811207390345266, "grad_norm": 2.1628713607788086, "learning_rate": 2.7994868505452215e-06, "loss": 0.8063, "step": 38410 }, { "epoch": 0.46817301012760043, "grad_norm": 2.08990478515625, "learning_rate": 2.7991661321359846e-06, "loss": 0.8713, "step": 38415 }, { "epoch": 0.46823394635174825, "grad_norm": 1.786033272743225, "learning_rate": 2.798845413726748e-06, "loss": 0.8663, "step": 38420 }, { "epoch": 0.4682948825758961, "grad_norm": 1.5834252834320068, "learning_rate": 2.7985246953175114e-06, "loss": 0.7779, "step": 38425 }, { "epoch": 0.4683558188000439, "grad_norm": 2.115161895751953, "learning_rate": 2.798203976908275e-06, "loss": 0.892, "step": 38430 }, { "epoch": 0.46841675502419167, "grad_norm": 1.7797354459762573, "learning_rate": 2.797883258499038e-06, "loss": 0.8645, "step": 38435 }, { "epoch": 0.4684776912483395, "grad_norm": 1.681751012802124, "learning_rate": 2.7975625400898013e-06, "loss": 0.8182, "step": 38440 }, { "epoch": 0.4685386274724873, "grad_norm": 1.8492732048034668, "learning_rate": 2.7972418216805648e-06, "loss": 0.8174, "step": 38445 }, { "epoch": 0.4685995636966351, "grad_norm": 1.8471696376800537, "learning_rate": 2.796921103271328e-06, "loss": 0.8064, "step": 38450 }, { "epoch": 0.4686604999207829, "grad_norm": 2.0836434364318848, "learning_rate": 2.7966003848620917e-06, "loss": 0.8867, "step": 38455 }, { "epoch": 0.4687214361449307, "grad_norm": 1.895333170890808, "learning_rate": 2.7962796664528547e-06, "loss": 0.8723, "step": 38460 }, { "epoch": 0.46878237236907855, "grad_norm": 1.8538450002670288, "learning_rate": 2.7959589480436177e-06, "loss": 0.7999, "step": 38465 }, { "epoch": 0.4688433085932263, "grad_norm": 1.9086602926254272, "learning_rate": 2.7956382296343816e-06, "loss": 0.8008, "step": 38470 }, { "epoch": 0.46890424481737414, "grad_norm": 1.9668951034545898, "learning_rate": 2.7953175112251446e-06, "loss": 0.8584, "step": 38475 }, { "epoch": 0.46896518104152196, "grad_norm": 2.1374518871307373, "learning_rate": 2.7949967928159076e-06, "loss": 0.8931, "step": 38480 }, { "epoch": 0.46902611726566973, "grad_norm": 1.6751700639724731, "learning_rate": 2.7946760744066715e-06, "loss": 0.8422, "step": 38485 }, { "epoch": 0.46908705348981755, "grad_norm": 1.8570916652679443, "learning_rate": 2.7943553559974345e-06, "loss": 0.7842, "step": 38490 }, { "epoch": 0.4691479897139654, "grad_norm": 2.0252366065979004, "learning_rate": 2.794034637588198e-06, "loss": 0.8255, "step": 38495 }, { "epoch": 0.4692089259381132, "grad_norm": 2.0100860595703125, "learning_rate": 2.793713919178961e-06, "loss": 0.8321, "step": 38500 }, { "epoch": 0.46926986216226096, "grad_norm": 1.8403326272964478, "learning_rate": 2.7933932007697244e-06, "loss": 0.8086, "step": 38505 }, { "epoch": 0.4693307983864088, "grad_norm": 2.5964932441711426, "learning_rate": 2.793072482360488e-06, "loss": 0.8464, "step": 38510 }, { "epoch": 0.4693917346105566, "grad_norm": 1.9519550800323486, "learning_rate": 2.792751763951251e-06, "loss": 0.8563, "step": 38515 }, { "epoch": 0.4694526708347044, "grad_norm": 2.069039821624756, "learning_rate": 2.7924310455420147e-06, "loss": 0.87, "step": 38520 }, { "epoch": 0.4695136070588522, "grad_norm": 1.880873680114746, "learning_rate": 2.7921103271327777e-06, "loss": 0.8241, "step": 38525 }, { "epoch": 0.469574543283, "grad_norm": 1.7419319152832031, "learning_rate": 2.7917896087235407e-06, "loss": 0.8414, "step": 38530 }, { "epoch": 0.46963547950714785, "grad_norm": 1.9500255584716797, "learning_rate": 2.7914688903143046e-06, "loss": 0.8523, "step": 38535 }, { "epoch": 0.4696964157312956, "grad_norm": 1.89736807346344, "learning_rate": 2.7911481719050676e-06, "loss": 0.7912, "step": 38540 }, { "epoch": 0.46975735195544344, "grad_norm": 1.8736796379089355, "learning_rate": 2.7908274534958306e-06, "loss": 0.8785, "step": 38545 }, { "epoch": 0.46981828817959126, "grad_norm": 1.9041111469268799, "learning_rate": 2.7905067350865945e-06, "loss": 0.8158, "step": 38550 }, { "epoch": 0.469879224403739, "grad_norm": 2.516435384750366, "learning_rate": 2.7901860166773575e-06, "loss": 0.8745, "step": 38555 }, { "epoch": 0.46994016062788685, "grad_norm": 1.8856557607650757, "learning_rate": 2.7898652982681205e-06, "loss": 0.8318, "step": 38560 }, { "epoch": 0.47000109685203467, "grad_norm": 1.8537883758544922, "learning_rate": 2.7895445798588844e-06, "loss": 0.8241, "step": 38565 }, { "epoch": 0.4700620330761825, "grad_norm": 1.7493964433670044, "learning_rate": 2.7892238614496474e-06, "loss": 0.7694, "step": 38570 }, { "epoch": 0.47012296930033026, "grad_norm": 2.047844648361206, "learning_rate": 2.788903143040411e-06, "loss": 0.862, "step": 38575 }, { "epoch": 0.4701839055244781, "grad_norm": 1.944580316543579, "learning_rate": 2.788582424631174e-06, "loss": 0.8926, "step": 38580 }, { "epoch": 0.4702448417486259, "grad_norm": 1.750733494758606, "learning_rate": 2.7882617062219373e-06, "loss": 0.8438, "step": 38585 }, { "epoch": 0.4703057779727737, "grad_norm": 1.7035915851593018, "learning_rate": 2.7879409878127007e-06, "loss": 0.8655, "step": 38590 }, { "epoch": 0.4703667141969215, "grad_norm": 1.7943596839904785, "learning_rate": 2.7876202694034638e-06, "loss": 0.8988, "step": 38595 }, { "epoch": 0.4704276504210693, "grad_norm": 2.5624916553497314, "learning_rate": 2.7872995509942276e-06, "loss": 0.8339, "step": 38600 }, { "epoch": 0.4704885866452171, "grad_norm": 2.0522234439849854, "learning_rate": 2.7869788325849906e-06, "loss": 0.8427, "step": 38605 }, { "epoch": 0.4705495228693649, "grad_norm": 2.148109197616577, "learning_rate": 2.7866581141757537e-06, "loss": 0.8782, "step": 38610 }, { "epoch": 0.47061045909351273, "grad_norm": 1.812163233757019, "learning_rate": 2.7863373957665175e-06, "loss": 0.8646, "step": 38615 }, { "epoch": 0.47067139531766056, "grad_norm": 2.026250123977661, "learning_rate": 2.7860166773572805e-06, "loss": 0.8396, "step": 38620 }, { "epoch": 0.4707323315418083, "grad_norm": 1.7975915670394897, "learning_rate": 2.7856959589480436e-06, "loss": 0.8507, "step": 38625 }, { "epoch": 0.47079326776595615, "grad_norm": 2.070305585861206, "learning_rate": 2.7853752405388074e-06, "loss": 0.8258, "step": 38630 }, { "epoch": 0.47085420399010397, "grad_norm": 1.95391047000885, "learning_rate": 2.7850545221295704e-06, "loss": 0.862, "step": 38635 }, { "epoch": 0.47091514021425174, "grad_norm": 2.1957545280456543, "learning_rate": 2.7847338037203334e-06, "loss": 0.8477, "step": 38640 }, { "epoch": 0.47097607643839956, "grad_norm": 2.0723891258239746, "learning_rate": 2.7844130853110973e-06, "loss": 0.8653, "step": 38645 }, { "epoch": 0.4710370126625474, "grad_norm": 2.108398914337158, "learning_rate": 2.7840923669018603e-06, "loss": 0.8116, "step": 38650 }, { "epoch": 0.4710979488866952, "grad_norm": 2.2499961853027344, "learning_rate": 2.7837716484926238e-06, "loss": 0.9142, "step": 38655 }, { "epoch": 0.471158885110843, "grad_norm": 1.7598384618759155, "learning_rate": 2.7834509300833868e-06, "loss": 0.8379, "step": 38660 }, { "epoch": 0.4712198213349908, "grad_norm": 1.8898584842681885, "learning_rate": 2.7831302116741502e-06, "loss": 0.7917, "step": 38665 }, { "epoch": 0.4712807575591386, "grad_norm": 1.7308658361434937, "learning_rate": 2.7828094932649137e-06, "loss": 0.8667, "step": 38670 }, { "epoch": 0.4713416937832864, "grad_norm": 1.923668622970581, "learning_rate": 2.7824887748556767e-06, "loss": 0.8299, "step": 38675 }, { "epoch": 0.4714026300074342, "grad_norm": 2.2855470180511475, "learning_rate": 2.7821680564464405e-06, "loss": 0.8233, "step": 38680 }, { "epoch": 0.47146356623158203, "grad_norm": 2.4301416873931885, "learning_rate": 2.7818473380372036e-06, "loss": 0.8342, "step": 38685 }, { "epoch": 0.47152450245572985, "grad_norm": 1.9655171632766724, "learning_rate": 2.7815266196279666e-06, "loss": 0.8739, "step": 38690 }, { "epoch": 0.4715854386798776, "grad_norm": 2.5013809204101562, "learning_rate": 2.7812059012187304e-06, "loss": 0.8812, "step": 38695 }, { "epoch": 0.47164637490402544, "grad_norm": 1.664379596710205, "learning_rate": 2.7808851828094935e-06, "loss": 0.7472, "step": 38700 }, { "epoch": 0.47170731112817327, "grad_norm": 2.3581676483154297, "learning_rate": 2.7805644644002565e-06, "loss": 0.8224, "step": 38705 }, { "epoch": 0.47176824735232104, "grad_norm": 1.9567914009094238, "learning_rate": 2.7802437459910203e-06, "loss": 0.8101, "step": 38710 }, { "epoch": 0.47182918357646886, "grad_norm": 1.9745454788208008, "learning_rate": 2.7799230275817834e-06, "loss": 0.8666, "step": 38715 }, { "epoch": 0.4718901198006167, "grad_norm": 1.611992597579956, "learning_rate": 2.779602309172547e-06, "loss": 0.8433, "step": 38720 }, { "epoch": 0.4719510560247645, "grad_norm": 2.1175332069396973, "learning_rate": 2.7792815907633102e-06, "loss": 0.8964, "step": 38725 }, { "epoch": 0.47201199224891227, "grad_norm": 1.9769362211227417, "learning_rate": 2.7789608723540733e-06, "loss": 0.867, "step": 38730 }, { "epoch": 0.4720729284730601, "grad_norm": 1.7330650091171265, "learning_rate": 2.7786401539448367e-06, "loss": 0.8236, "step": 38735 }, { "epoch": 0.4721338646972079, "grad_norm": 2.0337798595428467, "learning_rate": 2.7783194355356e-06, "loss": 0.8344, "step": 38740 }, { "epoch": 0.4721948009213557, "grad_norm": 1.8298121690750122, "learning_rate": 2.7779987171263636e-06, "loss": 0.7713, "step": 38745 }, { "epoch": 0.4722557371455035, "grad_norm": 2.059154510498047, "learning_rate": 2.7776779987171266e-06, "loss": 0.8378, "step": 38750 }, { "epoch": 0.47231667336965133, "grad_norm": 1.8918156623840332, "learning_rate": 2.7773572803078896e-06, "loss": 0.8281, "step": 38755 }, { "epoch": 0.47237760959379915, "grad_norm": 1.8548734188079834, "learning_rate": 2.7770365618986535e-06, "loss": 0.8156, "step": 38760 }, { "epoch": 0.4724385458179469, "grad_norm": 2.415756940841675, "learning_rate": 2.7767158434894165e-06, "loss": 0.7813, "step": 38765 }, { "epoch": 0.47249948204209474, "grad_norm": 1.8576443195343018, "learning_rate": 2.7763951250801795e-06, "loss": 0.8686, "step": 38770 }, { "epoch": 0.47256041826624257, "grad_norm": 1.7223819494247437, "learning_rate": 2.7760744066709434e-06, "loss": 0.848, "step": 38775 }, { "epoch": 0.47262135449039033, "grad_norm": 1.8450565338134766, "learning_rate": 2.7757536882617064e-06, "loss": 0.8011, "step": 38780 }, { "epoch": 0.47268229071453816, "grad_norm": 1.7548422813415527, "learning_rate": 2.7754329698524694e-06, "loss": 0.8249, "step": 38785 }, { "epoch": 0.472743226938686, "grad_norm": 1.9338656663894653, "learning_rate": 2.7751122514432333e-06, "loss": 0.8446, "step": 38790 }, { "epoch": 0.4728041631628338, "grad_norm": 3.7817881107330322, "learning_rate": 2.7747915330339963e-06, "loss": 0.8877, "step": 38795 }, { "epoch": 0.47286509938698157, "grad_norm": 1.9836965799331665, "learning_rate": 2.7744708146247597e-06, "loss": 0.8156, "step": 38800 }, { "epoch": 0.4729260356111294, "grad_norm": 1.8764326572418213, "learning_rate": 2.774150096215523e-06, "loss": 0.9125, "step": 38805 }, { "epoch": 0.4729869718352772, "grad_norm": 1.7745354175567627, "learning_rate": 2.773829377806286e-06, "loss": 0.8695, "step": 38810 }, { "epoch": 0.473047908059425, "grad_norm": 2.0929884910583496, "learning_rate": 2.7735086593970496e-06, "loss": 0.8158, "step": 38815 }, { "epoch": 0.4731088442835728, "grad_norm": 2.2254695892333984, "learning_rate": 2.773187940987813e-06, "loss": 0.8351, "step": 38820 }, { "epoch": 0.4731697805077206, "grad_norm": 1.9261929988861084, "learning_rate": 2.7728672225785765e-06, "loss": 0.8723, "step": 38825 }, { "epoch": 0.47323071673186845, "grad_norm": 2.6825249195098877, "learning_rate": 2.7725465041693395e-06, "loss": 0.8748, "step": 38830 }, { "epoch": 0.4732916529560162, "grad_norm": 2.0376646518707275, "learning_rate": 2.7722257857601025e-06, "loss": 0.8175, "step": 38835 }, { "epoch": 0.47335258918016404, "grad_norm": 1.9363656044006348, "learning_rate": 2.7719050673508664e-06, "loss": 0.7779, "step": 38840 }, { "epoch": 0.47341352540431186, "grad_norm": 2.0647976398468018, "learning_rate": 2.7715843489416294e-06, "loss": 0.8156, "step": 38845 }, { "epoch": 0.47347446162845963, "grad_norm": 1.850909948348999, "learning_rate": 2.7712636305323924e-06, "loss": 0.7997, "step": 38850 }, { "epoch": 0.47353539785260745, "grad_norm": 2.392198324203491, "learning_rate": 2.7709429121231563e-06, "loss": 0.9, "step": 38855 }, { "epoch": 0.4735963340767553, "grad_norm": 2.173999547958374, "learning_rate": 2.7706221937139193e-06, "loss": 0.7915, "step": 38860 }, { "epoch": 0.4736572703009031, "grad_norm": 1.8552298545837402, "learning_rate": 2.7703014753046823e-06, "loss": 0.783, "step": 38865 }, { "epoch": 0.47371820652505087, "grad_norm": 2.1445469856262207, "learning_rate": 2.769980756895446e-06, "loss": 0.835, "step": 38870 }, { "epoch": 0.4737791427491987, "grad_norm": 2.065477132797241, "learning_rate": 2.7696600384862092e-06, "loss": 0.8173, "step": 38875 }, { "epoch": 0.4738400789733465, "grad_norm": 1.8648896217346191, "learning_rate": 2.7693393200769727e-06, "loss": 0.8414, "step": 38880 }, { "epoch": 0.4739010151974943, "grad_norm": 2.5041444301605225, "learning_rate": 2.769018601667736e-06, "loss": 0.8376, "step": 38885 }, { "epoch": 0.4739619514216421, "grad_norm": 1.7509856224060059, "learning_rate": 2.768697883258499e-06, "loss": 0.776, "step": 38890 }, { "epoch": 0.4740228876457899, "grad_norm": 1.8872292041778564, "learning_rate": 2.7683771648492626e-06, "loss": 0.8524, "step": 38895 }, { "epoch": 0.47408382386993775, "grad_norm": 1.8833998441696167, "learning_rate": 2.768056446440026e-06, "loss": 0.9177, "step": 38900 }, { "epoch": 0.4741447600940855, "grad_norm": 2.2649428844451904, "learning_rate": 2.7677357280307894e-06, "loss": 0.8721, "step": 38905 }, { "epoch": 0.47420569631823334, "grad_norm": 1.8518321514129639, "learning_rate": 2.7674150096215524e-06, "loss": 0.8185, "step": 38910 }, { "epoch": 0.47426663254238116, "grad_norm": 1.925005555152893, "learning_rate": 2.7670942912123155e-06, "loss": 0.8627, "step": 38915 }, { "epoch": 0.47432756876652893, "grad_norm": 1.7730379104614258, "learning_rate": 2.7667735728030793e-06, "loss": 0.8474, "step": 38920 }, { "epoch": 0.47438850499067675, "grad_norm": 2.2351160049438477, "learning_rate": 2.7664528543938423e-06, "loss": 0.8202, "step": 38925 }, { "epoch": 0.4744494412148246, "grad_norm": 2.0200393199920654, "learning_rate": 2.7661321359846054e-06, "loss": 0.8317, "step": 38930 }, { "epoch": 0.4745103774389724, "grad_norm": 1.9991365671157837, "learning_rate": 2.7658114175753692e-06, "loss": 0.8946, "step": 38935 }, { "epoch": 0.47457131366312016, "grad_norm": 2.0208945274353027, "learning_rate": 2.7654906991661322e-06, "loss": 0.8372, "step": 38940 }, { "epoch": 0.474632249887268, "grad_norm": 2.177686929702759, "learning_rate": 2.7651699807568953e-06, "loss": 0.9275, "step": 38945 }, { "epoch": 0.4746931861114158, "grad_norm": 1.8940067291259766, "learning_rate": 2.764849262347659e-06, "loss": 0.8213, "step": 38950 }, { "epoch": 0.4747541223355636, "grad_norm": 1.8372079133987427, "learning_rate": 2.764528543938422e-06, "loss": 0.8727, "step": 38955 }, { "epoch": 0.4748150585597114, "grad_norm": 2.180692434310913, "learning_rate": 2.7642078255291856e-06, "loss": 0.8667, "step": 38960 }, { "epoch": 0.4748759947838592, "grad_norm": 1.9104394912719727, "learning_rate": 2.763887107119949e-06, "loss": 0.8312, "step": 38965 }, { "epoch": 0.47493693100800705, "grad_norm": 2.031528949737549, "learning_rate": 2.7635663887107125e-06, "loss": 0.8526, "step": 38970 }, { "epoch": 0.4749978672321548, "grad_norm": 1.6983616352081299, "learning_rate": 2.7632456703014755e-06, "loss": 0.8367, "step": 38975 }, { "epoch": 0.47505880345630264, "grad_norm": 1.8717323541641235, "learning_rate": 2.762924951892239e-06, "loss": 0.7553, "step": 38980 }, { "epoch": 0.47511973968045046, "grad_norm": 2.165498971939087, "learning_rate": 2.7626042334830024e-06, "loss": 0.8075, "step": 38985 }, { "epoch": 0.4751806759045982, "grad_norm": 1.5667095184326172, "learning_rate": 2.7622835150737654e-06, "loss": 0.879, "step": 38990 }, { "epoch": 0.47524161212874605, "grad_norm": 1.9732744693756104, "learning_rate": 2.7619627966645292e-06, "loss": 0.9006, "step": 38995 }, { "epoch": 0.47530254835289387, "grad_norm": 2.033966541290283, "learning_rate": 2.7616420782552923e-06, "loss": 0.8386, "step": 39000 }, { "epoch": 0.4753634845770417, "grad_norm": 1.6681526899337769, "learning_rate": 2.7613213598460553e-06, "loss": 0.7742, "step": 39005 }, { "epoch": 0.47542442080118946, "grad_norm": 2.2113006114959717, "learning_rate": 2.7610006414368183e-06, "loss": 0.8461, "step": 39010 }, { "epoch": 0.4754853570253373, "grad_norm": 1.7945868968963623, "learning_rate": 2.760679923027582e-06, "loss": 0.8135, "step": 39015 }, { "epoch": 0.4755462932494851, "grad_norm": 2.124056100845337, "learning_rate": 2.760359204618345e-06, "loss": 0.7943, "step": 39020 }, { "epoch": 0.4756072294736329, "grad_norm": 1.7793807983398438, "learning_rate": 2.7600384862091086e-06, "loss": 0.8307, "step": 39025 }, { "epoch": 0.4756681656977807, "grad_norm": 1.8872288465499878, "learning_rate": 2.759717767799872e-06, "loss": 0.8203, "step": 39030 }, { "epoch": 0.4757291019219285, "grad_norm": 1.9543726444244385, "learning_rate": 2.759397049390635e-06, "loss": 0.8834, "step": 39035 }, { "epoch": 0.47579003814607634, "grad_norm": 1.8854498863220215, "learning_rate": 2.7590763309813985e-06, "loss": 0.8134, "step": 39040 }, { "epoch": 0.4758509743702241, "grad_norm": 2.2365896701812744, "learning_rate": 2.758755612572162e-06, "loss": 0.8298, "step": 39045 }, { "epoch": 0.47591191059437193, "grad_norm": 1.771995186805725, "learning_rate": 2.7584348941629254e-06, "loss": 0.8231, "step": 39050 }, { "epoch": 0.47597284681851976, "grad_norm": 1.8350762128829956, "learning_rate": 2.7581141757536884e-06, "loss": 0.8158, "step": 39055 }, { "epoch": 0.4760337830426675, "grad_norm": 1.7586030960083008, "learning_rate": 2.757793457344452e-06, "loss": 0.8547, "step": 39060 }, { "epoch": 0.47609471926681535, "grad_norm": 4.154651165008545, "learning_rate": 2.7574727389352153e-06, "loss": 0.8496, "step": 39065 }, { "epoch": 0.47615565549096317, "grad_norm": 2.090564727783203, "learning_rate": 2.7571520205259783e-06, "loss": 0.8774, "step": 39070 }, { "epoch": 0.47621659171511094, "grad_norm": 1.8874179124832153, "learning_rate": 2.756831302116742e-06, "loss": 0.7833, "step": 39075 }, { "epoch": 0.47627752793925876, "grad_norm": 1.8611547946929932, "learning_rate": 2.756510583707505e-06, "loss": 0.7788, "step": 39080 }, { "epoch": 0.4763384641634066, "grad_norm": 2.314127206802368, "learning_rate": 2.756189865298268e-06, "loss": 0.8735, "step": 39085 }, { "epoch": 0.4763994003875544, "grad_norm": 2.083698034286499, "learning_rate": 2.7558691468890312e-06, "loss": 0.8108, "step": 39090 }, { "epoch": 0.4764603366117022, "grad_norm": 1.7471195459365845, "learning_rate": 2.755548428479795e-06, "loss": 0.8381, "step": 39095 }, { "epoch": 0.47652127283585, "grad_norm": 2.1629390716552734, "learning_rate": 2.755227710070558e-06, "loss": 0.918, "step": 39100 }, { "epoch": 0.4765822090599978, "grad_norm": 2.167013645172119, "learning_rate": 2.7549069916613215e-06, "loss": 0.922, "step": 39105 }, { "epoch": 0.4766431452841456, "grad_norm": 2.0315001010894775, "learning_rate": 2.754586273252085e-06, "loss": 0.828, "step": 39110 }, { "epoch": 0.4767040815082934, "grad_norm": 1.825785756111145, "learning_rate": 2.754265554842848e-06, "loss": 0.8748, "step": 39115 }, { "epoch": 0.47676501773244123, "grad_norm": 2.3092379570007324, "learning_rate": 2.7539448364336114e-06, "loss": 0.8624, "step": 39120 }, { "epoch": 0.47682595395658905, "grad_norm": 1.6133193969726562, "learning_rate": 2.753624118024375e-06, "loss": 0.791, "step": 39125 }, { "epoch": 0.4768868901807368, "grad_norm": 1.967750072479248, "learning_rate": 2.7533033996151383e-06, "loss": 0.8079, "step": 39130 }, { "epoch": 0.47694782640488464, "grad_norm": 1.9724162817001343, "learning_rate": 2.7529826812059013e-06, "loss": 0.8731, "step": 39135 }, { "epoch": 0.47700876262903247, "grad_norm": 2.1187896728515625, "learning_rate": 2.7526619627966648e-06, "loss": 0.7916, "step": 39140 }, { "epoch": 0.47706969885318024, "grad_norm": 2.474881649017334, "learning_rate": 2.7523412443874282e-06, "loss": 0.8279, "step": 39145 }, { "epoch": 0.47713063507732806, "grad_norm": 1.8109949827194214, "learning_rate": 2.7520205259781912e-06, "loss": 0.8101, "step": 39150 }, { "epoch": 0.4771915713014759, "grad_norm": 1.8517675399780273, "learning_rate": 2.751699807568955e-06, "loss": 0.886, "step": 39155 }, { "epoch": 0.4772525075256237, "grad_norm": 1.9041063785552979, "learning_rate": 2.751379089159718e-06, "loss": 0.8824, "step": 39160 }, { "epoch": 0.47731344374977147, "grad_norm": 1.8653192520141602, "learning_rate": 2.751058370750481e-06, "loss": 0.8515, "step": 39165 }, { "epoch": 0.4773743799739193, "grad_norm": 1.878029704093933, "learning_rate": 2.750737652341244e-06, "loss": 0.8939, "step": 39170 }, { "epoch": 0.4774353161980671, "grad_norm": 2.3814210891723633, "learning_rate": 2.750416933932008e-06, "loss": 0.8119, "step": 39175 }, { "epoch": 0.4774962524222149, "grad_norm": 2.084721088409424, "learning_rate": 2.750096215522771e-06, "loss": 0.7944, "step": 39180 }, { "epoch": 0.4775571886463627, "grad_norm": 1.932655930519104, "learning_rate": 2.7497754971135345e-06, "loss": 0.776, "step": 39185 }, { "epoch": 0.47761812487051053, "grad_norm": 2.6405982971191406, "learning_rate": 2.749454778704298e-06, "loss": 0.8844, "step": 39190 }, { "epoch": 0.47767906109465835, "grad_norm": 2.34248685836792, "learning_rate": 2.7491340602950613e-06, "loss": 0.7928, "step": 39195 }, { "epoch": 0.4777399973188061, "grad_norm": 1.8342219591140747, "learning_rate": 2.7488133418858244e-06, "loss": 0.8514, "step": 39200 }, { "epoch": 0.47780093354295394, "grad_norm": 1.8640844821929932, "learning_rate": 2.748492623476588e-06, "loss": 0.7761, "step": 39205 }, { "epoch": 0.47786186976710177, "grad_norm": 1.6404578685760498, "learning_rate": 2.7481719050673512e-06, "loss": 0.8661, "step": 39210 }, { "epoch": 0.47792280599124953, "grad_norm": 2.263848066329956, "learning_rate": 2.7478511866581143e-06, "loss": 0.8202, "step": 39215 }, { "epoch": 0.47798374221539736, "grad_norm": 1.910240888595581, "learning_rate": 2.747530468248878e-06, "loss": 0.8894, "step": 39220 }, { "epoch": 0.4780446784395452, "grad_norm": 1.9757641553878784, "learning_rate": 2.747209749839641e-06, "loss": 0.8502, "step": 39225 }, { "epoch": 0.478105614663693, "grad_norm": 1.8755841255187988, "learning_rate": 2.746889031430404e-06, "loss": 0.8376, "step": 39230 }, { "epoch": 0.47816655088784077, "grad_norm": 1.9308675527572632, "learning_rate": 2.746568313021168e-06, "loss": 0.8179, "step": 39235 }, { "epoch": 0.4782274871119886, "grad_norm": 2.0010006427764893, "learning_rate": 2.746247594611931e-06, "loss": 0.8568, "step": 39240 }, { "epoch": 0.4782884233361364, "grad_norm": 1.8294882774353027, "learning_rate": 2.745926876202694e-06, "loss": 0.7894, "step": 39245 }, { "epoch": 0.4783493595602842, "grad_norm": 2.101757764816284, "learning_rate": 2.7456061577934575e-06, "loss": 0.8455, "step": 39250 }, { "epoch": 0.478410295784432, "grad_norm": 1.8548520803451538, "learning_rate": 2.745285439384221e-06, "loss": 0.879, "step": 39255 }, { "epoch": 0.4784712320085798, "grad_norm": 1.5549218654632568, "learning_rate": 2.744964720974984e-06, "loss": 0.8064, "step": 39260 }, { "epoch": 0.47853216823272765, "grad_norm": 2.250030517578125, "learning_rate": 2.7446440025657474e-06, "loss": 0.7969, "step": 39265 }, { "epoch": 0.4785931044568754, "grad_norm": 1.872937560081482, "learning_rate": 2.744323284156511e-06, "loss": 0.8335, "step": 39270 }, { "epoch": 0.47865404068102324, "grad_norm": 2.0420403480529785, "learning_rate": 2.7440025657472743e-06, "loss": 0.9552, "step": 39275 }, { "epoch": 0.47871497690517106, "grad_norm": 2.488165855407715, "learning_rate": 2.7436818473380373e-06, "loss": 0.8959, "step": 39280 }, { "epoch": 0.47877591312931883, "grad_norm": 2.1236836910247803, "learning_rate": 2.7433611289288007e-06, "loss": 0.874, "step": 39285 }, { "epoch": 0.47883684935346665, "grad_norm": 2.1307504177093506, "learning_rate": 2.743040410519564e-06, "loss": 0.8417, "step": 39290 }, { "epoch": 0.4788977855776145, "grad_norm": 2.0503153800964355, "learning_rate": 2.742719692110327e-06, "loss": 0.9018, "step": 39295 }, { "epoch": 0.4789587218017623, "grad_norm": 1.9368808269500732, "learning_rate": 2.742398973701091e-06, "loss": 0.8049, "step": 39300 }, { "epoch": 0.47901965802591007, "grad_norm": 2.1246509552001953, "learning_rate": 2.742078255291854e-06, "loss": 0.8298, "step": 39305 }, { "epoch": 0.4790805942500579, "grad_norm": 1.8513116836547852, "learning_rate": 2.741757536882617e-06, "loss": 0.8962, "step": 39310 }, { "epoch": 0.4791415304742057, "grad_norm": 1.8022007942199707, "learning_rate": 2.741436818473381e-06, "loss": 0.8158, "step": 39315 }, { "epoch": 0.4792024666983535, "grad_norm": 2.030432939529419, "learning_rate": 2.741116100064144e-06, "loss": 0.808, "step": 39320 }, { "epoch": 0.4792634029225013, "grad_norm": 2.4641871452331543, "learning_rate": 2.740795381654907e-06, "loss": 0.7286, "step": 39325 }, { "epoch": 0.4793243391466491, "grad_norm": 2.145167589187622, "learning_rate": 2.740474663245671e-06, "loss": 0.8833, "step": 39330 }, { "epoch": 0.47938527537079695, "grad_norm": 1.8034074306488037, "learning_rate": 2.740153944836434e-06, "loss": 0.7635, "step": 39335 }, { "epoch": 0.4794462115949447, "grad_norm": 1.7695449590682983, "learning_rate": 2.739833226427197e-06, "loss": 0.7861, "step": 39340 }, { "epoch": 0.47950714781909254, "grad_norm": 1.746430516242981, "learning_rate": 2.7395125080179603e-06, "loss": 0.8801, "step": 39345 }, { "epoch": 0.47956808404324036, "grad_norm": 2.1251819133758545, "learning_rate": 2.7391917896087238e-06, "loss": 0.8471, "step": 39350 }, { "epoch": 0.47962902026738813, "grad_norm": 1.9433739185333252, "learning_rate": 2.738871071199487e-06, "loss": 0.8109, "step": 39355 }, { "epoch": 0.47968995649153595, "grad_norm": 1.8268048763275146, "learning_rate": 2.7385503527902502e-06, "loss": 0.8849, "step": 39360 }, { "epoch": 0.4797508927156838, "grad_norm": 2.069958448410034, "learning_rate": 2.7382296343810137e-06, "loss": 0.8201, "step": 39365 }, { "epoch": 0.4798118289398316, "grad_norm": 1.7508922815322876, "learning_rate": 2.737908915971777e-06, "loss": 0.8324, "step": 39370 }, { "epoch": 0.47987276516397936, "grad_norm": 1.8866021633148193, "learning_rate": 2.73758819756254e-06, "loss": 0.8391, "step": 39375 }, { "epoch": 0.4799337013881272, "grad_norm": 1.7317880392074585, "learning_rate": 2.737267479153304e-06, "loss": 0.8583, "step": 39380 }, { "epoch": 0.479994637612275, "grad_norm": 2.0812089443206787, "learning_rate": 2.736946760744067e-06, "loss": 0.7923, "step": 39385 }, { "epoch": 0.4800555738364228, "grad_norm": 2.0099315643310547, "learning_rate": 2.73662604233483e-06, "loss": 0.8845, "step": 39390 }, { "epoch": 0.4801165100605706, "grad_norm": 2.302225351333618, "learning_rate": 2.736305323925594e-06, "loss": 0.8595, "step": 39395 }, { "epoch": 0.4801774462847184, "grad_norm": 1.9612908363342285, "learning_rate": 2.735984605516357e-06, "loss": 0.878, "step": 39400 }, { "epoch": 0.48023838250886625, "grad_norm": 1.9083433151245117, "learning_rate": 2.73566388710712e-06, "loss": 0.8839, "step": 39405 }, { "epoch": 0.480299318733014, "grad_norm": 2.1354682445526123, "learning_rate": 2.7353431686978838e-06, "loss": 0.9279, "step": 39410 }, { "epoch": 0.48036025495716184, "grad_norm": 1.809194564819336, "learning_rate": 2.7350224502886468e-06, "loss": 0.8286, "step": 39415 }, { "epoch": 0.48042119118130966, "grad_norm": 1.7704943418502808, "learning_rate": 2.73470173187941e-06, "loss": 0.782, "step": 39420 }, { "epoch": 0.4804821274054574, "grad_norm": 1.9413838386535645, "learning_rate": 2.7343810134701732e-06, "loss": 0.8721, "step": 39425 }, { "epoch": 0.48054306362960525, "grad_norm": 1.8388118743896484, "learning_rate": 2.7340602950609367e-06, "loss": 0.8135, "step": 39430 }, { "epoch": 0.48060399985375307, "grad_norm": 2.7557148933410645, "learning_rate": 2.7337395766517e-06, "loss": 0.8635, "step": 39435 }, { "epoch": 0.4806649360779009, "grad_norm": 2.1033451557159424, "learning_rate": 2.733418858242463e-06, "loss": 0.8518, "step": 39440 }, { "epoch": 0.48072587230204866, "grad_norm": 1.8687852621078491, "learning_rate": 2.733098139833227e-06, "loss": 0.8826, "step": 39445 }, { "epoch": 0.4807868085261965, "grad_norm": 1.9677515029907227, "learning_rate": 2.73277742142399e-06, "loss": 0.8287, "step": 39450 }, { "epoch": 0.4808477447503443, "grad_norm": 2.3024070262908936, "learning_rate": 2.732456703014753e-06, "loss": 0.7895, "step": 39455 }, { "epoch": 0.4809086809744921, "grad_norm": 1.8286311626434326, "learning_rate": 2.732135984605517e-06, "loss": 0.9049, "step": 39460 }, { "epoch": 0.4809696171986399, "grad_norm": 1.9905487298965454, "learning_rate": 2.73181526619628e-06, "loss": 0.8704, "step": 39465 }, { "epoch": 0.4810305534227877, "grad_norm": 2.5021114349365234, "learning_rate": 2.731494547787043e-06, "loss": 0.8476, "step": 39470 }, { "epoch": 0.48109148964693554, "grad_norm": 1.8396703004837036, "learning_rate": 2.731173829377807e-06, "loss": 0.7784, "step": 39475 }, { "epoch": 0.4811524258710833, "grad_norm": 1.6033498048782349, "learning_rate": 2.73085311096857e-06, "loss": 0.8428, "step": 39480 }, { "epoch": 0.48121336209523113, "grad_norm": 1.9762881994247437, "learning_rate": 2.730532392559333e-06, "loss": 0.8414, "step": 39485 }, { "epoch": 0.48127429831937896, "grad_norm": 1.8492966890335083, "learning_rate": 2.7302116741500967e-06, "loss": 0.8413, "step": 39490 }, { "epoch": 0.4813352345435267, "grad_norm": 1.8272703886032104, "learning_rate": 2.7298909557408597e-06, "loss": 0.7701, "step": 39495 }, { "epoch": 0.48139617076767455, "grad_norm": 1.8469287157058716, "learning_rate": 2.729570237331623e-06, "loss": 0.8347, "step": 39500 }, { "epoch": 0.48145710699182237, "grad_norm": 1.8861093521118164, "learning_rate": 2.729249518922386e-06, "loss": 0.8175, "step": 39505 }, { "epoch": 0.4815180432159702, "grad_norm": 1.9122850894927979, "learning_rate": 2.7289288005131496e-06, "loss": 0.7551, "step": 39510 }, { "epoch": 0.48157897944011796, "grad_norm": 1.8006339073181152, "learning_rate": 2.728608082103913e-06, "loss": 0.9076, "step": 39515 }, { "epoch": 0.4816399156642658, "grad_norm": 1.7073346376419067, "learning_rate": 2.728287363694676e-06, "loss": 0.854, "step": 39520 }, { "epoch": 0.4817008518884136, "grad_norm": 2.0234034061431885, "learning_rate": 2.72796664528544e-06, "loss": 0.9229, "step": 39525 }, { "epoch": 0.4817617881125614, "grad_norm": 2.0040981769561768, "learning_rate": 2.727645926876203e-06, "loss": 0.8347, "step": 39530 }, { "epoch": 0.4818227243367092, "grad_norm": 2.9726643562316895, "learning_rate": 2.727325208466966e-06, "loss": 0.8594, "step": 39535 }, { "epoch": 0.481883660560857, "grad_norm": 1.8259172439575195, "learning_rate": 2.72700449005773e-06, "loss": 0.8092, "step": 39540 }, { "epoch": 0.4819445967850048, "grad_norm": 1.9201158285140991, "learning_rate": 2.726683771648493e-06, "loss": 0.7612, "step": 39545 }, { "epoch": 0.4820055330091526, "grad_norm": 1.7778079509735107, "learning_rate": 2.726363053239256e-06, "loss": 0.8693, "step": 39550 }, { "epoch": 0.48206646923330043, "grad_norm": 2.0501599311828613, "learning_rate": 2.7260423348300197e-06, "loss": 0.8411, "step": 39555 }, { "epoch": 0.48212740545744825, "grad_norm": 1.7038973569869995, "learning_rate": 2.7257216164207827e-06, "loss": 0.8577, "step": 39560 }, { "epoch": 0.482188341681596, "grad_norm": 2.0949442386627197, "learning_rate": 2.7254008980115458e-06, "loss": 0.7662, "step": 39565 }, { "epoch": 0.48224927790574385, "grad_norm": 2.559785842895508, "learning_rate": 2.7250801796023096e-06, "loss": 0.8727, "step": 39570 }, { "epoch": 0.48231021412989167, "grad_norm": 1.8892408609390259, "learning_rate": 2.7247594611930726e-06, "loss": 0.7613, "step": 39575 }, { "epoch": 0.48237115035403944, "grad_norm": 1.8809707164764404, "learning_rate": 2.724438742783836e-06, "loss": 0.8663, "step": 39580 }, { "epoch": 0.48243208657818726, "grad_norm": 1.4579174518585205, "learning_rate": 2.7241180243745995e-06, "loss": 0.8016, "step": 39585 }, { "epoch": 0.4824930228023351, "grad_norm": 2.024019718170166, "learning_rate": 2.7237973059653625e-06, "loss": 0.8592, "step": 39590 }, { "epoch": 0.4825539590264829, "grad_norm": 1.8179926872253418, "learning_rate": 2.723476587556126e-06, "loss": 0.8553, "step": 39595 }, { "epoch": 0.48261489525063067, "grad_norm": 1.8379548788070679, "learning_rate": 2.723155869146889e-06, "loss": 0.8554, "step": 39600 }, { "epoch": 0.4826758314747785, "grad_norm": 2.2792394161224365, "learning_rate": 2.722835150737653e-06, "loss": 0.8651, "step": 39605 }, { "epoch": 0.4827367676989263, "grad_norm": 1.846904993057251, "learning_rate": 2.722514432328416e-06, "loss": 0.9073, "step": 39610 }, { "epoch": 0.4827977039230741, "grad_norm": 1.8541964292526245, "learning_rate": 2.722193713919179e-06, "loss": 0.8639, "step": 39615 }, { "epoch": 0.4828586401472219, "grad_norm": 1.9796948432922363, "learning_rate": 2.7218729955099428e-06, "loss": 0.8634, "step": 39620 }, { "epoch": 0.48291957637136973, "grad_norm": 1.8343757390975952, "learning_rate": 2.7215522771007058e-06, "loss": 0.8061, "step": 39625 }, { "epoch": 0.48298051259551755, "grad_norm": 1.9993152618408203, "learning_rate": 2.721231558691469e-06, "loss": 0.8304, "step": 39630 }, { "epoch": 0.4830414488196653, "grad_norm": 1.9793781042099, "learning_rate": 2.7209108402822327e-06, "loss": 0.9177, "step": 39635 }, { "epoch": 0.48310238504381314, "grad_norm": 1.749233365058899, "learning_rate": 2.7205901218729957e-06, "loss": 0.8232, "step": 39640 }, { "epoch": 0.48316332126796097, "grad_norm": 2.0177760124206543, "learning_rate": 2.7202694034637587e-06, "loss": 0.8452, "step": 39645 }, { "epoch": 0.48322425749210873, "grad_norm": 1.8440595865249634, "learning_rate": 2.7199486850545226e-06, "loss": 0.8034, "step": 39650 }, { "epoch": 0.48328519371625656, "grad_norm": 1.8125039339065552, "learning_rate": 2.7196279666452856e-06, "loss": 0.8505, "step": 39655 }, { "epoch": 0.4833461299404044, "grad_norm": 2.4108331203460693, "learning_rate": 2.719307248236049e-06, "loss": 0.8496, "step": 39660 }, { "epoch": 0.4834070661645522, "grad_norm": 2.089768886566162, "learning_rate": 2.7189865298268124e-06, "loss": 0.7744, "step": 39665 }, { "epoch": 0.48346800238869997, "grad_norm": 1.9496071338653564, "learning_rate": 2.718665811417576e-06, "loss": 0.8101, "step": 39670 }, { "epoch": 0.4835289386128478, "grad_norm": 2.2619521617889404, "learning_rate": 2.718345093008339e-06, "loss": 0.8829, "step": 39675 }, { "epoch": 0.4835898748369956, "grad_norm": 1.9869866371154785, "learning_rate": 2.718024374599102e-06, "loss": 0.8118, "step": 39680 }, { "epoch": 0.4836508110611434, "grad_norm": 2.2781853675842285, "learning_rate": 2.7177036561898658e-06, "loss": 0.8424, "step": 39685 }, { "epoch": 0.4837117472852912, "grad_norm": 2.5265307426452637, "learning_rate": 2.717382937780629e-06, "loss": 0.8919, "step": 39690 }, { "epoch": 0.483772683509439, "grad_norm": 1.6894559860229492, "learning_rate": 2.717062219371392e-06, "loss": 0.8235, "step": 39695 }, { "epoch": 0.48383361973358685, "grad_norm": 1.7862502336502075, "learning_rate": 2.7167415009621557e-06, "loss": 0.8791, "step": 39700 }, { "epoch": 0.4838945559577346, "grad_norm": 1.9009363651275635, "learning_rate": 2.7164207825529187e-06, "loss": 0.8163, "step": 39705 }, { "epoch": 0.48395549218188244, "grad_norm": 2.2631287574768066, "learning_rate": 2.7161000641436817e-06, "loss": 0.9133, "step": 39710 }, { "epoch": 0.48401642840603026, "grad_norm": 2.1330106258392334, "learning_rate": 2.7157793457344456e-06, "loss": 0.8952, "step": 39715 }, { "epoch": 0.48407736463017803, "grad_norm": 1.9706426858901978, "learning_rate": 2.7154586273252086e-06, "loss": 0.7984, "step": 39720 }, { "epoch": 0.48413830085432585, "grad_norm": 2.7070350646972656, "learning_rate": 2.715137908915972e-06, "loss": 0.7751, "step": 39725 }, { "epoch": 0.4841992370784737, "grad_norm": 1.8829702138900757, "learning_rate": 2.7148171905067355e-06, "loss": 0.7706, "step": 39730 }, { "epoch": 0.4842601733026215, "grad_norm": 1.6808497905731201, "learning_rate": 2.7144964720974985e-06, "loss": 0.8687, "step": 39735 }, { "epoch": 0.48432110952676927, "grad_norm": 2.1861534118652344, "learning_rate": 2.714175753688262e-06, "loss": 0.8508, "step": 39740 }, { "epoch": 0.4843820457509171, "grad_norm": 1.8766517639160156, "learning_rate": 2.7138550352790254e-06, "loss": 0.7913, "step": 39745 }, { "epoch": 0.4844429819750649, "grad_norm": 1.651112675666809, "learning_rate": 2.713534316869789e-06, "loss": 0.8762, "step": 39750 }, { "epoch": 0.4845039181992127, "grad_norm": 1.8384459018707275, "learning_rate": 2.713213598460552e-06, "loss": 0.7985, "step": 39755 }, { "epoch": 0.4845648544233605, "grad_norm": 2.5779318809509277, "learning_rate": 2.712892880051315e-06, "loss": 0.8333, "step": 39760 }, { "epoch": 0.4846257906475083, "grad_norm": 2.021226406097412, "learning_rate": 2.7125721616420787e-06, "loss": 0.8095, "step": 39765 }, { "epoch": 0.48468672687165615, "grad_norm": 1.859517216682434, "learning_rate": 2.7122514432328417e-06, "loss": 0.8609, "step": 39770 }, { "epoch": 0.4847476630958039, "grad_norm": 1.7665244340896606, "learning_rate": 2.7119307248236047e-06, "loss": 0.7519, "step": 39775 }, { "epoch": 0.48480859931995174, "grad_norm": 2.215506076812744, "learning_rate": 2.7116100064143686e-06, "loss": 0.8804, "step": 39780 }, { "epoch": 0.48486953554409956, "grad_norm": 2.055995225906372, "learning_rate": 2.7112892880051316e-06, "loss": 0.8103, "step": 39785 }, { "epoch": 0.48493047176824733, "grad_norm": 1.9665789604187012, "learning_rate": 2.7109685695958946e-06, "loss": 0.7801, "step": 39790 }, { "epoch": 0.48499140799239515, "grad_norm": 2.094555616378784, "learning_rate": 2.7106478511866585e-06, "loss": 0.8183, "step": 39795 }, { "epoch": 0.485052344216543, "grad_norm": 2.3880205154418945, "learning_rate": 2.7103271327774215e-06, "loss": 0.8877, "step": 39800 }, { "epoch": 0.4851132804406908, "grad_norm": 1.7693787813186646, "learning_rate": 2.710006414368185e-06, "loss": 0.7915, "step": 39805 }, { "epoch": 0.48517421666483856, "grad_norm": 1.8488836288452148, "learning_rate": 2.7096856959589484e-06, "loss": 0.8466, "step": 39810 }, { "epoch": 0.4852351528889864, "grad_norm": 2.3557591438293457, "learning_rate": 2.7093649775497114e-06, "loss": 0.8621, "step": 39815 }, { "epoch": 0.4852960891131342, "grad_norm": 2.1341772079467773, "learning_rate": 2.709044259140475e-06, "loss": 0.871, "step": 39820 }, { "epoch": 0.485357025337282, "grad_norm": 1.779307246208191, "learning_rate": 2.7087235407312383e-06, "loss": 0.7942, "step": 39825 }, { "epoch": 0.4854179615614298, "grad_norm": 2.1685192584991455, "learning_rate": 2.7084028223220017e-06, "loss": 0.842, "step": 39830 }, { "epoch": 0.4854788977855776, "grad_norm": 1.9101985692977905, "learning_rate": 2.7080821039127648e-06, "loss": 0.853, "step": 39835 }, { "epoch": 0.48553983400972545, "grad_norm": 1.8172515630722046, "learning_rate": 2.7077613855035278e-06, "loss": 0.8525, "step": 39840 }, { "epoch": 0.4856007702338732, "grad_norm": 3.11244797706604, "learning_rate": 2.7074406670942916e-06, "loss": 0.867, "step": 39845 }, { "epoch": 0.48566170645802104, "grad_norm": 1.9148598909378052, "learning_rate": 2.7071199486850547e-06, "loss": 0.9128, "step": 39850 }, { "epoch": 0.48572264268216886, "grad_norm": 1.9669628143310547, "learning_rate": 2.7067992302758177e-06, "loss": 0.8573, "step": 39855 }, { "epoch": 0.4857835789063166, "grad_norm": 1.6164332628250122, "learning_rate": 2.7064785118665815e-06, "loss": 0.8235, "step": 39860 }, { "epoch": 0.48584451513046445, "grad_norm": 2.2662551403045654, "learning_rate": 2.7061577934573446e-06, "loss": 0.82, "step": 39865 }, { "epoch": 0.48590545135461227, "grad_norm": 1.7786537408828735, "learning_rate": 2.7058370750481076e-06, "loss": 0.7834, "step": 39870 }, { "epoch": 0.4859663875787601, "grad_norm": 1.7913862466812134, "learning_rate": 2.7055163566388714e-06, "loss": 0.8082, "step": 39875 }, { "epoch": 0.48602732380290786, "grad_norm": 1.7849934101104736, "learning_rate": 2.7051956382296345e-06, "loss": 0.809, "step": 39880 }, { "epoch": 0.4860882600270557, "grad_norm": 1.9023150205612183, "learning_rate": 2.704874919820398e-06, "loss": 0.8837, "step": 39885 }, { "epoch": 0.4861491962512035, "grad_norm": 1.8463597297668457, "learning_rate": 2.7045542014111613e-06, "loss": 0.8336, "step": 39890 }, { "epoch": 0.4862101324753513, "grad_norm": 2.3680179119110107, "learning_rate": 2.7042334830019248e-06, "loss": 0.8883, "step": 39895 }, { "epoch": 0.4862710686994991, "grad_norm": 2.0932469367980957, "learning_rate": 2.703912764592688e-06, "loss": 0.8717, "step": 39900 }, { "epoch": 0.4863320049236469, "grad_norm": 1.7975242137908936, "learning_rate": 2.7035920461834512e-06, "loss": 0.833, "step": 39905 }, { "epoch": 0.48639294114779474, "grad_norm": 1.886605978012085, "learning_rate": 2.7032713277742147e-06, "loss": 0.8955, "step": 39910 }, { "epoch": 0.4864538773719425, "grad_norm": 1.807209849357605, "learning_rate": 2.7029506093649777e-06, "loss": 0.8937, "step": 39915 }, { "epoch": 0.48651481359609033, "grad_norm": 1.9178649187088013, "learning_rate": 2.7026298909557416e-06, "loss": 0.762, "step": 39920 }, { "epoch": 0.48657574982023816, "grad_norm": 2.0037331581115723, "learning_rate": 2.7023091725465046e-06, "loss": 0.912, "step": 39925 }, { "epoch": 0.4866366860443859, "grad_norm": 1.7310853004455566, "learning_rate": 2.7019884541372676e-06, "loss": 0.7622, "step": 39930 }, { "epoch": 0.48669762226853375, "grad_norm": 2.145082950592041, "learning_rate": 2.7016677357280306e-06, "loss": 0.851, "step": 39935 }, { "epoch": 0.48675855849268157, "grad_norm": 2.291581392288208, "learning_rate": 2.7013470173187945e-06, "loss": 0.9129, "step": 39940 }, { "epoch": 0.4868194947168294, "grad_norm": 2.0594160556793213, "learning_rate": 2.7010262989095575e-06, "loss": 0.8811, "step": 39945 }, { "epoch": 0.48688043094097716, "grad_norm": 1.9660035371780396, "learning_rate": 2.700705580500321e-06, "loss": 0.8339, "step": 39950 }, { "epoch": 0.486941367165125, "grad_norm": 1.7147152423858643, "learning_rate": 2.7003848620910844e-06, "loss": 0.7786, "step": 39955 }, { "epoch": 0.4870023033892728, "grad_norm": 2.0802078247070312, "learning_rate": 2.7000641436818474e-06, "loss": 0.7882, "step": 39960 }, { "epoch": 0.4870632396134206, "grad_norm": 2.0026612281799316, "learning_rate": 2.699743425272611e-06, "loss": 0.7591, "step": 39965 }, { "epoch": 0.4871241758375684, "grad_norm": 2.0107858180999756, "learning_rate": 2.6994227068633743e-06, "loss": 0.8229, "step": 39970 }, { "epoch": 0.4871851120617162, "grad_norm": 1.9418574571609497, "learning_rate": 2.6991019884541377e-06, "loss": 0.8889, "step": 39975 }, { "epoch": 0.48724604828586404, "grad_norm": 1.9929529428482056, "learning_rate": 2.6987812700449007e-06, "loss": 0.8194, "step": 39980 }, { "epoch": 0.4873069845100118, "grad_norm": 2.3926780223846436, "learning_rate": 2.698460551635664e-06, "loss": 0.8619, "step": 39985 }, { "epoch": 0.48736792073415963, "grad_norm": 1.9865024089813232, "learning_rate": 2.6981398332264276e-06, "loss": 0.8438, "step": 39990 }, { "epoch": 0.48742885695830745, "grad_norm": 1.8029532432556152, "learning_rate": 2.6978191148171906e-06, "loss": 0.8688, "step": 39995 }, { "epoch": 0.4874897931824552, "grad_norm": 1.9648710489273071, "learning_rate": 2.6974983964079545e-06, "loss": 0.8138, "step": 40000 }, { "epoch": 0.48755072940660305, "grad_norm": 2.147167921066284, "learning_rate": 2.6971776779987175e-06, "loss": 0.8467, "step": 40005 }, { "epoch": 0.48761166563075087, "grad_norm": 1.9687247276306152, "learning_rate": 2.6968569595894805e-06, "loss": 0.8355, "step": 40010 }, { "epoch": 0.4876726018548987, "grad_norm": 1.8234070539474487, "learning_rate": 2.6965362411802435e-06, "loss": 0.7706, "step": 40015 }, { "epoch": 0.48773353807904646, "grad_norm": 2.014192819595337, "learning_rate": 2.6962155227710074e-06, "loss": 0.8435, "step": 40020 }, { "epoch": 0.4877944743031943, "grad_norm": 2.2722880840301514, "learning_rate": 2.6958948043617704e-06, "loss": 0.8005, "step": 40025 }, { "epoch": 0.4878554105273421, "grad_norm": 1.6790043115615845, "learning_rate": 2.695574085952534e-06, "loss": 0.9008, "step": 40030 }, { "epoch": 0.48791634675148987, "grad_norm": 2.174867630004883, "learning_rate": 2.6952533675432973e-06, "loss": 0.8905, "step": 40035 }, { "epoch": 0.4879772829756377, "grad_norm": 1.831398606300354, "learning_rate": 2.6949326491340603e-06, "loss": 0.8237, "step": 40040 }, { "epoch": 0.4880382191997855, "grad_norm": 1.9059574604034424, "learning_rate": 2.6946119307248237e-06, "loss": 0.824, "step": 40045 }, { "epoch": 0.4880991554239333, "grad_norm": 2.0116055011749268, "learning_rate": 2.694291212315587e-06, "loss": 0.8635, "step": 40050 }, { "epoch": 0.4881600916480811, "grad_norm": 1.7925710678100586, "learning_rate": 2.6939704939063506e-06, "loss": 0.8111, "step": 40055 }, { "epoch": 0.48822102787222893, "grad_norm": 2.2159197330474854, "learning_rate": 2.6936497754971136e-06, "loss": 0.8131, "step": 40060 }, { "epoch": 0.48828196409637675, "grad_norm": 1.9730428457260132, "learning_rate": 2.693329057087877e-06, "loss": 0.93, "step": 40065 }, { "epoch": 0.4883429003205245, "grad_norm": 1.9324432611465454, "learning_rate": 2.6930083386786405e-06, "loss": 0.8638, "step": 40070 }, { "epoch": 0.48840383654467234, "grad_norm": 1.8013052940368652, "learning_rate": 2.6926876202694035e-06, "loss": 0.8794, "step": 40075 }, { "epoch": 0.48846477276882017, "grad_norm": 1.8987668752670288, "learning_rate": 2.6923669018601674e-06, "loss": 0.8483, "step": 40080 }, { "epoch": 0.48852570899296793, "grad_norm": 1.888403058052063, "learning_rate": 2.6920461834509304e-06, "loss": 0.8233, "step": 40085 }, { "epoch": 0.48858664521711576, "grad_norm": 1.7378883361816406, "learning_rate": 2.6917254650416934e-06, "loss": 0.8537, "step": 40090 }, { "epoch": 0.4886475814412636, "grad_norm": 2.113492965698242, "learning_rate": 2.6914047466324565e-06, "loss": 0.8247, "step": 40095 }, { "epoch": 0.4887085176654114, "grad_norm": 1.967002272605896, "learning_rate": 2.6910840282232203e-06, "loss": 0.7744, "step": 40100 }, { "epoch": 0.48876945388955917, "grad_norm": 1.8397475481033325, "learning_rate": 2.6907633098139833e-06, "loss": 0.7985, "step": 40105 }, { "epoch": 0.488830390113707, "grad_norm": 1.8753321170806885, "learning_rate": 2.6904425914047468e-06, "loss": 0.8247, "step": 40110 }, { "epoch": 0.4888913263378548, "grad_norm": 2.1440343856811523, "learning_rate": 2.6901218729955102e-06, "loss": 0.8275, "step": 40115 }, { "epoch": 0.4889522625620026, "grad_norm": 1.9046837091445923, "learning_rate": 2.6898011545862732e-06, "loss": 0.817, "step": 40120 }, { "epoch": 0.4890131987861504, "grad_norm": 1.8359298706054688, "learning_rate": 2.6894804361770367e-06, "loss": 0.758, "step": 40125 }, { "epoch": 0.4890741350102982, "grad_norm": 2.0128748416900635, "learning_rate": 2.6891597177678e-06, "loss": 0.802, "step": 40130 }, { "epoch": 0.48913507123444605, "grad_norm": 1.8983076810836792, "learning_rate": 2.6888389993585636e-06, "loss": 0.8325, "step": 40135 }, { "epoch": 0.4891960074585938, "grad_norm": 2.394044876098633, "learning_rate": 2.6885182809493266e-06, "loss": 0.8536, "step": 40140 }, { "epoch": 0.48925694368274164, "grad_norm": 1.7182797193527222, "learning_rate": 2.6881975625400904e-06, "loss": 0.7767, "step": 40145 }, { "epoch": 0.48931787990688946, "grad_norm": 2.1517090797424316, "learning_rate": 2.6878768441308535e-06, "loss": 0.8253, "step": 40150 }, { "epoch": 0.48937881613103723, "grad_norm": 1.8652533292770386, "learning_rate": 2.6875561257216165e-06, "loss": 0.847, "step": 40155 }, { "epoch": 0.48943975235518505, "grad_norm": 1.5776714086532593, "learning_rate": 2.6872354073123803e-06, "loss": 0.8039, "step": 40160 }, { "epoch": 0.4895006885793329, "grad_norm": 1.8935437202453613, "learning_rate": 2.6869146889031434e-06, "loss": 0.7598, "step": 40165 }, { "epoch": 0.4895616248034807, "grad_norm": 1.7893636226654053, "learning_rate": 2.6865939704939064e-06, "loss": 0.7227, "step": 40170 }, { "epoch": 0.48962256102762847, "grad_norm": 2.63028621673584, "learning_rate": 2.68627325208467e-06, "loss": 0.8475, "step": 40175 }, { "epoch": 0.4896834972517763, "grad_norm": 1.7559860944747925, "learning_rate": 2.6859525336754332e-06, "loss": 0.9111, "step": 40180 }, { "epoch": 0.4897444334759241, "grad_norm": 2.6000444889068604, "learning_rate": 2.6856318152661963e-06, "loss": 0.8663, "step": 40185 }, { "epoch": 0.4898053697000719, "grad_norm": 1.9529922008514404, "learning_rate": 2.6853110968569597e-06, "loss": 0.8048, "step": 40190 }, { "epoch": 0.4898663059242197, "grad_norm": 2.1627438068389893, "learning_rate": 2.684990378447723e-06, "loss": 0.8317, "step": 40195 }, { "epoch": 0.4899272421483675, "grad_norm": 1.7845841646194458, "learning_rate": 2.6846696600384866e-06, "loss": 0.8483, "step": 40200 }, { "epoch": 0.48998817837251535, "grad_norm": 1.723514437675476, "learning_rate": 2.6843489416292496e-06, "loss": 0.8799, "step": 40205 }, { "epoch": 0.4900491145966631, "grad_norm": 1.82053542137146, "learning_rate": 2.684028223220013e-06, "loss": 0.8515, "step": 40210 }, { "epoch": 0.49011005082081094, "grad_norm": 2.0000438690185547, "learning_rate": 2.6837075048107765e-06, "loss": 0.8605, "step": 40215 }, { "epoch": 0.49017098704495876, "grad_norm": 1.7741739749908447, "learning_rate": 2.6833867864015395e-06, "loss": 0.8444, "step": 40220 }, { "epoch": 0.49023192326910653, "grad_norm": 1.7351001501083374, "learning_rate": 2.6830660679923034e-06, "loss": 0.9355, "step": 40225 }, { "epoch": 0.49029285949325435, "grad_norm": 2.204982280731201, "learning_rate": 2.6827453495830664e-06, "loss": 0.8227, "step": 40230 }, { "epoch": 0.4903537957174022, "grad_norm": 2.2100107669830322, "learning_rate": 2.6824246311738294e-06, "loss": 0.8806, "step": 40235 }, { "epoch": 0.49041473194155, "grad_norm": 2.0822532176971436, "learning_rate": 2.6821039127645933e-06, "loss": 0.8288, "step": 40240 }, { "epoch": 0.49047566816569776, "grad_norm": 1.9433321952819824, "learning_rate": 2.6817831943553563e-06, "loss": 0.8462, "step": 40245 }, { "epoch": 0.4905366043898456, "grad_norm": 1.7483106851577759, "learning_rate": 2.6814624759461193e-06, "loss": 0.8368, "step": 40250 }, { "epoch": 0.4905975406139934, "grad_norm": 1.7891769409179688, "learning_rate": 2.681141757536883e-06, "loss": 0.8189, "step": 40255 }, { "epoch": 0.4906584768381412, "grad_norm": 2.0183165073394775, "learning_rate": 2.680821039127646e-06, "loss": 0.8189, "step": 40260 }, { "epoch": 0.490719413062289, "grad_norm": 2.252854108810425, "learning_rate": 2.680500320718409e-06, "loss": 0.7933, "step": 40265 }, { "epoch": 0.4907803492864368, "grad_norm": 1.8580248355865479, "learning_rate": 2.6801796023091726e-06, "loss": 0.7834, "step": 40270 }, { "epoch": 0.49084128551058465, "grad_norm": 1.7558375597000122, "learning_rate": 2.679858883899936e-06, "loss": 0.7775, "step": 40275 }, { "epoch": 0.4909022217347324, "grad_norm": 1.7165292501449585, "learning_rate": 2.6795381654906995e-06, "loss": 0.8731, "step": 40280 }, { "epoch": 0.49096315795888024, "grad_norm": 2.1298274993896484, "learning_rate": 2.6792174470814625e-06, "loss": 0.8249, "step": 40285 }, { "epoch": 0.49102409418302806, "grad_norm": 1.7395687103271484, "learning_rate": 2.678896728672226e-06, "loss": 0.7969, "step": 40290 }, { "epoch": 0.4910850304071758, "grad_norm": 1.770967960357666, "learning_rate": 2.6785760102629894e-06, "loss": 0.7997, "step": 40295 }, { "epoch": 0.49114596663132365, "grad_norm": 1.8393397331237793, "learning_rate": 2.6782552918537524e-06, "loss": 0.832, "step": 40300 }, { "epoch": 0.4912069028554715, "grad_norm": 1.9202924966812134, "learning_rate": 2.6779345734445163e-06, "loss": 0.8403, "step": 40305 }, { "epoch": 0.4912678390796193, "grad_norm": 1.991562008857727, "learning_rate": 2.6776138550352793e-06, "loss": 0.9023, "step": 40310 }, { "epoch": 0.49132877530376706, "grad_norm": 2.0293490886688232, "learning_rate": 2.6772931366260423e-06, "loss": 0.8643, "step": 40315 }, { "epoch": 0.4913897115279149, "grad_norm": 2.040773868560791, "learning_rate": 2.676972418216806e-06, "loss": 0.8518, "step": 40320 }, { "epoch": 0.4914506477520627, "grad_norm": 2.2814371585845947, "learning_rate": 2.676651699807569e-06, "loss": 0.8857, "step": 40325 }, { "epoch": 0.4915115839762105, "grad_norm": 2.033223867416382, "learning_rate": 2.6763309813983322e-06, "loss": 0.8314, "step": 40330 }, { "epoch": 0.4915725202003583, "grad_norm": 2.397688865661621, "learning_rate": 2.676010262989096e-06, "loss": 0.8089, "step": 40335 }, { "epoch": 0.4916334564245061, "grad_norm": 2.277120590209961, "learning_rate": 2.675689544579859e-06, "loss": 0.8689, "step": 40340 }, { "epoch": 0.49169439264865394, "grad_norm": 1.6121982336044312, "learning_rate": 2.675368826170622e-06, "loss": 0.7452, "step": 40345 }, { "epoch": 0.4917553288728017, "grad_norm": 2.0708138942718506, "learning_rate": 2.6750481077613856e-06, "loss": 0.8715, "step": 40350 }, { "epoch": 0.49181626509694953, "grad_norm": 1.8301805257797241, "learning_rate": 2.674727389352149e-06, "loss": 0.7925, "step": 40355 }, { "epoch": 0.49187720132109736, "grad_norm": 2.1299893856048584, "learning_rate": 2.6744066709429124e-06, "loss": 0.8031, "step": 40360 }, { "epoch": 0.4919381375452451, "grad_norm": 1.7959166765213013, "learning_rate": 2.6740859525336755e-06, "loss": 0.7982, "step": 40365 }, { "epoch": 0.49199907376939295, "grad_norm": 1.8576836585998535, "learning_rate": 2.6737652341244393e-06, "loss": 0.8439, "step": 40370 }, { "epoch": 0.49206000999354077, "grad_norm": 1.9162466526031494, "learning_rate": 2.6734445157152023e-06, "loss": 0.7887, "step": 40375 }, { "epoch": 0.4921209462176886, "grad_norm": 2.1816890239715576, "learning_rate": 2.6731237973059654e-06, "loss": 0.752, "step": 40380 }, { "epoch": 0.49218188244183636, "grad_norm": 1.8869614601135254, "learning_rate": 2.6728030788967292e-06, "loss": 0.8625, "step": 40385 }, { "epoch": 0.4922428186659842, "grad_norm": 1.9242485761642456, "learning_rate": 2.6724823604874922e-06, "loss": 0.8732, "step": 40390 }, { "epoch": 0.492303754890132, "grad_norm": 1.925431251525879, "learning_rate": 2.6721616420782553e-06, "loss": 0.8621, "step": 40395 }, { "epoch": 0.4923646911142798, "grad_norm": 2.0563018321990967, "learning_rate": 2.671840923669019e-06, "loss": 0.8119, "step": 40400 }, { "epoch": 0.4924256273384276, "grad_norm": 2.0399577617645264, "learning_rate": 2.671520205259782e-06, "loss": 0.8418, "step": 40405 }, { "epoch": 0.4924865635625754, "grad_norm": 1.9344687461853027, "learning_rate": 2.671199486850545e-06, "loss": 0.8956, "step": 40410 }, { "epoch": 0.49254749978672324, "grad_norm": 1.9870858192443848, "learning_rate": 2.670878768441309e-06, "loss": 0.8361, "step": 40415 }, { "epoch": 0.492608436010871, "grad_norm": 1.841984748840332, "learning_rate": 2.670558050032072e-06, "loss": 0.8175, "step": 40420 }, { "epoch": 0.49266937223501883, "grad_norm": 2.0821802616119385, "learning_rate": 2.6702373316228355e-06, "loss": 0.8171, "step": 40425 }, { "epoch": 0.49273030845916665, "grad_norm": 2.1396312713623047, "learning_rate": 2.6699166132135985e-06, "loss": 0.802, "step": 40430 }, { "epoch": 0.4927912446833144, "grad_norm": 2.0058863162994385, "learning_rate": 2.669595894804362e-06, "loss": 0.8693, "step": 40435 }, { "epoch": 0.49285218090746225, "grad_norm": 1.9688283205032349, "learning_rate": 2.6692751763951254e-06, "loss": 0.925, "step": 40440 }, { "epoch": 0.49291311713161007, "grad_norm": 1.7545875310897827, "learning_rate": 2.6689544579858884e-06, "loss": 0.8458, "step": 40445 }, { "epoch": 0.4929740533557579, "grad_norm": 2.08528995513916, "learning_rate": 2.6686337395766522e-06, "loss": 0.8792, "step": 40450 }, { "epoch": 0.49303498957990566, "grad_norm": 1.8753496408462524, "learning_rate": 2.6683130211674153e-06, "loss": 0.8424, "step": 40455 }, { "epoch": 0.4930959258040535, "grad_norm": 2.0016744136810303, "learning_rate": 2.6679923027581783e-06, "loss": 0.8292, "step": 40460 }, { "epoch": 0.4931568620282013, "grad_norm": 1.6851807832717896, "learning_rate": 2.667671584348942e-06, "loss": 0.79, "step": 40465 }, { "epoch": 0.49321779825234907, "grad_norm": 2.1448612213134766, "learning_rate": 2.667350865939705e-06, "loss": 0.8155, "step": 40470 }, { "epoch": 0.4932787344764969, "grad_norm": 1.963233232498169, "learning_rate": 2.667030147530468e-06, "loss": 0.8203, "step": 40475 }, { "epoch": 0.4933396707006447, "grad_norm": 1.9333837032318115, "learning_rate": 2.666709429121232e-06, "loss": 0.8283, "step": 40480 }, { "epoch": 0.49340060692479254, "grad_norm": 1.8109755516052246, "learning_rate": 2.666388710711995e-06, "loss": 0.8004, "step": 40485 }, { "epoch": 0.4934615431489403, "grad_norm": 2.3343818187713623, "learning_rate": 2.666067992302758e-06, "loss": 0.8571, "step": 40490 }, { "epoch": 0.49352247937308813, "grad_norm": 1.7933073043823242, "learning_rate": 2.665747273893522e-06, "loss": 0.8364, "step": 40495 }, { "epoch": 0.49358341559723595, "grad_norm": 1.9061110019683838, "learning_rate": 2.665426555484285e-06, "loss": 0.7959, "step": 40500 }, { "epoch": 0.4936443518213837, "grad_norm": 1.7542661428451538, "learning_rate": 2.6651058370750484e-06, "loss": 0.7919, "step": 40505 }, { "epoch": 0.49370528804553154, "grad_norm": 2.2464301586151123, "learning_rate": 2.664785118665812e-06, "loss": 0.8466, "step": 40510 }, { "epoch": 0.49376622426967937, "grad_norm": 2.1622636318206787, "learning_rate": 2.664464400256575e-06, "loss": 0.8269, "step": 40515 }, { "epoch": 0.49382716049382713, "grad_norm": 1.655032992362976, "learning_rate": 2.6641436818473383e-06, "loss": 0.7864, "step": 40520 }, { "epoch": 0.49388809671797496, "grad_norm": 2.1023330688476562, "learning_rate": 2.6638229634381013e-06, "loss": 0.8337, "step": 40525 }, { "epoch": 0.4939490329421228, "grad_norm": 2.3171188831329346, "learning_rate": 2.663502245028865e-06, "loss": 0.8264, "step": 40530 }, { "epoch": 0.4940099691662706, "grad_norm": 1.9005285501480103, "learning_rate": 2.663181526619628e-06, "loss": 0.802, "step": 40535 }, { "epoch": 0.49407090539041837, "grad_norm": 1.8511276245117188, "learning_rate": 2.662860808210391e-06, "loss": 0.8231, "step": 40540 }, { "epoch": 0.4941318416145662, "grad_norm": 1.8154044151306152, "learning_rate": 2.662540089801155e-06, "loss": 0.8609, "step": 40545 }, { "epoch": 0.494192777838714, "grad_norm": 1.8950140476226807, "learning_rate": 2.662219371391918e-06, "loss": 0.8788, "step": 40550 }, { "epoch": 0.4942537140628618, "grad_norm": 1.6771330833435059, "learning_rate": 2.661898652982681e-06, "loss": 0.8024, "step": 40555 }, { "epoch": 0.4943146502870096, "grad_norm": 2.0228419303894043, "learning_rate": 2.661577934573445e-06, "loss": 0.8395, "step": 40560 }, { "epoch": 0.4943755865111574, "grad_norm": 2.0661582946777344, "learning_rate": 2.661257216164208e-06, "loss": 0.8603, "step": 40565 }, { "epoch": 0.49443652273530525, "grad_norm": 2.0294747352600098, "learning_rate": 2.660936497754971e-06, "loss": 0.8114, "step": 40570 }, { "epoch": 0.494497458959453, "grad_norm": 2.0596635341644287, "learning_rate": 2.660615779345735e-06, "loss": 0.8216, "step": 40575 }, { "epoch": 0.49455839518360084, "grad_norm": 2.2809572219848633, "learning_rate": 2.660295060936498e-06, "loss": 0.8788, "step": 40580 }, { "epoch": 0.49461933140774866, "grad_norm": 1.7911772727966309, "learning_rate": 2.6599743425272613e-06, "loss": 0.8468, "step": 40585 }, { "epoch": 0.49468026763189643, "grad_norm": 2.032721996307373, "learning_rate": 2.6596536241180248e-06, "loss": 0.8488, "step": 40590 }, { "epoch": 0.49474120385604425, "grad_norm": 2.4500205516815186, "learning_rate": 2.659332905708788e-06, "loss": 0.9046, "step": 40595 }, { "epoch": 0.4948021400801921, "grad_norm": 1.7255879640579224, "learning_rate": 2.6590121872995512e-06, "loss": 0.8022, "step": 40600 }, { "epoch": 0.4948630763043399, "grad_norm": 2.09279465675354, "learning_rate": 2.6586914688903142e-06, "loss": 0.8776, "step": 40605 }, { "epoch": 0.49492401252848767, "grad_norm": 1.958210825920105, "learning_rate": 2.658370750481078e-06, "loss": 0.8681, "step": 40610 }, { "epoch": 0.4949849487526355, "grad_norm": 1.7436878681182861, "learning_rate": 2.658050032071841e-06, "loss": 0.8828, "step": 40615 }, { "epoch": 0.4950458849767833, "grad_norm": 1.9041211605072021, "learning_rate": 2.657729313662604e-06, "loss": 0.8143, "step": 40620 }, { "epoch": 0.4951068212009311, "grad_norm": 1.5475201606750488, "learning_rate": 2.657408595253368e-06, "loss": 0.7685, "step": 40625 }, { "epoch": 0.4951677574250789, "grad_norm": 1.8027161359786987, "learning_rate": 2.657087876844131e-06, "loss": 0.8267, "step": 40630 }, { "epoch": 0.4952286936492267, "grad_norm": 2.3036818504333496, "learning_rate": 2.656767158434894e-06, "loss": 0.8807, "step": 40635 }, { "epoch": 0.49528962987337455, "grad_norm": 2.034935712814331, "learning_rate": 2.656446440025658e-06, "loss": 0.825, "step": 40640 }, { "epoch": 0.4953505660975223, "grad_norm": 1.9357203245162964, "learning_rate": 2.656125721616421e-06, "loss": 0.8412, "step": 40645 }, { "epoch": 0.49541150232167014, "grad_norm": 2.0918941497802734, "learning_rate": 2.6558050032071844e-06, "loss": 0.8902, "step": 40650 }, { "epoch": 0.49547243854581796, "grad_norm": 1.9113757610321045, "learning_rate": 2.655484284797948e-06, "loss": 0.8016, "step": 40655 }, { "epoch": 0.49553337476996573, "grad_norm": 1.622496247291565, "learning_rate": 2.655163566388711e-06, "loss": 0.8008, "step": 40660 }, { "epoch": 0.49559431099411355, "grad_norm": 2.06760573387146, "learning_rate": 2.6548428479794743e-06, "loss": 0.8977, "step": 40665 }, { "epoch": 0.4956552472182614, "grad_norm": 2.0603139400482178, "learning_rate": 2.6545221295702377e-06, "loss": 0.7862, "step": 40670 }, { "epoch": 0.4957161834424092, "grad_norm": 1.974556565284729, "learning_rate": 2.654201411161001e-06, "loss": 0.7733, "step": 40675 }, { "epoch": 0.49577711966655696, "grad_norm": 1.911516785621643, "learning_rate": 2.653880692751764e-06, "loss": 0.8608, "step": 40680 }, { "epoch": 0.4958380558907048, "grad_norm": 2.8421051502227783, "learning_rate": 2.653559974342527e-06, "loss": 0.7974, "step": 40685 }, { "epoch": 0.4958989921148526, "grad_norm": 2.0169453620910645, "learning_rate": 2.653239255933291e-06, "loss": 0.86, "step": 40690 }, { "epoch": 0.4959599283390004, "grad_norm": 1.919053554534912, "learning_rate": 2.652918537524054e-06, "loss": 0.7909, "step": 40695 }, { "epoch": 0.4960208645631482, "grad_norm": 2.2204551696777344, "learning_rate": 2.652597819114817e-06, "loss": 0.8141, "step": 40700 }, { "epoch": 0.496081800787296, "grad_norm": 1.920506477355957, "learning_rate": 2.652277100705581e-06, "loss": 0.8498, "step": 40705 }, { "epoch": 0.49614273701144385, "grad_norm": 1.905093789100647, "learning_rate": 2.651956382296344e-06, "loss": 0.8107, "step": 40710 }, { "epoch": 0.4962036732355916, "grad_norm": 1.6441646814346313, "learning_rate": 2.651635663887107e-06, "loss": 0.7917, "step": 40715 }, { "epoch": 0.49626460945973944, "grad_norm": 2.0544838905334473, "learning_rate": 2.651314945477871e-06, "loss": 0.7655, "step": 40720 }, { "epoch": 0.49632554568388726, "grad_norm": 1.9124125242233276, "learning_rate": 2.650994227068634e-06, "loss": 0.7893, "step": 40725 }, { "epoch": 0.496386481908035, "grad_norm": 2.0486464500427246, "learning_rate": 2.6506735086593973e-06, "loss": 0.772, "step": 40730 }, { "epoch": 0.49644741813218285, "grad_norm": 2.1330575942993164, "learning_rate": 2.6503527902501607e-06, "loss": 0.8215, "step": 40735 }, { "epoch": 0.4965083543563307, "grad_norm": 1.8050564527511597, "learning_rate": 2.6500320718409237e-06, "loss": 0.9034, "step": 40740 }, { "epoch": 0.4965692905804785, "grad_norm": 1.9016931056976318, "learning_rate": 2.649711353431687e-06, "loss": 0.8376, "step": 40745 }, { "epoch": 0.49663022680462626, "grad_norm": 1.9644845724105835, "learning_rate": 2.6493906350224506e-06, "loss": 0.8604, "step": 40750 }, { "epoch": 0.4966911630287741, "grad_norm": 2.4106483459472656, "learning_rate": 2.649069916613214e-06, "loss": 0.8736, "step": 40755 }, { "epoch": 0.4967520992529219, "grad_norm": 1.8490731716156006, "learning_rate": 2.648749198203977e-06, "loss": 0.8251, "step": 40760 }, { "epoch": 0.4968130354770697, "grad_norm": 1.9001648426055908, "learning_rate": 2.64842847979474e-06, "loss": 0.9211, "step": 40765 }, { "epoch": 0.4968739717012175, "grad_norm": 1.8020583391189575, "learning_rate": 2.648107761385504e-06, "loss": 0.8355, "step": 40770 }, { "epoch": 0.4969349079253653, "grad_norm": 1.8288772106170654, "learning_rate": 2.647787042976267e-06, "loss": 0.8957, "step": 40775 }, { "epoch": 0.49699584414951314, "grad_norm": 2.1331069469451904, "learning_rate": 2.64746632456703e-06, "loss": 0.8242, "step": 40780 }, { "epoch": 0.4970567803736609, "grad_norm": 1.8501332998275757, "learning_rate": 2.647145606157794e-06, "loss": 0.8409, "step": 40785 }, { "epoch": 0.49711771659780873, "grad_norm": 2.012040853500366, "learning_rate": 2.646824887748557e-06, "loss": 0.8358, "step": 40790 }, { "epoch": 0.49717865282195656, "grad_norm": 1.6555465459823608, "learning_rate": 2.64650416933932e-06, "loss": 0.8356, "step": 40795 }, { "epoch": 0.4972395890461043, "grad_norm": 1.5154200792312622, "learning_rate": 2.6461834509300838e-06, "loss": 0.804, "step": 40800 }, { "epoch": 0.49730052527025215, "grad_norm": 2.3287646770477295, "learning_rate": 2.6458627325208468e-06, "loss": 0.8661, "step": 40805 }, { "epoch": 0.49736146149439997, "grad_norm": 1.7082715034484863, "learning_rate": 2.64554201411161e-06, "loss": 0.8215, "step": 40810 }, { "epoch": 0.4974223977185478, "grad_norm": 1.986526370048523, "learning_rate": 2.6452212957023736e-06, "loss": 0.8292, "step": 40815 }, { "epoch": 0.49748333394269556, "grad_norm": 2.1600944995880127, "learning_rate": 2.6449005772931367e-06, "loss": 0.8178, "step": 40820 }, { "epoch": 0.4975442701668434, "grad_norm": 1.939673662185669, "learning_rate": 2.6445798588839e-06, "loss": 0.862, "step": 40825 }, { "epoch": 0.4976052063909912, "grad_norm": 2.007662773132324, "learning_rate": 2.6442591404746635e-06, "loss": 0.8779, "step": 40830 }, { "epoch": 0.497666142615139, "grad_norm": 1.8971816301345825, "learning_rate": 2.643938422065427e-06, "loss": 0.8572, "step": 40835 }, { "epoch": 0.4977270788392868, "grad_norm": 2.0629334449768066, "learning_rate": 2.64361770365619e-06, "loss": 0.7995, "step": 40840 }, { "epoch": 0.4977880150634346, "grad_norm": 2.404284715652466, "learning_rate": 2.643296985246954e-06, "loss": 0.857, "step": 40845 }, { "epoch": 0.49784895128758244, "grad_norm": 2.187466859817505, "learning_rate": 2.642976266837717e-06, "loss": 0.8742, "step": 40850 }, { "epoch": 0.4979098875117302, "grad_norm": 1.8439067602157593, "learning_rate": 2.64265554842848e-06, "loss": 0.8449, "step": 40855 }, { "epoch": 0.49797082373587803, "grad_norm": 1.7401442527770996, "learning_rate": 2.642334830019243e-06, "loss": 0.8462, "step": 40860 }, { "epoch": 0.49803175996002585, "grad_norm": 1.6873178482055664, "learning_rate": 2.6420141116100068e-06, "loss": 0.7951, "step": 40865 }, { "epoch": 0.4980926961841736, "grad_norm": 2.4350011348724365, "learning_rate": 2.64169339320077e-06, "loss": 0.7825, "step": 40870 }, { "epoch": 0.49815363240832145, "grad_norm": 2.718079090118408, "learning_rate": 2.6413726747915332e-06, "loss": 0.8422, "step": 40875 }, { "epoch": 0.49821456863246927, "grad_norm": 1.7224770784378052, "learning_rate": 2.6410519563822967e-06, "loss": 0.8126, "step": 40880 }, { "epoch": 0.4982755048566171, "grad_norm": 2.0310535430908203, "learning_rate": 2.6407312379730597e-06, "loss": 0.8403, "step": 40885 }, { "epoch": 0.49833644108076486, "grad_norm": 1.8933196067810059, "learning_rate": 2.640410519563823e-06, "loss": 0.8413, "step": 40890 }, { "epoch": 0.4983973773049127, "grad_norm": 1.8799123764038086, "learning_rate": 2.6400898011545866e-06, "loss": 0.8921, "step": 40895 }, { "epoch": 0.4984583135290605, "grad_norm": 2.3351986408233643, "learning_rate": 2.63976908274535e-06, "loss": 0.8779, "step": 40900 }, { "epoch": 0.49851924975320827, "grad_norm": 2.21317982673645, "learning_rate": 2.639448364336113e-06, "loss": 0.8641, "step": 40905 }, { "epoch": 0.4985801859773561, "grad_norm": 1.9248135089874268, "learning_rate": 2.6391276459268765e-06, "loss": 0.8756, "step": 40910 }, { "epoch": 0.4986411222015039, "grad_norm": 2.5292675495147705, "learning_rate": 2.63880692751764e-06, "loss": 0.7036, "step": 40915 }, { "epoch": 0.49870205842565174, "grad_norm": 1.9845192432403564, "learning_rate": 2.638486209108403e-06, "loss": 0.7857, "step": 40920 }, { "epoch": 0.4987629946497995, "grad_norm": 1.957316279411316, "learning_rate": 2.638165490699167e-06, "loss": 0.8593, "step": 40925 }, { "epoch": 0.49882393087394733, "grad_norm": 1.7747067213058472, "learning_rate": 2.63784477228993e-06, "loss": 0.8403, "step": 40930 }, { "epoch": 0.49888486709809515, "grad_norm": 1.9462369680404663, "learning_rate": 2.637524053880693e-06, "loss": 0.779, "step": 40935 }, { "epoch": 0.4989458033222429, "grad_norm": 2.1145129203796387, "learning_rate": 2.637203335471456e-06, "loss": 0.8714, "step": 40940 }, { "epoch": 0.49900673954639074, "grad_norm": 1.7456285953521729, "learning_rate": 2.6368826170622197e-06, "loss": 0.8274, "step": 40945 }, { "epoch": 0.49906767577053857, "grad_norm": 1.902636170387268, "learning_rate": 2.6365618986529827e-06, "loss": 0.7706, "step": 40950 }, { "epoch": 0.4991286119946864, "grad_norm": 1.7834982872009277, "learning_rate": 2.636241180243746e-06, "loss": 0.8328, "step": 40955 }, { "epoch": 0.49918954821883416, "grad_norm": 2.526843547821045, "learning_rate": 2.6359204618345096e-06, "loss": 0.8169, "step": 40960 }, { "epoch": 0.499250484442982, "grad_norm": 1.9089796543121338, "learning_rate": 2.6355997434252726e-06, "loss": 0.8088, "step": 40965 }, { "epoch": 0.4993114206671298, "grad_norm": 2.1464285850524902, "learning_rate": 2.635279025016036e-06, "loss": 0.8662, "step": 40970 }, { "epoch": 0.49937235689127757, "grad_norm": 1.8755834102630615, "learning_rate": 2.6349583066067995e-06, "loss": 0.7829, "step": 40975 }, { "epoch": 0.4994332931154254, "grad_norm": 1.7993448972702026, "learning_rate": 2.634637588197563e-06, "loss": 0.7821, "step": 40980 }, { "epoch": 0.4994942293395732, "grad_norm": 2.1912286281585693, "learning_rate": 2.634316869788326e-06, "loss": 0.8595, "step": 40985 }, { "epoch": 0.499555165563721, "grad_norm": 1.8754600286483765, "learning_rate": 2.6339961513790894e-06, "loss": 0.9063, "step": 40990 }, { "epoch": 0.4996161017878688, "grad_norm": 2.379838705062866, "learning_rate": 2.633675432969853e-06, "loss": 0.8044, "step": 40995 }, { "epoch": 0.4996770380120166, "grad_norm": 1.9617220163345337, "learning_rate": 2.633354714560616e-06, "loss": 0.8341, "step": 41000 }, { "epoch": 0.49973797423616445, "grad_norm": 2.517746686935425, "learning_rate": 2.6330339961513797e-06, "loss": 0.8106, "step": 41005 }, { "epoch": 0.4997989104603122, "grad_norm": 1.5969847440719604, "learning_rate": 2.6327132777421427e-06, "loss": 0.785, "step": 41010 }, { "epoch": 0.49985984668446004, "grad_norm": 1.8816229104995728, "learning_rate": 2.6323925593329058e-06, "loss": 0.8067, "step": 41015 }, { "epoch": 0.49992078290860786, "grad_norm": 1.8421103954315186, "learning_rate": 2.6320718409236688e-06, "loss": 0.8235, "step": 41020 }, { "epoch": 0.49998171913275563, "grad_norm": 2.3164658546447754, "learning_rate": 2.6317511225144326e-06, "loss": 0.9059, "step": 41025 }, { "epoch": 0.5000426553569035, "grad_norm": 1.9844270944595337, "learning_rate": 2.6314304041051957e-06, "loss": 0.8651, "step": 41030 }, { "epoch": 0.5001035915810512, "grad_norm": 2.1451873779296875, "learning_rate": 2.631109685695959e-06, "loss": 0.8137, "step": 41035 }, { "epoch": 0.500164527805199, "grad_norm": 2.1535451412200928, "learning_rate": 2.6307889672867225e-06, "loss": 0.8825, "step": 41040 }, { "epoch": 0.5002254640293469, "grad_norm": 1.772133231163025, "learning_rate": 2.6304682488774855e-06, "loss": 0.8264, "step": 41045 }, { "epoch": 0.5002864002534947, "grad_norm": 1.9725208282470703, "learning_rate": 2.630147530468249e-06, "loss": 0.8687, "step": 41050 }, { "epoch": 0.5003473364776425, "grad_norm": 2.185882806777954, "learning_rate": 2.6298268120590124e-06, "loss": 0.7496, "step": 41055 }, { "epoch": 0.5004082727017903, "grad_norm": 2.0683236122131348, "learning_rate": 2.629506093649776e-06, "loss": 0.8346, "step": 41060 }, { "epoch": 0.5004692089259382, "grad_norm": 1.8019040822982788, "learning_rate": 2.629185375240539e-06, "loss": 0.8616, "step": 41065 }, { "epoch": 0.5005301451500859, "grad_norm": 1.9770861864089966, "learning_rate": 2.6288646568313027e-06, "loss": 0.8173, "step": 41070 }, { "epoch": 0.5005910813742337, "grad_norm": 1.941605806350708, "learning_rate": 2.6285439384220658e-06, "loss": 0.8431, "step": 41075 }, { "epoch": 0.5006520175983815, "grad_norm": 2.381781578063965, "learning_rate": 2.6282232200128288e-06, "loss": 0.7791, "step": 41080 }, { "epoch": 0.5007129538225293, "grad_norm": 1.9268780946731567, "learning_rate": 2.6279025016035926e-06, "loss": 0.8102, "step": 41085 }, { "epoch": 0.5007738900466772, "grad_norm": 2.073873519897461, "learning_rate": 2.6275817831943557e-06, "loss": 0.8785, "step": 41090 }, { "epoch": 0.500834826270825, "grad_norm": 1.8878788948059082, "learning_rate": 2.6272610647851187e-06, "loss": 0.8749, "step": 41095 }, { "epoch": 0.5008957624949728, "grad_norm": 2.039196491241455, "learning_rate": 2.6269403463758825e-06, "loss": 0.7996, "step": 41100 }, { "epoch": 0.5009566987191205, "grad_norm": 1.9075723886489868, "learning_rate": 2.6266196279666456e-06, "loss": 0.8043, "step": 41105 }, { "epoch": 0.5010176349432683, "grad_norm": 2.3331122398376465, "learning_rate": 2.6262989095574086e-06, "loss": 0.9448, "step": 41110 }, { "epoch": 0.5010785711674162, "grad_norm": 2.010854482650757, "learning_rate": 2.625978191148172e-06, "loss": 0.8628, "step": 41115 }, { "epoch": 0.501139507391564, "grad_norm": 1.9900434017181396, "learning_rate": 2.6256574727389355e-06, "loss": 0.8213, "step": 41120 }, { "epoch": 0.5012004436157118, "grad_norm": 2.085739850997925, "learning_rate": 2.625336754329699e-06, "loss": 0.8167, "step": 41125 }, { "epoch": 0.5012613798398596, "grad_norm": 1.6017019748687744, "learning_rate": 2.625016035920462e-06, "loss": 0.7838, "step": 41130 }, { "epoch": 0.5013223160640075, "grad_norm": 1.9834963083267212, "learning_rate": 2.6246953175112254e-06, "loss": 0.8225, "step": 41135 }, { "epoch": 0.5013832522881552, "grad_norm": 2.0292253494262695, "learning_rate": 2.624374599101989e-06, "loss": 0.8664, "step": 41140 }, { "epoch": 0.501444188512303, "grad_norm": 2.393554210662842, "learning_rate": 2.624053880692752e-06, "loss": 0.8261, "step": 41145 }, { "epoch": 0.5015051247364508, "grad_norm": 2.003131151199341, "learning_rate": 2.6237331622835157e-06, "loss": 0.8095, "step": 41150 }, { "epoch": 0.5015660609605986, "grad_norm": 1.9039266109466553, "learning_rate": 2.6234124438742787e-06, "loss": 0.7881, "step": 41155 }, { "epoch": 0.5016269971847465, "grad_norm": 1.83786940574646, "learning_rate": 2.6230917254650417e-06, "loss": 0.8643, "step": 41160 }, { "epoch": 0.5016879334088943, "grad_norm": 1.8525538444519043, "learning_rate": 2.6227710070558056e-06, "loss": 0.8184, "step": 41165 }, { "epoch": 0.5017488696330421, "grad_norm": 1.8386917114257812, "learning_rate": 2.6224502886465686e-06, "loss": 0.8044, "step": 41170 }, { "epoch": 0.5018098058571898, "grad_norm": 1.9024572372436523, "learning_rate": 2.6221295702373316e-06, "loss": 0.8381, "step": 41175 }, { "epoch": 0.5018707420813376, "grad_norm": 1.8807069063186646, "learning_rate": 2.6218088518280955e-06, "loss": 0.8005, "step": 41180 }, { "epoch": 0.5019316783054855, "grad_norm": 1.804565668106079, "learning_rate": 2.6214881334188585e-06, "loss": 0.8341, "step": 41185 }, { "epoch": 0.5019926145296333, "grad_norm": 1.8218767642974854, "learning_rate": 2.6211674150096215e-06, "loss": 0.8539, "step": 41190 }, { "epoch": 0.5020535507537811, "grad_norm": 2.3456451892852783, "learning_rate": 2.620846696600385e-06, "loss": 0.8667, "step": 41195 }, { "epoch": 0.5021144869779289, "grad_norm": 2.315382242202759, "learning_rate": 2.6205259781911484e-06, "loss": 0.8623, "step": 41200 }, { "epoch": 0.5021754232020768, "grad_norm": 2.0544142723083496, "learning_rate": 2.620205259781912e-06, "loss": 0.813, "step": 41205 }, { "epoch": 0.5022363594262245, "grad_norm": 1.9302517175674438, "learning_rate": 2.619884541372675e-06, "loss": 0.8213, "step": 41210 }, { "epoch": 0.5022972956503723, "grad_norm": 2.1588430404663086, "learning_rate": 2.6195638229634383e-06, "loss": 0.873, "step": 41215 }, { "epoch": 0.5023582318745201, "grad_norm": 1.87981379032135, "learning_rate": 2.6192431045542017e-06, "loss": 0.8364, "step": 41220 }, { "epoch": 0.5024191680986679, "grad_norm": 2.0443127155303955, "learning_rate": 2.6189223861449647e-06, "loss": 0.8124, "step": 41225 }, { "epoch": 0.5024801043228158, "grad_norm": 1.7023955583572388, "learning_rate": 2.6186016677357286e-06, "loss": 0.8404, "step": 41230 }, { "epoch": 0.5025410405469636, "grad_norm": 1.9579070806503296, "learning_rate": 2.6182809493264916e-06, "loss": 0.8466, "step": 41235 }, { "epoch": 0.5026019767711114, "grad_norm": 2.3306989669799805, "learning_rate": 2.6179602309172546e-06, "loss": 0.9423, "step": 41240 }, { "epoch": 0.5026629129952591, "grad_norm": 2.030141830444336, "learning_rate": 2.6176395125080185e-06, "loss": 0.8331, "step": 41245 }, { "epoch": 0.5027238492194069, "grad_norm": 2.046870470046997, "learning_rate": 2.6173187940987815e-06, "loss": 0.7895, "step": 41250 }, { "epoch": 0.5027847854435548, "grad_norm": 2.1201906204223633, "learning_rate": 2.6169980756895445e-06, "loss": 0.7639, "step": 41255 }, { "epoch": 0.5028457216677026, "grad_norm": 1.8865209817886353, "learning_rate": 2.6166773572803084e-06, "loss": 0.8273, "step": 41260 }, { "epoch": 0.5029066578918504, "grad_norm": 1.8206769227981567, "learning_rate": 2.6163566388710714e-06, "loss": 0.7971, "step": 41265 }, { "epoch": 0.5029675941159982, "grad_norm": 2.156071424484253, "learning_rate": 2.6160359204618344e-06, "loss": 0.908, "step": 41270 }, { "epoch": 0.503028530340146, "grad_norm": 1.5988481044769287, "learning_rate": 2.615715202052598e-06, "loss": 0.8015, "step": 41275 }, { "epoch": 0.5030894665642938, "grad_norm": 1.7100118398666382, "learning_rate": 2.6153944836433613e-06, "loss": 0.7808, "step": 41280 }, { "epoch": 0.5031504027884416, "grad_norm": 2.230398416519165, "learning_rate": 2.6150737652341248e-06, "loss": 0.8587, "step": 41285 }, { "epoch": 0.5032113390125894, "grad_norm": 1.7528173923492432, "learning_rate": 2.6147530468248878e-06, "loss": 0.8093, "step": 41290 }, { "epoch": 0.5032722752367372, "grad_norm": 2.082578182220459, "learning_rate": 2.614432328415651e-06, "loss": 0.8382, "step": 41295 }, { "epoch": 0.503333211460885, "grad_norm": 2.167717695236206, "learning_rate": 2.6141116100064147e-06, "loss": 0.8018, "step": 41300 }, { "epoch": 0.5033941476850329, "grad_norm": 2.0342037677764893, "learning_rate": 2.6137908915971777e-06, "loss": 0.7801, "step": 41305 }, { "epoch": 0.5034550839091807, "grad_norm": 2.228346347808838, "learning_rate": 2.6134701731879415e-06, "loss": 0.8176, "step": 41310 }, { "epoch": 0.5035160201333284, "grad_norm": 1.9582785367965698, "learning_rate": 2.6131494547787045e-06, "loss": 0.867, "step": 41315 }, { "epoch": 0.5035769563574762, "grad_norm": 1.747490644454956, "learning_rate": 2.6128287363694676e-06, "loss": 0.7938, "step": 41320 }, { "epoch": 0.5036378925816241, "grad_norm": 1.9651588201522827, "learning_rate": 2.6125080179602314e-06, "loss": 0.8363, "step": 41325 }, { "epoch": 0.5036988288057719, "grad_norm": 2.070573568344116, "learning_rate": 2.6121872995509944e-06, "loss": 0.8353, "step": 41330 }, { "epoch": 0.5037597650299197, "grad_norm": 2.3930835723876953, "learning_rate": 2.6118665811417575e-06, "loss": 0.8391, "step": 41335 }, { "epoch": 0.5038207012540675, "grad_norm": 1.9226603507995605, "learning_rate": 2.6115458627325213e-06, "loss": 0.8871, "step": 41340 }, { "epoch": 0.5038816374782153, "grad_norm": 1.9494539499282837, "learning_rate": 2.6112251443232843e-06, "loss": 0.8194, "step": 41345 }, { "epoch": 0.5039425737023631, "grad_norm": 1.8096474409103394, "learning_rate": 2.6109044259140478e-06, "loss": 0.813, "step": 41350 }, { "epoch": 0.5040035099265109, "grad_norm": 2.3241798877716064, "learning_rate": 2.610583707504811e-06, "loss": 0.8947, "step": 41355 }, { "epoch": 0.5040644461506587, "grad_norm": 1.9258008003234863, "learning_rate": 2.6102629890955742e-06, "loss": 0.8291, "step": 41360 }, { "epoch": 0.5041253823748065, "grad_norm": 2.0798799991607666, "learning_rate": 2.6099422706863377e-06, "loss": 0.7897, "step": 41365 }, { "epoch": 0.5041863185989544, "grad_norm": 2.174025774002075, "learning_rate": 2.6096215522771007e-06, "loss": 0.8977, "step": 41370 }, { "epoch": 0.5042472548231022, "grad_norm": 1.9432600736618042, "learning_rate": 2.6093008338678646e-06, "loss": 0.7791, "step": 41375 }, { "epoch": 0.50430819104725, "grad_norm": 1.7969099283218384, "learning_rate": 2.6089801154586276e-06, "loss": 0.8051, "step": 41380 }, { "epoch": 0.5043691272713977, "grad_norm": 2.5508365631103516, "learning_rate": 2.6086593970493906e-06, "loss": 0.8527, "step": 41385 }, { "epoch": 0.5044300634955455, "grad_norm": 1.862448811531067, "learning_rate": 2.6083386786401545e-06, "loss": 0.8474, "step": 41390 }, { "epoch": 0.5044909997196934, "grad_norm": 1.6153452396392822, "learning_rate": 2.6080179602309175e-06, "loss": 0.9209, "step": 41395 }, { "epoch": 0.5045519359438412, "grad_norm": 1.9801568984985352, "learning_rate": 2.6076972418216805e-06, "loss": 0.87, "step": 41400 }, { "epoch": 0.504612872167989, "grad_norm": 2.0664215087890625, "learning_rate": 2.6073765234124444e-06, "loss": 0.876, "step": 41405 }, { "epoch": 0.5046738083921368, "grad_norm": 2.0338430404663086, "learning_rate": 2.6070558050032074e-06, "loss": 0.834, "step": 41410 }, { "epoch": 0.5047347446162846, "grad_norm": 1.67015540599823, "learning_rate": 2.6067350865939704e-06, "loss": 0.8388, "step": 41415 }, { "epoch": 0.5047956808404324, "grad_norm": 2.0986320972442627, "learning_rate": 2.6064143681847343e-06, "loss": 0.8025, "step": 41420 }, { "epoch": 0.5048566170645802, "grad_norm": 1.8202810287475586, "learning_rate": 2.6060936497754973e-06, "loss": 0.8359, "step": 41425 }, { "epoch": 0.504917553288728, "grad_norm": 1.706434726715088, "learning_rate": 2.6057729313662607e-06, "loss": 0.8781, "step": 41430 }, { "epoch": 0.5049784895128758, "grad_norm": 2.3907649517059326, "learning_rate": 2.605452212957024e-06, "loss": 0.904, "step": 41435 }, { "epoch": 0.5050394257370237, "grad_norm": 1.9588991403579712, "learning_rate": 2.605131494547787e-06, "loss": 0.8463, "step": 41440 }, { "epoch": 0.5051003619611715, "grad_norm": 1.9676893949508667, "learning_rate": 2.6048107761385506e-06, "loss": 0.8708, "step": 41445 }, { "epoch": 0.5051612981853193, "grad_norm": 1.8410660028457642, "learning_rate": 2.6044900577293136e-06, "loss": 0.797, "step": 41450 }, { "epoch": 0.505222234409467, "grad_norm": 1.9803844690322876, "learning_rate": 2.6041693393200775e-06, "loss": 0.8901, "step": 41455 }, { "epoch": 0.5052831706336148, "grad_norm": 2.052011489868164, "learning_rate": 2.6038486209108405e-06, "loss": 0.8014, "step": 41460 }, { "epoch": 0.5053441068577627, "grad_norm": 1.9182629585266113, "learning_rate": 2.6035279025016035e-06, "loss": 0.8882, "step": 41465 }, { "epoch": 0.5054050430819105, "grad_norm": 1.701393723487854, "learning_rate": 2.6032071840923674e-06, "loss": 0.9363, "step": 41470 }, { "epoch": 0.5054659793060583, "grad_norm": 1.7122775316238403, "learning_rate": 2.6028864656831304e-06, "loss": 0.7416, "step": 41475 }, { "epoch": 0.5055269155302061, "grad_norm": 1.8148434162139893, "learning_rate": 2.6025657472738934e-06, "loss": 0.8215, "step": 41480 }, { "epoch": 0.505587851754354, "grad_norm": 2.1306183338165283, "learning_rate": 2.6022450288646573e-06, "loss": 0.8034, "step": 41485 }, { "epoch": 0.5056487879785017, "grad_norm": 2.02585768699646, "learning_rate": 2.6019243104554203e-06, "loss": 0.8219, "step": 41490 }, { "epoch": 0.5057097242026495, "grad_norm": 1.830508828163147, "learning_rate": 2.6016035920461833e-06, "loss": 0.7924, "step": 41495 }, { "epoch": 0.5057706604267973, "grad_norm": 1.9728602170944214, "learning_rate": 2.601282873636947e-06, "loss": 0.8659, "step": 41500 }, { "epoch": 0.5058315966509451, "grad_norm": 1.8169108629226685, "learning_rate": 2.60096215522771e-06, "loss": 0.8764, "step": 41505 }, { "epoch": 0.505892532875093, "grad_norm": 1.8772997856140137, "learning_rate": 2.6006414368184736e-06, "loss": 0.8081, "step": 41510 }, { "epoch": 0.5059534690992408, "grad_norm": 2.0647127628326416, "learning_rate": 2.600320718409237e-06, "loss": 0.8232, "step": 41515 }, { "epoch": 0.5060144053233886, "grad_norm": 2.0569026470184326, "learning_rate": 2.6e-06, "loss": 0.7912, "step": 41520 }, { "epoch": 0.5060753415475363, "grad_norm": 1.8657649755477905, "learning_rate": 2.5996792815907635e-06, "loss": 0.8436, "step": 41525 }, { "epoch": 0.5061362777716841, "grad_norm": 1.982480525970459, "learning_rate": 2.5993585631815266e-06, "loss": 0.8429, "step": 41530 }, { "epoch": 0.506197213995832, "grad_norm": 1.9113311767578125, "learning_rate": 2.5990378447722904e-06, "loss": 0.7826, "step": 41535 }, { "epoch": 0.5062581502199798, "grad_norm": 2.0072948932647705, "learning_rate": 2.5987171263630534e-06, "loss": 0.8211, "step": 41540 }, { "epoch": 0.5063190864441276, "grad_norm": 2.1302490234375, "learning_rate": 2.5983964079538164e-06, "loss": 0.9136, "step": 41545 }, { "epoch": 0.5063800226682754, "grad_norm": 2.006117582321167, "learning_rate": 2.5980756895445803e-06, "loss": 0.8615, "step": 41550 }, { "epoch": 0.5064409588924232, "grad_norm": 2.1485981941223145, "learning_rate": 2.5977549711353433e-06, "loss": 0.8245, "step": 41555 }, { "epoch": 0.506501895116571, "grad_norm": 1.842011570930481, "learning_rate": 2.5974342527261063e-06, "loss": 0.8041, "step": 41560 }, { "epoch": 0.5065628313407188, "grad_norm": 1.7795395851135254, "learning_rate": 2.59711353431687e-06, "loss": 0.8151, "step": 41565 }, { "epoch": 0.5066237675648666, "grad_norm": 1.7861977815628052, "learning_rate": 2.5967928159076332e-06, "loss": 0.8683, "step": 41570 }, { "epoch": 0.5066847037890144, "grad_norm": 1.94487726688385, "learning_rate": 2.5964720974983967e-06, "loss": 0.789, "step": 41575 }, { "epoch": 0.5067456400131622, "grad_norm": 1.9362088441848755, "learning_rate": 2.59615137908916e-06, "loss": 0.7743, "step": 41580 }, { "epoch": 0.5068065762373101, "grad_norm": 2.188084125518799, "learning_rate": 2.595830660679923e-06, "loss": 0.7693, "step": 41585 }, { "epoch": 0.5068675124614579, "grad_norm": 2.253629207611084, "learning_rate": 2.5955099422706866e-06, "loss": 0.8158, "step": 41590 }, { "epoch": 0.5069284486856056, "grad_norm": 2.005898952484131, "learning_rate": 2.59518922386145e-06, "loss": 0.8278, "step": 41595 }, { "epoch": 0.5069893849097534, "grad_norm": 1.7273268699645996, "learning_rate": 2.5948685054522134e-06, "loss": 0.8605, "step": 41600 }, { "epoch": 0.5070503211339012, "grad_norm": 1.7992221117019653, "learning_rate": 2.5945477870429765e-06, "loss": 0.7402, "step": 41605 }, { "epoch": 0.5071112573580491, "grad_norm": 1.66240394115448, "learning_rate": 2.5942270686337395e-06, "loss": 0.8088, "step": 41610 }, { "epoch": 0.5071721935821969, "grad_norm": 5.523091793060303, "learning_rate": 2.5939063502245033e-06, "loss": 0.8379, "step": 41615 }, { "epoch": 0.5072331298063447, "grad_norm": 2.18446946144104, "learning_rate": 2.5935856318152664e-06, "loss": 0.769, "step": 41620 }, { "epoch": 0.5072940660304925, "grad_norm": 1.9937506914138794, "learning_rate": 2.5932649134060294e-06, "loss": 0.7783, "step": 41625 }, { "epoch": 0.5073550022546403, "grad_norm": 2.1582677364349365, "learning_rate": 2.5929441949967932e-06, "loss": 0.8284, "step": 41630 }, { "epoch": 0.5074159384787881, "grad_norm": 1.6440473794937134, "learning_rate": 2.5926234765875563e-06, "loss": 0.8382, "step": 41635 }, { "epoch": 0.5074768747029359, "grad_norm": 2.4141385555267334, "learning_rate": 2.5923027581783193e-06, "loss": 0.8651, "step": 41640 }, { "epoch": 0.5075378109270837, "grad_norm": 1.8326499462127686, "learning_rate": 2.591982039769083e-06, "loss": 0.83, "step": 41645 }, { "epoch": 0.5075987471512315, "grad_norm": 2.229968786239624, "learning_rate": 2.591661321359846e-06, "loss": 0.8615, "step": 41650 }, { "epoch": 0.5076596833753794, "grad_norm": 2.3832106590270996, "learning_rate": 2.5913406029506096e-06, "loss": 0.8969, "step": 41655 }, { "epoch": 0.5077206195995272, "grad_norm": 1.782114863395691, "learning_rate": 2.591019884541373e-06, "loss": 0.8905, "step": 41660 }, { "epoch": 0.5077815558236749, "grad_norm": 2.148503303527832, "learning_rate": 2.590699166132136e-06, "loss": 0.8938, "step": 41665 }, { "epoch": 0.5078424920478227, "grad_norm": 1.9475300312042236, "learning_rate": 2.5903784477228995e-06, "loss": 0.8808, "step": 41670 }, { "epoch": 0.5079034282719705, "grad_norm": 2.195624828338623, "learning_rate": 2.590057729313663e-06, "loss": 0.8523, "step": 41675 }, { "epoch": 0.5079643644961184, "grad_norm": 1.791473150253296, "learning_rate": 2.5897370109044264e-06, "loss": 0.8106, "step": 41680 }, { "epoch": 0.5080253007202662, "grad_norm": 1.8989577293395996, "learning_rate": 2.5894162924951894e-06, "loss": 0.8532, "step": 41685 }, { "epoch": 0.508086236944414, "grad_norm": 1.928101658821106, "learning_rate": 2.589095574085953e-06, "loss": 0.8133, "step": 41690 }, { "epoch": 0.5081471731685618, "grad_norm": 2.3085923194885254, "learning_rate": 2.5887748556767163e-06, "loss": 0.8459, "step": 41695 }, { "epoch": 0.5082081093927096, "grad_norm": 2.1403985023498535, "learning_rate": 2.5884541372674793e-06, "loss": 0.8026, "step": 41700 }, { "epoch": 0.5082690456168574, "grad_norm": 1.8975001573562622, "learning_rate": 2.5881334188582423e-06, "loss": 0.8242, "step": 41705 }, { "epoch": 0.5083299818410052, "grad_norm": 2.0948634147644043, "learning_rate": 2.587812700449006e-06, "loss": 0.8719, "step": 41710 }, { "epoch": 0.508390918065153, "grad_norm": 1.8051133155822754, "learning_rate": 2.587491982039769e-06, "loss": 0.7734, "step": 41715 }, { "epoch": 0.5084518542893008, "grad_norm": 1.951296091079712, "learning_rate": 2.587171263630532e-06, "loss": 0.8316, "step": 41720 }, { "epoch": 0.5085127905134487, "grad_norm": 1.744685173034668, "learning_rate": 2.586850545221296e-06, "loss": 0.7957, "step": 41725 }, { "epoch": 0.5085737267375964, "grad_norm": 2.0379819869995117, "learning_rate": 2.586529826812059e-06, "loss": 0.776, "step": 41730 }, { "epoch": 0.5086346629617442, "grad_norm": 1.7827098369598389, "learning_rate": 2.5862091084028225e-06, "loss": 0.8149, "step": 41735 }, { "epoch": 0.508695599185892, "grad_norm": 1.9475700855255127, "learning_rate": 2.585888389993586e-06, "loss": 0.8281, "step": 41740 }, { "epoch": 0.5087565354100398, "grad_norm": 2.0015339851379395, "learning_rate": 2.585567671584349e-06, "loss": 0.8315, "step": 41745 }, { "epoch": 0.5088174716341877, "grad_norm": 1.9218882322311401, "learning_rate": 2.5852469531751124e-06, "loss": 0.821, "step": 41750 }, { "epoch": 0.5088784078583355, "grad_norm": 1.7998158931732178, "learning_rate": 2.584926234765876e-06, "loss": 0.8312, "step": 41755 }, { "epoch": 0.5089393440824833, "grad_norm": 1.957706093788147, "learning_rate": 2.5846055163566393e-06, "loss": 0.8375, "step": 41760 }, { "epoch": 0.509000280306631, "grad_norm": 1.7900689840316772, "learning_rate": 2.5842847979474023e-06, "loss": 0.8765, "step": 41765 }, { "epoch": 0.5090612165307788, "grad_norm": 2.1908106803894043, "learning_rate": 2.583964079538166e-06, "loss": 0.8657, "step": 41770 }, { "epoch": 0.5091221527549267, "grad_norm": 1.7163811922073364, "learning_rate": 2.583643361128929e-06, "loss": 0.8735, "step": 41775 }, { "epoch": 0.5091830889790745, "grad_norm": 2.3667919635772705, "learning_rate": 2.5833226427196922e-06, "loss": 0.8864, "step": 41780 }, { "epoch": 0.5092440252032223, "grad_norm": 2.553436040878296, "learning_rate": 2.5830019243104552e-06, "loss": 0.9478, "step": 41785 }, { "epoch": 0.5093049614273701, "grad_norm": 1.7380207777023315, "learning_rate": 2.582681205901219e-06, "loss": 0.852, "step": 41790 }, { "epoch": 0.509365897651518, "grad_norm": 1.9641671180725098, "learning_rate": 2.582360487491982e-06, "loss": 0.8566, "step": 41795 }, { "epoch": 0.5094268338756657, "grad_norm": 1.828945517539978, "learning_rate": 2.5820397690827456e-06, "loss": 0.7717, "step": 41800 }, { "epoch": 0.5094877700998135, "grad_norm": 2.0240447521209717, "learning_rate": 2.581719050673509e-06, "loss": 0.8749, "step": 41805 }, { "epoch": 0.5095487063239613, "grad_norm": 2.009187936782837, "learning_rate": 2.581398332264272e-06, "loss": 0.8983, "step": 41810 }, { "epoch": 0.5096096425481091, "grad_norm": 1.7740987539291382, "learning_rate": 2.5810776138550354e-06, "loss": 0.8376, "step": 41815 }, { "epoch": 0.509670578772257, "grad_norm": 1.789674162864685, "learning_rate": 2.580756895445799e-06, "loss": 0.8282, "step": 41820 }, { "epoch": 0.5097315149964048, "grad_norm": 1.815457820892334, "learning_rate": 2.5804361770365623e-06, "loss": 0.8222, "step": 41825 }, { "epoch": 0.5097924512205526, "grad_norm": 2.089069366455078, "learning_rate": 2.5801154586273253e-06, "loss": 0.8512, "step": 41830 }, { "epoch": 0.5098533874447003, "grad_norm": 1.8672877550125122, "learning_rate": 2.5797947402180888e-06, "loss": 0.8588, "step": 41835 }, { "epoch": 0.5099143236688481, "grad_norm": 2.025157928466797, "learning_rate": 2.5794740218088522e-06, "loss": 0.8937, "step": 41840 }, { "epoch": 0.509975259892996, "grad_norm": 1.7692186832427979, "learning_rate": 2.5791533033996152e-06, "loss": 0.8782, "step": 41845 }, { "epoch": 0.5100361961171438, "grad_norm": 2.2725391387939453, "learning_rate": 2.578832584990379e-06, "loss": 0.8122, "step": 41850 }, { "epoch": 0.5100971323412916, "grad_norm": 1.9926798343658447, "learning_rate": 2.578511866581142e-06, "loss": 0.8526, "step": 41855 }, { "epoch": 0.5101580685654394, "grad_norm": 2.1552019119262695, "learning_rate": 2.578191148171905e-06, "loss": 0.814, "step": 41860 }, { "epoch": 0.5102190047895873, "grad_norm": 1.9678912162780762, "learning_rate": 2.577870429762668e-06, "loss": 0.8901, "step": 41865 }, { "epoch": 0.510279941013735, "grad_norm": 2.0757901668548584, "learning_rate": 2.577549711353432e-06, "loss": 0.8186, "step": 41870 }, { "epoch": 0.5103408772378828, "grad_norm": 1.8815333843231201, "learning_rate": 2.577228992944195e-06, "loss": 0.8608, "step": 41875 }, { "epoch": 0.5104018134620306, "grad_norm": 2.003030776977539, "learning_rate": 2.5769082745349585e-06, "loss": 0.8472, "step": 41880 }, { "epoch": 0.5104627496861784, "grad_norm": 1.7752046585083008, "learning_rate": 2.576587556125722e-06, "loss": 0.8169, "step": 41885 }, { "epoch": 0.5105236859103263, "grad_norm": 1.9621143341064453, "learning_rate": 2.576266837716485e-06, "loss": 0.8145, "step": 41890 }, { "epoch": 0.5105846221344741, "grad_norm": 3.681591510772705, "learning_rate": 2.5759461193072484e-06, "loss": 0.874, "step": 41895 }, { "epoch": 0.5106455583586219, "grad_norm": 1.917738437652588, "learning_rate": 2.575625400898012e-06, "loss": 0.7836, "step": 41900 }, { "epoch": 0.5107064945827696, "grad_norm": 1.877664566040039, "learning_rate": 2.5753046824887753e-06, "loss": 0.7817, "step": 41905 }, { "epoch": 0.5107674308069174, "grad_norm": 1.7777856588363647, "learning_rate": 2.5749839640795383e-06, "loss": 0.7507, "step": 41910 }, { "epoch": 0.5108283670310653, "grad_norm": 2.202638864517212, "learning_rate": 2.5746632456703017e-06, "loss": 0.7715, "step": 41915 }, { "epoch": 0.5108893032552131, "grad_norm": 1.6408840417861938, "learning_rate": 2.574342527261065e-06, "loss": 0.8452, "step": 41920 }, { "epoch": 0.5109502394793609, "grad_norm": 1.6592721939086914, "learning_rate": 2.574021808851828e-06, "loss": 0.8059, "step": 41925 }, { "epoch": 0.5110111757035087, "grad_norm": 1.9747947454452515, "learning_rate": 2.573701090442592e-06, "loss": 0.7933, "step": 41930 }, { "epoch": 0.5110721119276566, "grad_norm": 2.330033302307129, "learning_rate": 2.573380372033355e-06, "loss": 0.9109, "step": 41935 }, { "epoch": 0.5111330481518043, "grad_norm": 1.9324684143066406, "learning_rate": 2.573059653624118e-06, "loss": 0.9201, "step": 41940 }, { "epoch": 0.5111939843759521, "grad_norm": 1.6303969621658325, "learning_rate": 2.572738935214881e-06, "loss": 0.8159, "step": 41945 }, { "epoch": 0.5112549206000999, "grad_norm": 3.0285964012145996, "learning_rate": 2.572418216805645e-06, "loss": 0.9076, "step": 41950 }, { "epoch": 0.5113158568242477, "grad_norm": 1.9440336227416992, "learning_rate": 2.572097498396408e-06, "loss": 0.8139, "step": 41955 }, { "epoch": 0.5113767930483956, "grad_norm": 2.165687084197998, "learning_rate": 2.5717767799871714e-06, "loss": 0.7948, "step": 41960 }, { "epoch": 0.5114377292725434, "grad_norm": 1.9227275848388672, "learning_rate": 2.571456061577935e-06, "loss": 0.8482, "step": 41965 }, { "epoch": 0.5114986654966912, "grad_norm": 1.790958285331726, "learning_rate": 2.571135343168698e-06, "loss": 0.8556, "step": 41970 }, { "epoch": 0.5115596017208389, "grad_norm": 1.9355326890945435, "learning_rate": 2.5708146247594613e-06, "loss": 0.734, "step": 41975 }, { "epoch": 0.5116205379449867, "grad_norm": 1.705519199371338, "learning_rate": 2.5704939063502247e-06, "loss": 0.8018, "step": 41980 }, { "epoch": 0.5116814741691346, "grad_norm": 1.9017568826675415, "learning_rate": 2.570173187940988e-06, "loss": 0.841, "step": 41985 }, { "epoch": 0.5117424103932824, "grad_norm": 1.7827630043029785, "learning_rate": 2.569852469531751e-06, "loss": 0.8404, "step": 41990 }, { "epoch": 0.5118033466174302, "grad_norm": 2.0258333683013916, "learning_rate": 2.5695317511225146e-06, "loss": 0.877, "step": 41995 }, { "epoch": 0.511864282841578, "grad_norm": 1.9259198904037476, "learning_rate": 2.569211032713278e-06, "loss": 0.869, "step": 42000 }, { "epoch": 0.5119252190657259, "grad_norm": 1.7739012241363525, "learning_rate": 2.568890314304041e-06, "loss": 0.8212, "step": 42005 }, { "epoch": 0.5119861552898736, "grad_norm": 2.5694029331207275, "learning_rate": 2.568569595894805e-06, "loss": 0.8554, "step": 42010 }, { "epoch": 0.5120470915140214, "grad_norm": 2.0329887866973877, "learning_rate": 2.568248877485568e-06, "loss": 0.8173, "step": 42015 }, { "epoch": 0.5121080277381692, "grad_norm": 1.9761340618133545, "learning_rate": 2.567928159076331e-06, "loss": 0.8479, "step": 42020 }, { "epoch": 0.512168963962317, "grad_norm": 2.2509918212890625, "learning_rate": 2.567607440667095e-06, "loss": 0.8818, "step": 42025 }, { "epoch": 0.5122299001864649, "grad_norm": 1.9444926977157593, "learning_rate": 2.567286722257858e-06, "loss": 0.8473, "step": 42030 }, { "epoch": 0.5122908364106127, "grad_norm": 2.1468074321746826, "learning_rate": 2.566966003848621e-06, "loss": 0.8034, "step": 42035 }, { "epoch": 0.5123517726347605, "grad_norm": 2.6820755004882812, "learning_rate": 2.5666452854393843e-06, "loss": 0.8401, "step": 42040 }, { "epoch": 0.5124127088589082, "grad_norm": 1.8532459735870361, "learning_rate": 2.5663245670301478e-06, "loss": 0.8101, "step": 42045 }, { "epoch": 0.512473645083056, "grad_norm": 1.8253403902053833, "learning_rate": 2.5660038486209112e-06, "loss": 0.8367, "step": 42050 }, { "epoch": 0.5125345813072039, "grad_norm": 2.7036566734313965, "learning_rate": 2.5656831302116742e-06, "loss": 0.8565, "step": 42055 }, { "epoch": 0.5125955175313517, "grad_norm": 1.9246567487716675, "learning_rate": 2.5653624118024377e-06, "loss": 0.7932, "step": 42060 }, { "epoch": 0.5126564537554995, "grad_norm": 1.8590458631515503, "learning_rate": 2.565041693393201e-06, "loss": 0.7802, "step": 42065 }, { "epoch": 0.5127173899796473, "grad_norm": 1.884818196296692, "learning_rate": 2.564720974983964e-06, "loss": 0.8399, "step": 42070 }, { "epoch": 0.5127783262037952, "grad_norm": 2.218311309814453, "learning_rate": 2.564400256574728e-06, "loss": 0.8429, "step": 42075 }, { "epoch": 0.5128392624279429, "grad_norm": 1.8581186532974243, "learning_rate": 2.564079538165491e-06, "loss": 0.8777, "step": 42080 }, { "epoch": 0.5129001986520907, "grad_norm": 2.0980582237243652, "learning_rate": 2.563758819756254e-06, "loss": 0.8626, "step": 42085 }, { "epoch": 0.5129611348762385, "grad_norm": 2.1966707706451416, "learning_rate": 2.563438101347018e-06, "loss": 0.8694, "step": 42090 }, { "epoch": 0.5130220711003863, "grad_norm": 1.7795417308807373, "learning_rate": 2.563117382937781e-06, "loss": 0.8128, "step": 42095 }, { "epoch": 0.5130830073245342, "grad_norm": 2.128007173538208, "learning_rate": 2.562796664528544e-06, "loss": 0.8803, "step": 42100 }, { "epoch": 0.513143943548682, "grad_norm": 2.1567351818084717, "learning_rate": 2.5624759461193078e-06, "loss": 0.8608, "step": 42105 }, { "epoch": 0.5132048797728298, "grad_norm": 1.9307292699813843, "learning_rate": 2.562155227710071e-06, "loss": 0.8116, "step": 42110 }, { "epoch": 0.5132658159969775, "grad_norm": 1.7396173477172852, "learning_rate": 2.561834509300834e-06, "loss": 0.8399, "step": 42115 }, { "epoch": 0.5133267522211253, "grad_norm": 1.805139422416687, "learning_rate": 2.5615137908915973e-06, "loss": 0.8401, "step": 42120 }, { "epoch": 0.5133876884452732, "grad_norm": 2.3768985271453857, "learning_rate": 2.5611930724823607e-06, "loss": 0.8579, "step": 42125 }, { "epoch": 0.513448624669421, "grad_norm": 1.991953730583191, "learning_rate": 2.560872354073124e-06, "loss": 0.9079, "step": 42130 }, { "epoch": 0.5135095608935688, "grad_norm": 2.1124825477600098, "learning_rate": 2.560551635663887e-06, "loss": 0.773, "step": 42135 }, { "epoch": 0.5135704971177166, "grad_norm": 2.1955301761627197, "learning_rate": 2.5602309172546506e-06, "loss": 0.884, "step": 42140 }, { "epoch": 0.5136314333418645, "grad_norm": 1.8569070100784302, "learning_rate": 2.559910198845414e-06, "loss": 0.809, "step": 42145 }, { "epoch": 0.5136923695660122, "grad_norm": 1.6963692903518677, "learning_rate": 2.559589480436177e-06, "loss": 0.8486, "step": 42150 }, { "epoch": 0.51375330579016, "grad_norm": 2.076129198074341, "learning_rate": 2.559268762026941e-06, "loss": 0.8541, "step": 42155 }, { "epoch": 0.5138142420143078, "grad_norm": 1.947500228881836, "learning_rate": 2.558948043617704e-06, "loss": 0.8881, "step": 42160 }, { "epoch": 0.5138751782384556, "grad_norm": 2.25152587890625, "learning_rate": 2.558627325208467e-06, "loss": 0.8825, "step": 42165 }, { "epoch": 0.5139361144626035, "grad_norm": 2.2305965423583984, "learning_rate": 2.558306606799231e-06, "loss": 0.8519, "step": 42170 }, { "epoch": 0.5139970506867513, "grad_norm": 1.8650354146957397, "learning_rate": 2.557985888389994e-06, "loss": 0.8441, "step": 42175 }, { "epoch": 0.5140579869108991, "grad_norm": 2.21806263923645, "learning_rate": 2.557665169980757e-06, "loss": 0.8074, "step": 42180 }, { "epoch": 0.5141189231350468, "grad_norm": 2.127127170562744, "learning_rate": 2.5573444515715207e-06, "loss": 0.855, "step": 42185 }, { "epoch": 0.5141798593591946, "grad_norm": 1.916763424873352, "learning_rate": 2.5570237331622837e-06, "loss": 0.9102, "step": 42190 }, { "epoch": 0.5142407955833425, "grad_norm": 1.7957336902618408, "learning_rate": 2.5567030147530467e-06, "loss": 0.828, "step": 42195 }, { "epoch": 0.5143017318074903, "grad_norm": 2.149574041366577, "learning_rate": 2.55638229634381e-06, "loss": 0.8423, "step": 42200 }, { "epoch": 0.5143626680316381, "grad_norm": 2.124234199523926, "learning_rate": 2.5560615779345736e-06, "loss": 0.8624, "step": 42205 }, { "epoch": 0.5144236042557859, "grad_norm": 1.8184829950332642, "learning_rate": 2.555740859525337e-06, "loss": 0.7864, "step": 42210 }, { "epoch": 0.5144845404799337, "grad_norm": 2.1907880306243896, "learning_rate": 2.5554201411161e-06, "loss": 0.837, "step": 42215 }, { "epoch": 0.5145454767040815, "grad_norm": 2.1196815967559814, "learning_rate": 2.5550994227068635e-06, "loss": 0.8541, "step": 42220 }, { "epoch": 0.5146064129282293, "grad_norm": 1.8703771829605103, "learning_rate": 2.554778704297627e-06, "loss": 0.8137, "step": 42225 }, { "epoch": 0.5146673491523771, "grad_norm": 2.0903735160827637, "learning_rate": 2.55445798588839e-06, "loss": 0.8164, "step": 42230 }, { "epoch": 0.5147282853765249, "grad_norm": 2.1780922412872314, "learning_rate": 2.554137267479154e-06, "loss": 0.7574, "step": 42235 }, { "epoch": 0.5147892216006728, "grad_norm": 2.031724691390991, "learning_rate": 2.553816549069917e-06, "loss": 0.797, "step": 42240 }, { "epoch": 0.5148501578248206, "grad_norm": 1.6468356847763062, "learning_rate": 2.55349583066068e-06, "loss": 0.8164, "step": 42245 }, { "epoch": 0.5149110940489684, "grad_norm": 2.5362348556518555, "learning_rate": 2.5531751122514437e-06, "loss": 0.8533, "step": 42250 }, { "epoch": 0.5149720302731161, "grad_norm": 1.9569709300994873, "learning_rate": 2.5528543938422068e-06, "loss": 0.8723, "step": 42255 }, { "epoch": 0.5150329664972639, "grad_norm": 1.827979564666748, "learning_rate": 2.5525336754329698e-06, "loss": 0.9363, "step": 42260 }, { "epoch": 0.5150939027214118, "grad_norm": 1.7862653732299805, "learning_rate": 2.5522129570237336e-06, "loss": 0.749, "step": 42265 }, { "epoch": 0.5151548389455596, "grad_norm": 1.8589633703231812, "learning_rate": 2.5518922386144967e-06, "loss": 0.7806, "step": 42270 }, { "epoch": 0.5152157751697074, "grad_norm": 1.875844955444336, "learning_rate": 2.55157152020526e-06, "loss": 0.791, "step": 42275 }, { "epoch": 0.5152767113938552, "grad_norm": 1.7250584363937378, "learning_rate": 2.551250801796023e-06, "loss": 0.8088, "step": 42280 }, { "epoch": 0.515337647618003, "grad_norm": 1.8130303621292114, "learning_rate": 2.5509300833867866e-06, "loss": 0.8767, "step": 42285 }, { "epoch": 0.5153985838421508, "grad_norm": 2.1100709438323975, "learning_rate": 2.55060936497755e-06, "loss": 0.8405, "step": 42290 }, { "epoch": 0.5154595200662986, "grad_norm": 1.76331627368927, "learning_rate": 2.550288646568313e-06, "loss": 0.8164, "step": 42295 }, { "epoch": 0.5155204562904464, "grad_norm": 2.3222391605377197, "learning_rate": 2.549967928159077e-06, "loss": 0.8769, "step": 42300 }, { "epoch": 0.5155813925145942, "grad_norm": 2.021390438079834, "learning_rate": 2.54964720974984e-06, "loss": 0.8261, "step": 42305 }, { "epoch": 0.515642328738742, "grad_norm": 2.0716793537139893, "learning_rate": 2.549326491340603e-06, "loss": 0.8176, "step": 42310 }, { "epoch": 0.5157032649628899, "grad_norm": 1.9853215217590332, "learning_rate": 2.5490057729313668e-06, "loss": 0.9082, "step": 42315 }, { "epoch": 0.5157642011870377, "grad_norm": 2.0846810340881348, "learning_rate": 2.54868505452213e-06, "loss": 0.8325, "step": 42320 }, { "epoch": 0.5158251374111854, "grad_norm": 2.2080211639404297, "learning_rate": 2.548364336112893e-06, "loss": 0.8429, "step": 42325 }, { "epoch": 0.5158860736353332, "grad_norm": 2.0979387760162354, "learning_rate": 2.5480436177036567e-06, "loss": 0.87, "step": 42330 }, { "epoch": 0.515947009859481, "grad_norm": 1.9004563093185425, "learning_rate": 2.5477228992944197e-06, "loss": 0.8903, "step": 42335 }, { "epoch": 0.5160079460836289, "grad_norm": 1.8893178701400757, "learning_rate": 2.5474021808851827e-06, "loss": 0.8402, "step": 42340 }, { "epoch": 0.5160688823077767, "grad_norm": 2.1683828830718994, "learning_rate": 2.5470814624759466e-06, "loss": 0.9041, "step": 42345 }, { "epoch": 0.5161298185319245, "grad_norm": 2.003141164779663, "learning_rate": 2.5467607440667096e-06, "loss": 0.7681, "step": 42350 }, { "epoch": 0.5161907547560723, "grad_norm": 1.8850314617156982, "learning_rate": 2.546440025657473e-06, "loss": 0.8245, "step": 42355 }, { "epoch": 0.5162516909802201, "grad_norm": 2.5686421394348145, "learning_rate": 2.5461193072482365e-06, "loss": 0.8724, "step": 42360 }, { "epoch": 0.5163126272043679, "grad_norm": 1.9104725122451782, "learning_rate": 2.5457985888389995e-06, "loss": 0.8922, "step": 42365 }, { "epoch": 0.5163735634285157, "grad_norm": 1.9705668687820435, "learning_rate": 2.545477870429763e-06, "loss": 0.8348, "step": 42370 }, { "epoch": 0.5164344996526635, "grad_norm": 1.9721624851226807, "learning_rate": 2.545157152020526e-06, "loss": 0.833, "step": 42375 }, { "epoch": 0.5164954358768113, "grad_norm": 1.8942830562591553, "learning_rate": 2.54483643361129e-06, "loss": 0.7875, "step": 42380 }, { "epoch": 0.5165563721009592, "grad_norm": 3.300072193145752, "learning_rate": 2.544515715202053e-06, "loss": 0.8374, "step": 42385 }, { "epoch": 0.516617308325107, "grad_norm": 1.9578241109848022, "learning_rate": 2.544194996792816e-06, "loss": 0.8215, "step": 42390 }, { "epoch": 0.5166782445492547, "grad_norm": 1.977767825126648, "learning_rate": 2.5438742783835797e-06, "loss": 0.9065, "step": 42395 }, { "epoch": 0.5167391807734025, "grad_norm": 1.9788683652877808, "learning_rate": 2.5435535599743427e-06, "loss": 0.7858, "step": 42400 }, { "epoch": 0.5168001169975504, "grad_norm": 1.9610661268234253, "learning_rate": 2.5432328415651057e-06, "loss": 0.8118, "step": 42405 }, { "epoch": 0.5168610532216982, "grad_norm": 2.047100067138672, "learning_rate": 2.5429121231558696e-06, "loss": 0.91, "step": 42410 }, { "epoch": 0.516921989445846, "grad_norm": 2.1247339248657227, "learning_rate": 2.5425914047466326e-06, "loss": 0.8469, "step": 42415 }, { "epoch": 0.5169829256699938, "grad_norm": 2.0482614040374756, "learning_rate": 2.5422706863373956e-06, "loss": 0.8056, "step": 42420 }, { "epoch": 0.5170438618941416, "grad_norm": 1.8635221719741821, "learning_rate": 2.5419499679281595e-06, "loss": 0.9282, "step": 42425 }, { "epoch": 0.5171047981182894, "grad_norm": 2.6263930797576904, "learning_rate": 2.5416292495189225e-06, "loss": 0.8653, "step": 42430 }, { "epoch": 0.5171657343424372, "grad_norm": 2.162492275238037, "learning_rate": 2.541308531109686e-06, "loss": 0.8564, "step": 42435 }, { "epoch": 0.517226670566585, "grad_norm": 1.7782624959945679, "learning_rate": 2.5409878127004494e-06, "loss": 0.8454, "step": 42440 }, { "epoch": 0.5172876067907328, "grad_norm": 2.323423147201538, "learning_rate": 2.5406670942912124e-06, "loss": 0.9398, "step": 42445 }, { "epoch": 0.5173485430148806, "grad_norm": 2.077669620513916, "learning_rate": 2.540346375881976e-06, "loss": 0.8107, "step": 42450 }, { "epoch": 0.5174094792390285, "grad_norm": 1.821333646774292, "learning_rate": 2.540025657472739e-06, "loss": 0.846, "step": 42455 }, { "epoch": 0.5174704154631763, "grad_norm": 1.7667946815490723, "learning_rate": 2.5397049390635027e-06, "loss": 0.8023, "step": 42460 }, { "epoch": 0.517531351687324, "grad_norm": 1.7663803100585938, "learning_rate": 2.5393842206542657e-06, "loss": 0.8492, "step": 42465 }, { "epoch": 0.5175922879114718, "grad_norm": 2.1306891441345215, "learning_rate": 2.5390635022450288e-06, "loss": 0.8961, "step": 42470 }, { "epoch": 0.5176532241356196, "grad_norm": 2.1601595878601074, "learning_rate": 2.5387427838357926e-06, "loss": 0.7553, "step": 42475 }, { "epoch": 0.5177141603597675, "grad_norm": 2.049814224243164, "learning_rate": 2.5384220654265556e-06, "loss": 0.8392, "step": 42480 }, { "epoch": 0.5177750965839153, "grad_norm": 1.8668233156204224, "learning_rate": 2.5381013470173187e-06, "loss": 0.8723, "step": 42485 }, { "epoch": 0.5178360328080631, "grad_norm": 1.8223055601119995, "learning_rate": 2.5377806286080825e-06, "loss": 0.8517, "step": 42490 }, { "epoch": 0.5178969690322109, "grad_norm": 2.0501863956451416, "learning_rate": 2.5374599101988455e-06, "loss": 0.8197, "step": 42495 }, { "epoch": 0.5179579052563587, "grad_norm": 1.6431478261947632, "learning_rate": 2.5371391917896086e-06, "loss": 0.8109, "step": 42500 }, { "epoch": 0.5180188414805065, "grad_norm": 1.988295555114746, "learning_rate": 2.5368184733803724e-06, "loss": 0.805, "step": 42505 }, { "epoch": 0.5180797777046543, "grad_norm": 1.8959764242172241, "learning_rate": 2.5364977549711354e-06, "loss": 0.8693, "step": 42510 }, { "epoch": 0.5181407139288021, "grad_norm": 1.9711636304855347, "learning_rate": 2.536177036561899e-06, "loss": 0.832, "step": 42515 }, { "epoch": 0.5182016501529499, "grad_norm": 2.041644811630249, "learning_rate": 2.5358563181526623e-06, "loss": 0.8266, "step": 42520 }, { "epoch": 0.5182625863770978, "grad_norm": 2.083709478378296, "learning_rate": 2.5355355997434258e-06, "loss": 0.8356, "step": 42525 }, { "epoch": 0.5183235226012456, "grad_norm": 1.9554804563522339, "learning_rate": 2.5352148813341888e-06, "loss": 0.7982, "step": 42530 }, { "epoch": 0.5183844588253933, "grad_norm": 2.085808277130127, "learning_rate": 2.534894162924952e-06, "loss": 0.7858, "step": 42535 }, { "epoch": 0.5184453950495411, "grad_norm": 2.1024856567382812, "learning_rate": 2.5345734445157157e-06, "loss": 0.8437, "step": 42540 }, { "epoch": 0.518506331273689, "grad_norm": 2.0980942249298096, "learning_rate": 2.5342527261064787e-06, "loss": 0.8179, "step": 42545 }, { "epoch": 0.5185672674978368, "grad_norm": 2.2133140563964844, "learning_rate": 2.5339320076972417e-06, "loss": 0.8137, "step": 42550 }, { "epoch": 0.5186282037219846, "grad_norm": 1.6000722646713257, "learning_rate": 2.5336112892880056e-06, "loss": 0.8586, "step": 42555 }, { "epoch": 0.5186891399461324, "grad_norm": 2.027491807937622, "learning_rate": 2.5332905708787686e-06, "loss": 0.8681, "step": 42560 }, { "epoch": 0.5187500761702802, "grad_norm": 1.8192596435546875, "learning_rate": 2.5329698524695316e-06, "loss": 0.8204, "step": 42565 }, { "epoch": 0.518811012394428, "grad_norm": 1.8878215551376343, "learning_rate": 2.5326491340602955e-06, "loss": 0.8442, "step": 42570 }, { "epoch": 0.5188719486185758, "grad_norm": 1.712924838066101, "learning_rate": 2.5323284156510585e-06, "loss": 0.8209, "step": 42575 }, { "epoch": 0.5189328848427236, "grad_norm": 1.7409324645996094, "learning_rate": 2.532007697241822e-06, "loss": 0.8543, "step": 42580 }, { "epoch": 0.5189938210668714, "grad_norm": 2.087204933166504, "learning_rate": 2.5316869788325853e-06, "loss": 0.749, "step": 42585 }, { "epoch": 0.5190547572910192, "grad_norm": 1.761415958404541, "learning_rate": 2.5313662604233484e-06, "loss": 0.805, "step": 42590 }, { "epoch": 0.5191156935151671, "grad_norm": 1.9264966249465942, "learning_rate": 2.531045542014112e-06, "loss": 0.8356, "step": 42595 }, { "epoch": 0.5191766297393149, "grad_norm": 1.9970238208770752, "learning_rate": 2.5307248236048752e-06, "loss": 0.8738, "step": 42600 }, { "epoch": 0.5192375659634626, "grad_norm": 2.323317766189575, "learning_rate": 2.5304041051956387e-06, "loss": 0.8713, "step": 42605 }, { "epoch": 0.5192985021876104, "grad_norm": 1.7793488502502441, "learning_rate": 2.5300833867864017e-06, "loss": 0.8421, "step": 42610 }, { "epoch": 0.5193594384117582, "grad_norm": 2.110839366912842, "learning_rate": 2.529762668377165e-06, "loss": 0.8199, "step": 42615 }, { "epoch": 0.5194203746359061, "grad_norm": 2.0655839443206787, "learning_rate": 2.5294419499679286e-06, "loss": 0.9317, "step": 42620 }, { "epoch": 0.5194813108600539, "grad_norm": 2.1410207748413086, "learning_rate": 2.5291212315586916e-06, "loss": 0.8181, "step": 42625 }, { "epoch": 0.5195422470842017, "grad_norm": 2.5931308269500732, "learning_rate": 2.5288005131494546e-06, "loss": 0.8626, "step": 42630 }, { "epoch": 0.5196031833083495, "grad_norm": 1.8853473663330078, "learning_rate": 2.5284797947402185e-06, "loss": 0.7962, "step": 42635 }, { "epoch": 0.5196641195324972, "grad_norm": 1.882448673248291, "learning_rate": 2.5281590763309815e-06, "loss": 0.873, "step": 42640 }, { "epoch": 0.5197250557566451, "grad_norm": 2.402031660079956, "learning_rate": 2.5278383579217445e-06, "loss": 0.7347, "step": 42645 }, { "epoch": 0.5197859919807929, "grad_norm": 1.8264546394348145, "learning_rate": 2.5275176395125084e-06, "loss": 0.7604, "step": 42650 }, { "epoch": 0.5198469282049407, "grad_norm": 3.1406807899475098, "learning_rate": 2.5271969211032714e-06, "loss": 0.7927, "step": 42655 }, { "epoch": 0.5199078644290885, "grad_norm": 1.8179985284805298, "learning_rate": 2.526876202694035e-06, "loss": 0.851, "step": 42660 }, { "epoch": 0.5199688006532364, "grad_norm": 1.9792920351028442, "learning_rate": 2.5265554842847983e-06, "loss": 0.8686, "step": 42665 }, { "epoch": 0.5200297368773841, "grad_norm": 2.067474603652954, "learning_rate": 2.5262347658755613e-06, "loss": 0.9089, "step": 42670 }, { "epoch": 0.5200906731015319, "grad_norm": 2.1585638523101807, "learning_rate": 2.5259140474663247e-06, "loss": 0.8693, "step": 42675 }, { "epoch": 0.5201516093256797, "grad_norm": 1.7133508920669556, "learning_rate": 2.525593329057088e-06, "loss": 0.8359, "step": 42680 }, { "epoch": 0.5202125455498275, "grad_norm": 2.259382963180542, "learning_rate": 2.5252726106478516e-06, "loss": 0.8357, "step": 42685 }, { "epoch": 0.5202734817739754, "grad_norm": 2.619631290435791, "learning_rate": 2.5249518922386146e-06, "loss": 0.8813, "step": 42690 }, { "epoch": 0.5203344179981232, "grad_norm": 1.7918449640274048, "learning_rate": 2.524631173829378e-06, "loss": 0.9042, "step": 42695 }, { "epoch": 0.520395354222271, "grad_norm": 2.2410876750946045, "learning_rate": 2.5243104554201415e-06, "loss": 0.8585, "step": 42700 }, { "epoch": 0.5204562904464187, "grad_norm": 1.7996785640716553, "learning_rate": 2.5239897370109045e-06, "loss": 0.8122, "step": 42705 }, { "epoch": 0.5205172266705665, "grad_norm": 1.5642728805541992, "learning_rate": 2.5236690186016675e-06, "loss": 0.8407, "step": 42710 }, { "epoch": 0.5205781628947144, "grad_norm": 2.2921929359436035, "learning_rate": 2.5233483001924314e-06, "loss": 0.8265, "step": 42715 }, { "epoch": 0.5206390991188622, "grad_norm": 1.762447476387024, "learning_rate": 2.5230275817831944e-06, "loss": 0.8434, "step": 42720 }, { "epoch": 0.52070003534301, "grad_norm": 2.299393892288208, "learning_rate": 2.5227068633739574e-06, "loss": 0.9155, "step": 42725 }, { "epoch": 0.5207609715671578, "grad_norm": 2.0745670795440674, "learning_rate": 2.5223861449647213e-06, "loss": 0.8467, "step": 42730 }, { "epoch": 0.5208219077913057, "grad_norm": 2.0093562602996826, "learning_rate": 2.5220654265554843e-06, "loss": 0.8636, "step": 42735 }, { "epoch": 0.5208828440154534, "grad_norm": 1.8517253398895264, "learning_rate": 2.5217447081462478e-06, "loss": 0.9003, "step": 42740 }, { "epoch": 0.5209437802396012, "grad_norm": 2.239793300628662, "learning_rate": 2.521423989737011e-06, "loss": 0.8857, "step": 42745 }, { "epoch": 0.521004716463749, "grad_norm": 2.043384552001953, "learning_rate": 2.5211032713277746e-06, "loss": 0.7655, "step": 42750 }, { "epoch": 0.5210656526878968, "grad_norm": 1.7287689447402954, "learning_rate": 2.5207825529185377e-06, "loss": 0.8352, "step": 42755 }, { "epoch": 0.5211265889120447, "grad_norm": 2.2784218788146973, "learning_rate": 2.520461834509301e-06, "loss": 0.8448, "step": 42760 }, { "epoch": 0.5211875251361925, "grad_norm": 1.9329283237457275, "learning_rate": 2.5201411161000645e-06, "loss": 0.8967, "step": 42765 }, { "epoch": 0.5212484613603403, "grad_norm": 1.8349660634994507, "learning_rate": 2.5198203976908276e-06, "loss": 0.7717, "step": 42770 }, { "epoch": 0.521309397584488, "grad_norm": 1.7257438898086548, "learning_rate": 2.5194996792815914e-06, "loss": 0.8202, "step": 42775 }, { "epoch": 0.5213703338086358, "grad_norm": 2.3624837398529053, "learning_rate": 2.5191789608723544e-06, "loss": 0.7638, "step": 42780 }, { "epoch": 0.5214312700327837, "grad_norm": 2.0928685665130615, "learning_rate": 2.5188582424631175e-06, "loss": 0.8506, "step": 42785 }, { "epoch": 0.5214922062569315, "grad_norm": 2.1962220668792725, "learning_rate": 2.5185375240538805e-06, "loss": 0.8328, "step": 42790 }, { "epoch": 0.5215531424810793, "grad_norm": 1.9117209911346436, "learning_rate": 2.5182168056446443e-06, "loss": 0.857, "step": 42795 }, { "epoch": 0.5216140787052271, "grad_norm": 1.9076545238494873, "learning_rate": 2.5178960872354074e-06, "loss": 0.8316, "step": 42800 }, { "epoch": 0.521675014929375, "grad_norm": 1.8238334655761719, "learning_rate": 2.517575368826171e-06, "loss": 0.8701, "step": 42805 }, { "epoch": 0.5217359511535227, "grad_norm": 1.7884787321090698, "learning_rate": 2.5172546504169342e-06, "loss": 0.8584, "step": 42810 }, { "epoch": 0.5217968873776705, "grad_norm": 1.7054778337478638, "learning_rate": 2.5169339320076972e-06, "loss": 0.8731, "step": 42815 }, { "epoch": 0.5218578236018183, "grad_norm": 1.8649613857269287, "learning_rate": 2.5166132135984607e-06, "loss": 0.8487, "step": 42820 }, { "epoch": 0.5219187598259661, "grad_norm": 1.9271153211593628, "learning_rate": 2.516292495189224e-06, "loss": 0.8514, "step": 42825 }, { "epoch": 0.521979696050114, "grad_norm": 1.8308824300765991, "learning_rate": 2.5159717767799876e-06, "loss": 0.8848, "step": 42830 }, { "epoch": 0.5220406322742618, "grad_norm": 2.09481143951416, "learning_rate": 2.5156510583707506e-06, "loss": 0.8882, "step": 42835 }, { "epoch": 0.5221015684984096, "grad_norm": 1.9720332622528076, "learning_rate": 2.515330339961514e-06, "loss": 0.8479, "step": 42840 }, { "epoch": 0.5221625047225573, "grad_norm": 2.0826375484466553, "learning_rate": 2.5150096215522775e-06, "loss": 0.7842, "step": 42845 }, { "epoch": 0.5222234409467051, "grad_norm": 2.0550103187561035, "learning_rate": 2.5146889031430405e-06, "loss": 0.8752, "step": 42850 }, { "epoch": 0.522284377170853, "grad_norm": 1.8465282917022705, "learning_rate": 2.5143681847338043e-06, "loss": 0.8391, "step": 42855 }, { "epoch": 0.5223453133950008, "grad_norm": 2.088221549987793, "learning_rate": 2.5140474663245674e-06, "loss": 0.7816, "step": 42860 }, { "epoch": 0.5224062496191486, "grad_norm": 1.900696873664856, "learning_rate": 2.5137267479153304e-06, "loss": 0.7205, "step": 42865 }, { "epoch": 0.5224671858432964, "grad_norm": 2.2434260845184326, "learning_rate": 2.5134060295060934e-06, "loss": 0.8456, "step": 42870 }, { "epoch": 0.5225281220674443, "grad_norm": 2.5181591510772705, "learning_rate": 2.5130853110968573e-06, "loss": 0.7865, "step": 42875 }, { "epoch": 0.522589058291592, "grad_norm": 2.2302379608154297, "learning_rate": 2.5127645926876203e-06, "loss": 0.9308, "step": 42880 }, { "epoch": 0.5226499945157398, "grad_norm": 1.7588976621627808, "learning_rate": 2.5124438742783837e-06, "loss": 0.9279, "step": 42885 }, { "epoch": 0.5227109307398876, "grad_norm": 1.8643229007720947, "learning_rate": 2.512123155869147e-06, "loss": 0.8393, "step": 42890 }, { "epoch": 0.5227718669640354, "grad_norm": 2.3569977283477783, "learning_rate": 2.51180243745991e-06, "loss": 0.8544, "step": 42895 }, { "epoch": 0.5228328031881833, "grad_norm": 2.4025919437408447, "learning_rate": 2.5114817190506736e-06, "loss": 0.8608, "step": 42900 }, { "epoch": 0.5228937394123311, "grad_norm": 1.7723814249038696, "learning_rate": 2.511161000641437e-06, "loss": 0.9015, "step": 42905 }, { "epoch": 0.5229546756364789, "grad_norm": 2.0121922492980957, "learning_rate": 2.5108402822322005e-06, "loss": 0.8051, "step": 42910 }, { "epoch": 0.5230156118606266, "grad_norm": 1.7031652927398682, "learning_rate": 2.5105195638229635e-06, "loss": 0.7859, "step": 42915 }, { "epoch": 0.5230765480847744, "grad_norm": 1.8307889699935913, "learning_rate": 2.510198845413727e-06, "loss": 0.8041, "step": 42920 }, { "epoch": 0.5231374843089223, "grad_norm": 1.8695112466812134, "learning_rate": 2.5098781270044904e-06, "loss": 0.786, "step": 42925 }, { "epoch": 0.5231984205330701, "grad_norm": 1.8797972202301025, "learning_rate": 2.5095574085952534e-06, "loss": 0.8194, "step": 42930 }, { "epoch": 0.5232593567572179, "grad_norm": 2.332287549972534, "learning_rate": 2.5092366901860173e-06, "loss": 0.8137, "step": 42935 }, { "epoch": 0.5233202929813657, "grad_norm": 2.0564539432525635, "learning_rate": 2.5089159717767803e-06, "loss": 0.8528, "step": 42940 }, { "epoch": 0.5233812292055136, "grad_norm": 1.9216595888137817, "learning_rate": 2.5085952533675433e-06, "loss": 0.8731, "step": 42945 }, { "epoch": 0.5234421654296613, "grad_norm": 2.1583950519561768, "learning_rate": 2.508274534958307e-06, "loss": 0.8415, "step": 42950 }, { "epoch": 0.5235031016538091, "grad_norm": 2.224339485168457, "learning_rate": 2.50795381654907e-06, "loss": 0.8782, "step": 42955 }, { "epoch": 0.5235640378779569, "grad_norm": 2.121955633163452, "learning_rate": 2.507633098139833e-06, "loss": 0.8728, "step": 42960 }, { "epoch": 0.5236249741021047, "grad_norm": 2.0343844890594482, "learning_rate": 2.5073123797305966e-06, "loss": 0.8677, "step": 42965 }, { "epoch": 0.5236859103262526, "grad_norm": 1.8853203058242798, "learning_rate": 2.50699166132136e-06, "loss": 0.807, "step": 42970 }, { "epoch": 0.5237468465504004, "grad_norm": 2.3565359115600586, "learning_rate": 2.5066709429121235e-06, "loss": 0.8453, "step": 42975 }, { "epoch": 0.5238077827745482, "grad_norm": 1.852919578552246, "learning_rate": 2.5063502245028865e-06, "loss": 0.8833, "step": 42980 }, { "epoch": 0.5238687189986959, "grad_norm": 2.1381657123565674, "learning_rate": 2.50602950609365e-06, "loss": 0.7773, "step": 42985 }, { "epoch": 0.5239296552228437, "grad_norm": 1.9683789014816284, "learning_rate": 2.5057087876844134e-06, "loss": 0.8037, "step": 42990 }, { "epoch": 0.5239905914469916, "grad_norm": 2.1459784507751465, "learning_rate": 2.5053880692751764e-06, "loss": 0.8647, "step": 42995 }, { "epoch": 0.5240515276711394, "grad_norm": 1.87225341796875, "learning_rate": 2.5050673508659403e-06, "loss": 0.8497, "step": 43000 }, { "epoch": 0.5241124638952872, "grad_norm": 2.309351682662964, "learning_rate": 2.5047466324567033e-06, "loss": 0.7848, "step": 43005 }, { "epoch": 0.524173400119435, "grad_norm": 2.2695696353912354, "learning_rate": 2.5044259140474663e-06, "loss": 0.8185, "step": 43010 }, { "epoch": 0.5242343363435829, "grad_norm": 2.0563371181488037, "learning_rate": 2.50410519563823e-06, "loss": 0.8537, "step": 43015 }, { "epoch": 0.5242952725677306, "grad_norm": 1.830615520477295, "learning_rate": 2.5037844772289932e-06, "loss": 0.83, "step": 43020 }, { "epoch": 0.5243562087918784, "grad_norm": 1.703382968902588, "learning_rate": 2.5034637588197562e-06, "loss": 0.8117, "step": 43025 }, { "epoch": 0.5244171450160262, "grad_norm": 2.2666373252868652, "learning_rate": 2.50314304041052e-06, "loss": 0.8052, "step": 43030 }, { "epoch": 0.524478081240174, "grad_norm": 1.965490698814392, "learning_rate": 2.502822322001283e-06, "loss": 0.9188, "step": 43035 }, { "epoch": 0.5245390174643219, "grad_norm": 2.0725557804107666, "learning_rate": 2.502501603592046e-06, "loss": 0.8753, "step": 43040 }, { "epoch": 0.5245999536884697, "grad_norm": 1.9152559041976929, "learning_rate": 2.5021808851828096e-06, "loss": 0.7971, "step": 43045 }, { "epoch": 0.5246608899126175, "grad_norm": 2.1007885932922363, "learning_rate": 2.501860166773573e-06, "loss": 0.8904, "step": 43050 }, { "epoch": 0.5247218261367652, "grad_norm": 2.102912664413452, "learning_rate": 2.5015394483643365e-06, "loss": 0.8612, "step": 43055 }, { "epoch": 0.524782762360913, "grad_norm": 1.845954179763794, "learning_rate": 2.5012187299550995e-06, "loss": 0.8052, "step": 43060 }, { "epoch": 0.5248436985850609, "grad_norm": 1.6695172786712646, "learning_rate": 2.500898011545863e-06, "loss": 0.8745, "step": 43065 }, { "epoch": 0.5249046348092087, "grad_norm": 1.7213764190673828, "learning_rate": 2.5005772931366264e-06, "loss": 0.8959, "step": 43070 }, { "epoch": 0.5249655710333565, "grad_norm": 1.8267680406570435, "learning_rate": 2.5002565747273894e-06, "loss": 0.8926, "step": 43075 }, { "epoch": 0.5250265072575043, "grad_norm": 1.7018786668777466, "learning_rate": 2.499935856318153e-06, "loss": 0.7694, "step": 43080 }, { "epoch": 0.5250874434816521, "grad_norm": 1.8640832901000977, "learning_rate": 2.4996151379089162e-06, "loss": 0.8729, "step": 43085 }, { "epoch": 0.5251483797057999, "grad_norm": 1.8637518882751465, "learning_rate": 2.4992944194996797e-06, "loss": 0.8922, "step": 43090 }, { "epoch": 0.5252093159299477, "grad_norm": 2.001339912414551, "learning_rate": 2.4989737010904427e-06, "loss": 0.8159, "step": 43095 }, { "epoch": 0.5252702521540955, "grad_norm": 1.948048710823059, "learning_rate": 2.498652982681206e-06, "loss": 0.8066, "step": 43100 }, { "epoch": 0.5253311883782433, "grad_norm": 2.018963575363159, "learning_rate": 2.498332264271969e-06, "loss": 0.8415, "step": 43105 }, { "epoch": 0.5253921246023912, "grad_norm": 2.5448997020721436, "learning_rate": 2.4980115458627326e-06, "loss": 0.8211, "step": 43110 }, { "epoch": 0.525453060826539, "grad_norm": 2.134680986404419, "learning_rate": 2.497690827453496e-06, "loss": 0.871, "step": 43115 }, { "epoch": 0.5255139970506868, "grad_norm": 1.8636236190795898, "learning_rate": 2.497370109044259e-06, "loss": 0.839, "step": 43120 }, { "epoch": 0.5255749332748345, "grad_norm": 1.9290622472763062, "learning_rate": 2.4970493906350225e-06, "loss": 0.8479, "step": 43125 }, { "epoch": 0.5256358694989823, "grad_norm": 1.7589759826660156, "learning_rate": 2.496728672225786e-06, "loss": 0.8183, "step": 43130 }, { "epoch": 0.5256968057231302, "grad_norm": 4.095205783843994, "learning_rate": 2.4964079538165494e-06, "loss": 0.8018, "step": 43135 }, { "epoch": 0.525757741947278, "grad_norm": 2.2186992168426514, "learning_rate": 2.496087235407313e-06, "loss": 0.8457, "step": 43140 }, { "epoch": 0.5258186781714258, "grad_norm": 2.2206437587738037, "learning_rate": 2.495766516998076e-06, "loss": 0.7907, "step": 43145 }, { "epoch": 0.5258796143955736, "grad_norm": 2.0695364475250244, "learning_rate": 2.4954457985888393e-06, "loss": 0.8464, "step": 43150 }, { "epoch": 0.5259405506197214, "grad_norm": 2.0845823287963867, "learning_rate": 2.4951250801796027e-06, "loss": 0.8527, "step": 43155 }, { "epoch": 0.5260014868438692, "grad_norm": 1.7761297225952148, "learning_rate": 2.4948043617703657e-06, "loss": 0.8117, "step": 43160 }, { "epoch": 0.526062423068017, "grad_norm": 1.6585177183151245, "learning_rate": 2.494483643361129e-06, "loss": 0.7719, "step": 43165 }, { "epoch": 0.5261233592921648, "grad_norm": 2.067368507385254, "learning_rate": 2.4941629249518926e-06, "loss": 0.8591, "step": 43170 }, { "epoch": 0.5261842955163126, "grad_norm": 2.263627767562866, "learning_rate": 2.4938422065426556e-06, "loss": 0.8813, "step": 43175 }, { "epoch": 0.5262452317404605, "grad_norm": 1.845844030380249, "learning_rate": 2.493521488133419e-06, "loss": 0.9188, "step": 43180 }, { "epoch": 0.5263061679646083, "grad_norm": 2.107602119445801, "learning_rate": 2.493200769724182e-06, "loss": 0.9307, "step": 43185 }, { "epoch": 0.5263671041887561, "grad_norm": 2.0551624298095703, "learning_rate": 2.4928800513149455e-06, "loss": 0.847, "step": 43190 }, { "epoch": 0.5264280404129038, "grad_norm": 1.8526307344436646, "learning_rate": 2.492559332905709e-06, "loss": 0.9009, "step": 43195 }, { "epoch": 0.5264889766370516, "grad_norm": 1.983646035194397, "learning_rate": 2.492238614496472e-06, "loss": 0.7941, "step": 43200 }, { "epoch": 0.5265499128611995, "grad_norm": 1.9523425102233887, "learning_rate": 2.4919178960872354e-06, "loss": 0.8801, "step": 43205 }, { "epoch": 0.5266108490853473, "grad_norm": 1.9209543466567993, "learning_rate": 2.491597177677999e-06, "loss": 0.9143, "step": 43210 }, { "epoch": 0.5266717853094951, "grad_norm": 1.9183136224746704, "learning_rate": 2.4912764592687623e-06, "loss": 0.7979, "step": 43215 }, { "epoch": 0.5267327215336429, "grad_norm": 1.9574010372161865, "learning_rate": 2.4909557408595257e-06, "loss": 0.8444, "step": 43220 }, { "epoch": 0.5267936577577907, "grad_norm": 1.7959496974945068, "learning_rate": 2.490635022450289e-06, "loss": 0.7817, "step": 43225 }, { "epoch": 0.5268545939819385, "grad_norm": 1.8159289360046387, "learning_rate": 2.490314304041052e-06, "loss": 0.8918, "step": 43230 }, { "epoch": 0.5269155302060863, "grad_norm": 1.8578083515167236, "learning_rate": 2.4899935856318156e-06, "loss": 0.8619, "step": 43235 }, { "epoch": 0.5269764664302341, "grad_norm": 2.2485406398773193, "learning_rate": 2.4896728672225787e-06, "loss": 0.8327, "step": 43240 }, { "epoch": 0.5270374026543819, "grad_norm": 1.727422833442688, "learning_rate": 2.489352148813342e-06, "loss": 0.8079, "step": 43245 }, { "epoch": 0.5270983388785297, "grad_norm": 1.7315797805786133, "learning_rate": 2.4890314304041055e-06, "loss": 0.8429, "step": 43250 }, { "epoch": 0.5271592751026776, "grad_norm": 1.98785400390625, "learning_rate": 2.4887107119948686e-06, "loss": 0.8616, "step": 43255 }, { "epoch": 0.5272202113268254, "grad_norm": 2.0877902507781982, "learning_rate": 2.488389993585632e-06, "loss": 0.8413, "step": 43260 }, { "epoch": 0.5272811475509731, "grad_norm": 1.5518258810043335, "learning_rate": 2.4880692751763954e-06, "loss": 0.8303, "step": 43265 }, { "epoch": 0.5273420837751209, "grad_norm": 2.0376596450805664, "learning_rate": 2.4877485567671585e-06, "loss": 0.8359, "step": 43270 }, { "epoch": 0.5274030199992688, "grad_norm": 1.9894671440124512, "learning_rate": 2.487427838357922e-06, "loss": 0.8186, "step": 43275 }, { "epoch": 0.5274639562234166, "grad_norm": 2.4611024856567383, "learning_rate": 2.4871071199486853e-06, "loss": 0.8826, "step": 43280 }, { "epoch": 0.5275248924475644, "grad_norm": 1.8086000680923462, "learning_rate": 2.4867864015394484e-06, "loss": 0.8561, "step": 43285 }, { "epoch": 0.5275858286717122, "grad_norm": 2.203493356704712, "learning_rate": 2.486465683130212e-06, "loss": 0.8179, "step": 43290 }, { "epoch": 0.52764676489586, "grad_norm": 1.8880070447921753, "learning_rate": 2.4861449647209752e-06, "loss": 0.8478, "step": 43295 }, { "epoch": 0.5277077011200078, "grad_norm": 2.0618860721588135, "learning_rate": 2.4858242463117387e-06, "loss": 0.7819, "step": 43300 }, { "epoch": 0.5277686373441556, "grad_norm": 1.846052646636963, "learning_rate": 2.485503527902502e-06, "loss": 0.8286, "step": 43305 }, { "epoch": 0.5278295735683034, "grad_norm": 1.7094882726669312, "learning_rate": 2.485182809493265e-06, "loss": 0.8437, "step": 43310 }, { "epoch": 0.5278905097924512, "grad_norm": 1.939490795135498, "learning_rate": 2.4848620910840286e-06, "loss": 0.8979, "step": 43315 }, { "epoch": 0.527951446016599, "grad_norm": 1.9852705001831055, "learning_rate": 2.4845413726747916e-06, "loss": 0.9091, "step": 43320 }, { "epoch": 0.5280123822407469, "grad_norm": 2.0657076835632324, "learning_rate": 2.484220654265555e-06, "loss": 0.787, "step": 43325 }, { "epoch": 0.5280733184648947, "grad_norm": 2.02864408493042, "learning_rate": 2.4838999358563185e-06, "loss": 0.8401, "step": 43330 }, { "epoch": 0.5281342546890424, "grad_norm": 1.7284518480300903, "learning_rate": 2.4835792174470815e-06, "loss": 0.8293, "step": 43335 }, { "epoch": 0.5281951909131902, "grad_norm": 1.677879810333252, "learning_rate": 2.483258499037845e-06, "loss": 0.7969, "step": 43340 }, { "epoch": 0.528256127137338, "grad_norm": 2.0434374809265137, "learning_rate": 2.4829377806286084e-06, "loss": 0.8342, "step": 43345 }, { "epoch": 0.5283170633614859, "grad_norm": 3.2033538818359375, "learning_rate": 2.4826170622193714e-06, "loss": 0.8401, "step": 43350 }, { "epoch": 0.5283779995856337, "grad_norm": 2.128067970275879, "learning_rate": 2.482296343810135e-06, "loss": 0.7621, "step": 43355 }, { "epoch": 0.5284389358097815, "grad_norm": 1.6224775314331055, "learning_rate": 2.4819756254008983e-06, "loss": 0.8739, "step": 43360 }, { "epoch": 0.5284998720339293, "grad_norm": 1.9496022462844849, "learning_rate": 2.4816549069916617e-06, "loss": 0.8511, "step": 43365 }, { "epoch": 0.528560808258077, "grad_norm": 2.030142307281494, "learning_rate": 2.4813341885824247e-06, "loss": 0.836, "step": 43370 }, { "epoch": 0.5286217444822249, "grad_norm": 1.7989225387573242, "learning_rate": 2.481013470173188e-06, "loss": 0.8046, "step": 43375 }, { "epoch": 0.5286826807063727, "grad_norm": 1.8812631368637085, "learning_rate": 2.4806927517639516e-06, "loss": 0.8302, "step": 43380 }, { "epoch": 0.5287436169305205, "grad_norm": 1.9285931587219238, "learning_rate": 2.480372033354715e-06, "loss": 0.8421, "step": 43385 }, { "epoch": 0.5288045531546683, "grad_norm": 1.9599820375442505, "learning_rate": 2.480051314945478e-06, "loss": 0.8881, "step": 43390 }, { "epoch": 0.5288654893788162, "grad_norm": 1.9657927751541138, "learning_rate": 2.4797305965362415e-06, "loss": 0.7817, "step": 43395 }, { "epoch": 0.528926425602964, "grad_norm": 2.2082672119140625, "learning_rate": 2.4794098781270045e-06, "loss": 0.806, "step": 43400 }, { "epoch": 0.5289873618271117, "grad_norm": 1.7804673910140991, "learning_rate": 2.479089159717768e-06, "loss": 0.8849, "step": 43405 }, { "epoch": 0.5290482980512595, "grad_norm": 1.6815165281295776, "learning_rate": 2.4787684413085314e-06, "loss": 0.849, "step": 43410 }, { "epoch": 0.5291092342754073, "grad_norm": 1.9267586469650269, "learning_rate": 2.4784477228992944e-06, "loss": 0.8507, "step": 43415 }, { "epoch": 0.5291701704995552, "grad_norm": 1.8525434732437134, "learning_rate": 2.478127004490058e-06, "loss": 0.7866, "step": 43420 }, { "epoch": 0.529231106723703, "grad_norm": 2.3275671005249023, "learning_rate": 2.4778062860808213e-06, "loss": 0.8395, "step": 43425 }, { "epoch": 0.5292920429478508, "grad_norm": 2.156583547592163, "learning_rate": 2.4774855676715843e-06, "loss": 0.8993, "step": 43430 }, { "epoch": 0.5293529791719986, "grad_norm": 2.2406723499298096, "learning_rate": 2.4771648492623478e-06, "loss": 0.815, "step": 43435 }, { "epoch": 0.5294139153961464, "grad_norm": 1.8878977298736572, "learning_rate": 2.476844130853111e-06, "loss": 0.877, "step": 43440 }, { "epoch": 0.5294748516202942, "grad_norm": 1.8550368547439575, "learning_rate": 2.4765234124438746e-06, "loss": 0.8115, "step": 43445 }, { "epoch": 0.529535787844442, "grad_norm": 1.9347407817840576, "learning_rate": 2.476202694034638e-06, "loss": 0.8397, "step": 43450 }, { "epoch": 0.5295967240685898, "grad_norm": 1.99250066280365, "learning_rate": 2.475881975625401e-06, "loss": 0.8592, "step": 43455 }, { "epoch": 0.5296576602927376, "grad_norm": 2.291672468185425, "learning_rate": 2.4755612572161645e-06, "loss": 0.9117, "step": 43460 }, { "epoch": 0.5297185965168855, "grad_norm": 2.009767770767212, "learning_rate": 2.475240538806928e-06, "loss": 0.8123, "step": 43465 }, { "epoch": 0.5297795327410333, "grad_norm": 1.707848310470581, "learning_rate": 2.474919820397691e-06, "loss": 0.8499, "step": 43470 }, { "epoch": 0.529840468965181, "grad_norm": 1.7877455949783325, "learning_rate": 2.4745991019884544e-06, "loss": 0.8326, "step": 43475 }, { "epoch": 0.5299014051893288, "grad_norm": 1.9899921417236328, "learning_rate": 2.4742783835792174e-06, "loss": 0.8609, "step": 43480 }, { "epoch": 0.5299623414134766, "grad_norm": 2.1551501750946045, "learning_rate": 2.473957665169981e-06, "loss": 0.8868, "step": 43485 }, { "epoch": 0.5300232776376245, "grad_norm": 1.9199199676513672, "learning_rate": 2.4736369467607443e-06, "loss": 0.8891, "step": 43490 }, { "epoch": 0.5300842138617723, "grad_norm": 1.8057522773742676, "learning_rate": 2.4733162283515073e-06, "loss": 0.7959, "step": 43495 }, { "epoch": 0.5301451500859201, "grad_norm": 1.8351272344589233, "learning_rate": 2.4729955099422708e-06, "loss": 0.7882, "step": 43500 }, { "epoch": 0.5302060863100679, "grad_norm": 2.015637159347534, "learning_rate": 2.4726747915330342e-06, "loss": 0.8664, "step": 43505 }, { "epoch": 0.5302670225342156, "grad_norm": 2.0637753009796143, "learning_rate": 2.4723540731237972e-06, "loss": 0.835, "step": 43510 }, { "epoch": 0.5303279587583635, "grad_norm": 1.8959884643554688, "learning_rate": 2.4720333547145607e-06, "loss": 0.8131, "step": 43515 }, { "epoch": 0.5303888949825113, "grad_norm": 1.9921189546585083, "learning_rate": 2.471712636305324e-06, "loss": 0.7744, "step": 43520 }, { "epoch": 0.5304498312066591, "grad_norm": 2.445140838623047, "learning_rate": 2.4713919178960876e-06, "loss": 0.9122, "step": 43525 }, { "epoch": 0.5305107674308069, "grad_norm": 2.0090999603271484, "learning_rate": 2.471071199486851e-06, "loss": 0.8236, "step": 43530 }, { "epoch": 0.5305717036549548, "grad_norm": 2.175692558288574, "learning_rate": 2.470750481077614e-06, "loss": 0.7444, "step": 43535 }, { "epoch": 0.5306326398791026, "grad_norm": 1.4737640619277954, "learning_rate": 2.4704297626683775e-06, "loss": 0.8085, "step": 43540 }, { "epoch": 0.5306935761032503, "grad_norm": 1.8372031450271606, "learning_rate": 2.470109044259141e-06, "loss": 0.8186, "step": 43545 }, { "epoch": 0.5307545123273981, "grad_norm": 1.8460332155227661, "learning_rate": 2.469788325849904e-06, "loss": 0.8717, "step": 43550 }, { "epoch": 0.5308154485515459, "grad_norm": 2.115647077560425, "learning_rate": 2.4694676074406674e-06, "loss": 0.7595, "step": 43555 }, { "epoch": 0.5308763847756938, "grad_norm": 2.4617676734924316, "learning_rate": 2.469146889031431e-06, "loss": 0.829, "step": 43560 }, { "epoch": 0.5309373209998416, "grad_norm": 2.1386942863464355, "learning_rate": 2.468826170622194e-06, "loss": 0.8458, "step": 43565 }, { "epoch": 0.5309982572239894, "grad_norm": 1.8594342470169067, "learning_rate": 2.4685054522129573e-06, "loss": 0.7983, "step": 43570 }, { "epoch": 0.5310591934481372, "grad_norm": 2.0913443565368652, "learning_rate": 2.4681847338037203e-06, "loss": 0.8469, "step": 43575 }, { "epoch": 0.531120129672285, "grad_norm": 2.1138505935668945, "learning_rate": 2.4678640153944837e-06, "loss": 0.8646, "step": 43580 }, { "epoch": 0.5311810658964328, "grad_norm": 1.8265823125839233, "learning_rate": 2.467543296985247e-06, "loss": 0.8121, "step": 43585 }, { "epoch": 0.5312420021205806, "grad_norm": 1.9157918691635132, "learning_rate": 2.4672225785760106e-06, "loss": 0.7666, "step": 43590 }, { "epoch": 0.5313029383447284, "grad_norm": 1.7228361368179321, "learning_rate": 2.4669018601667736e-06, "loss": 0.8417, "step": 43595 }, { "epoch": 0.5313638745688762, "grad_norm": 2.3527026176452637, "learning_rate": 2.466581141757537e-06, "loss": 0.8791, "step": 43600 }, { "epoch": 0.5314248107930241, "grad_norm": 2.1052324771881104, "learning_rate": 2.4662604233483005e-06, "loss": 0.8452, "step": 43605 }, { "epoch": 0.5314857470171719, "grad_norm": 1.9000173807144165, "learning_rate": 2.465939704939064e-06, "loss": 0.7719, "step": 43610 }, { "epoch": 0.5315466832413196, "grad_norm": 1.9698766469955444, "learning_rate": 2.465618986529827e-06, "loss": 0.816, "step": 43615 }, { "epoch": 0.5316076194654674, "grad_norm": 1.760246992111206, "learning_rate": 2.4652982681205904e-06, "loss": 0.8913, "step": 43620 }, { "epoch": 0.5316685556896152, "grad_norm": 1.7462528944015503, "learning_rate": 2.464977549711354e-06, "loss": 0.8306, "step": 43625 }, { "epoch": 0.5317294919137631, "grad_norm": 2.350595235824585, "learning_rate": 2.464656831302117e-06, "loss": 0.8131, "step": 43630 }, { "epoch": 0.5317904281379109, "grad_norm": 1.8480240106582642, "learning_rate": 2.4643361128928803e-06, "loss": 0.8565, "step": 43635 }, { "epoch": 0.5318513643620587, "grad_norm": 2.1500914096832275, "learning_rate": 2.4640153944836437e-06, "loss": 0.7761, "step": 43640 }, { "epoch": 0.5319123005862064, "grad_norm": 1.845849871635437, "learning_rate": 2.4636946760744067e-06, "loss": 0.7434, "step": 43645 }, { "epoch": 0.5319732368103542, "grad_norm": 1.9182603359222412, "learning_rate": 2.46337395766517e-06, "loss": 0.759, "step": 43650 }, { "epoch": 0.5320341730345021, "grad_norm": 1.7817567586898804, "learning_rate": 2.463053239255933e-06, "loss": 0.8399, "step": 43655 }, { "epoch": 0.5320951092586499, "grad_norm": 1.9013508558273315, "learning_rate": 2.4627325208466966e-06, "loss": 0.8407, "step": 43660 }, { "epoch": 0.5321560454827977, "grad_norm": 1.754563570022583, "learning_rate": 2.46241180243746e-06, "loss": 0.7832, "step": 43665 }, { "epoch": 0.5322169817069455, "grad_norm": 2.1367673873901367, "learning_rate": 2.4620910840282235e-06, "loss": 0.8542, "step": 43670 }, { "epoch": 0.5322779179310934, "grad_norm": 2.57352352142334, "learning_rate": 2.461770365618987e-06, "loss": 0.8113, "step": 43675 }, { "epoch": 0.5323388541552411, "grad_norm": 2.4924633502960205, "learning_rate": 2.46144964720975e-06, "loss": 0.891, "step": 43680 }, { "epoch": 0.5323997903793889, "grad_norm": 2.1632745265960693, "learning_rate": 2.4611289288005134e-06, "loss": 0.8284, "step": 43685 }, { "epoch": 0.5324607266035367, "grad_norm": 1.9933440685272217, "learning_rate": 2.460808210391277e-06, "loss": 0.8725, "step": 43690 }, { "epoch": 0.5325216628276845, "grad_norm": 2.3133749961853027, "learning_rate": 2.46048749198204e-06, "loss": 0.825, "step": 43695 }, { "epoch": 0.5325825990518324, "grad_norm": 1.8588857650756836, "learning_rate": 2.4601667735728033e-06, "loss": 0.8929, "step": 43700 }, { "epoch": 0.5326435352759802, "grad_norm": 2.2064995765686035, "learning_rate": 2.4598460551635668e-06, "loss": 0.7709, "step": 43705 }, { "epoch": 0.532704471500128, "grad_norm": 2.0037434101104736, "learning_rate": 2.4595253367543298e-06, "loss": 0.8819, "step": 43710 }, { "epoch": 0.5327654077242757, "grad_norm": 1.8697589635849, "learning_rate": 2.459204618345093e-06, "loss": 0.8076, "step": 43715 }, { "epoch": 0.5328263439484235, "grad_norm": 2.0688235759735107, "learning_rate": 2.4588838999358566e-06, "loss": 0.8011, "step": 43720 }, { "epoch": 0.5328872801725714, "grad_norm": 2.182015895843506, "learning_rate": 2.4585631815266197e-06, "loss": 0.8729, "step": 43725 }, { "epoch": 0.5329482163967192, "grad_norm": 1.7869774103164673, "learning_rate": 2.458242463117383e-06, "loss": 0.7532, "step": 43730 }, { "epoch": 0.533009152620867, "grad_norm": 2.2568955421447754, "learning_rate": 2.457921744708146e-06, "loss": 0.8716, "step": 43735 }, { "epoch": 0.5330700888450148, "grad_norm": 1.653357982635498, "learning_rate": 2.4576010262989096e-06, "loss": 0.7696, "step": 43740 }, { "epoch": 0.5331310250691627, "grad_norm": 1.8305492401123047, "learning_rate": 2.457280307889673e-06, "loss": 0.7819, "step": 43745 }, { "epoch": 0.5331919612933104, "grad_norm": 1.8640388250350952, "learning_rate": 2.4569595894804364e-06, "loss": 0.7833, "step": 43750 }, { "epoch": 0.5332528975174582, "grad_norm": 1.810136318206787, "learning_rate": 2.4566388710712e-06, "loss": 0.8665, "step": 43755 }, { "epoch": 0.533313833741606, "grad_norm": 1.6260522603988647, "learning_rate": 2.456318152661963e-06, "loss": 0.8831, "step": 43760 }, { "epoch": 0.5333747699657538, "grad_norm": 1.8719415664672852, "learning_rate": 2.4559974342527263e-06, "loss": 0.8927, "step": 43765 }, { "epoch": 0.5334357061899017, "grad_norm": 2.2909722328186035, "learning_rate": 2.4556767158434898e-06, "loss": 0.8612, "step": 43770 }, { "epoch": 0.5334966424140495, "grad_norm": 2.1448817253112793, "learning_rate": 2.455355997434253e-06, "loss": 0.8691, "step": 43775 }, { "epoch": 0.5335575786381973, "grad_norm": 2.217907428741455, "learning_rate": 2.4550352790250162e-06, "loss": 0.8512, "step": 43780 }, { "epoch": 0.533618514862345, "grad_norm": 2.170797824859619, "learning_rate": 2.4547145606157797e-06, "loss": 0.8717, "step": 43785 }, { "epoch": 0.5336794510864928, "grad_norm": 2.0050721168518066, "learning_rate": 2.4543938422065427e-06, "loss": 0.8341, "step": 43790 }, { "epoch": 0.5337403873106407, "grad_norm": 1.827155351638794, "learning_rate": 2.454073123797306e-06, "loss": 0.8463, "step": 43795 }, { "epoch": 0.5338013235347885, "grad_norm": 2.2929465770721436, "learning_rate": 2.4537524053880696e-06, "loss": 0.8224, "step": 43800 }, { "epoch": 0.5338622597589363, "grad_norm": 1.8132480382919312, "learning_rate": 2.4534316869788326e-06, "loss": 0.8995, "step": 43805 }, { "epoch": 0.5339231959830841, "grad_norm": 2.003506660461426, "learning_rate": 2.453110968569596e-06, "loss": 0.9335, "step": 43810 }, { "epoch": 0.533984132207232, "grad_norm": 1.79229736328125, "learning_rate": 2.4527902501603595e-06, "loss": 0.8387, "step": 43815 }, { "epoch": 0.5340450684313797, "grad_norm": 1.9765580892562866, "learning_rate": 2.4524695317511225e-06, "loss": 0.871, "step": 43820 }, { "epoch": 0.5341060046555275, "grad_norm": 2.22051739692688, "learning_rate": 2.452148813341886e-06, "loss": 0.7936, "step": 43825 }, { "epoch": 0.5341669408796753, "grad_norm": 2.3127810955047607, "learning_rate": 2.4518280949326494e-06, "loss": 0.8757, "step": 43830 }, { "epoch": 0.5342278771038231, "grad_norm": 2.299238920211792, "learning_rate": 2.451507376523413e-06, "loss": 0.7645, "step": 43835 }, { "epoch": 0.534288813327971, "grad_norm": 2.024998188018799, "learning_rate": 2.4511866581141763e-06, "loss": 0.8861, "step": 43840 }, { "epoch": 0.5343497495521188, "grad_norm": 2.014122486114502, "learning_rate": 2.4508659397049393e-06, "loss": 0.8634, "step": 43845 }, { "epoch": 0.5344106857762666, "grad_norm": 2.186840057373047, "learning_rate": 2.4505452212957027e-06, "loss": 0.8099, "step": 43850 }, { "epoch": 0.5344716220004143, "grad_norm": 1.8635637760162354, "learning_rate": 2.450224502886466e-06, "loss": 0.8948, "step": 43855 }, { "epoch": 0.5345325582245621, "grad_norm": 1.8834446668624878, "learning_rate": 2.449903784477229e-06, "loss": 0.8804, "step": 43860 }, { "epoch": 0.53459349444871, "grad_norm": 1.7307019233703613, "learning_rate": 2.4495830660679926e-06, "loss": 0.8408, "step": 43865 }, { "epoch": 0.5346544306728578, "grad_norm": 2.1480486392974854, "learning_rate": 2.4492623476587556e-06, "loss": 0.9, "step": 43870 }, { "epoch": 0.5347153668970056, "grad_norm": 2.4955804347991943, "learning_rate": 2.448941629249519e-06, "loss": 0.8269, "step": 43875 }, { "epoch": 0.5347763031211534, "grad_norm": 1.8903638124465942, "learning_rate": 2.4486209108402825e-06, "loss": 0.8295, "step": 43880 }, { "epoch": 0.5348372393453013, "grad_norm": 1.9541332721710205, "learning_rate": 2.4483001924310455e-06, "loss": 0.8499, "step": 43885 }, { "epoch": 0.534898175569449, "grad_norm": 1.8221811056137085, "learning_rate": 2.447979474021809e-06, "loss": 0.879, "step": 43890 }, { "epoch": 0.5349591117935968, "grad_norm": 2.8038642406463623, "learning_rate": 2.4476587556125724e-06, "loss": 0.8502, "step": 43895 }, { "epoch": 0.5350200480177446, "grad_norm": 1.8474524021148682, "learning_rate": 2.4473380372033354e-06, "loss": 0.8255, "step": 43900 }, { "epoch": 0.5350809842418924, "grad_norm": 1.835228681564331, "learning_rate": 2.447017318794099e-06, "loss": 0.821, "step": 43905 }, { "epoch": 0.5351419204660403, "grad_norm": 1.8801920413970947, "learning_rate": 2.4466966003848623e-06, "loss": 0.8163, "step": 43910 }, { "epoch": 0.5352028566901881, "grad_norm": 1.705029845237732, "learning_rate": 2.4463758819756257e-06, "loss": 0.7629, "step": 43915 }, { "epoch": 0.5352637929143359, "grad_norm": 1.9861044883728027, "learning_rate": 2.446055163566389e-06, "loss": 0.851, "step": 43920 }, { "epoch": 0.5353247291384836, "grad_norm": 1.7574279308319092, "learning_rate": 2.445734445157152e-06, "loss": 0.7986, "step": 43925 }, { "epoch": 0.5353856653626314, "grad_norm": 1.7515212297439575, "learning_rate": 2.4454137267479156e-06, "loss": 0.821, "step": 43930 }, { "epoch": 0.5354466015867793, "grad_norm": 1.6900391578674316, "learning_rate": 2.445093008338679e-06, "loss": 0.8621, "step": 43935 }, { "epoch": 0.5355075378109271, "grad_norm": 2.028219223022461, "learning_rate": 2.444772289929442e-06, "loss": 0.8469, "step": 43940 }, { "epoch": 0.5355684740350749, "grad_norm": 1.7900490760803223, "learning_rate": 2.4444515715202055e-06, "loss": 0.8374, "step": 43945 }, { "epoch": 0.5356294102592227, "grad_norm": 1.8387980461120605, "learning_rate": 2.4441308531109685e-06, "loss": 0.8204, "step": 43950 }, { "epoch": 0.5356903464833705, "grad_norm": 1.8813395500183105, "learning_rate": 2.443810134701732e-06, "loss": 0.7795, "step": 43955 }, { "epoch": 0.5357512827075183, "grad_norm": 1.6815181970596313, "learning_rate": 2.4434894162924954e-06, "loss": 0.837, "step": 43960 }, { "epoch": 0.5358122189316661, "grad_norm": 2.683098793029785, "learning_rate": 2.4431686978832584e-06, "loss": 0.8268, "step": 43965 }, { "epoch": 0.5358731551558139, "grad_norm": 1.8022500276565552, "learning_rate": 2.442847979474022e-06, "loss": 0.8431, "step": 43970 }, { "epoch": 0.5359340913799617, "grad_norm": 1.880437970161438, "learning_rate": 2.4425272610647853e-06, "loss": 0.8055, "step": 43975 }, { "epoch": 0.5359950276041096, "grad_norm": 2.23618483543396, "learning_rate": 2.4422065426555488e-06, "loss": 0.8809, "step": 43980 }, { "epoch": 0.5360559638282574, "grad_norm": 2.093630313873291, "learning_rate": 2.4418858242463118e-06, "loss": 0.7573, "step": 43985 }, { "epoch": 0.5361169000524052, "grad_norm": 1.8910878896713257, "learning_rate": 2.4415651058370752e-06, "loss": 0.7787, "step": 43990 }, { "epoch": 0.5361778362765529, "grad_norm": 1.8247618675231934, "learning_rate": 2.4412443874278387e-06, "loss": 0.8424, "step": 43995 }, { "epoch": 0.5362387725007007, "grad_norm": 2.273211717605591, "learning_rate": 2.440923669018602e-06, "loss": 0.9423, "step": 44000 }, { "epoch": 0.5362997087248486, "grad_norm": 2.061049461364746, "learning_rate": 2.440602950609365e-06, "loss": 0.8095, "step": 44005 }, { "epoch": 0.5363606449489964, "grad_norm": 1.8340550661087036, "learning_rate": 2.4402822322001286e-06, "loss": 0.8549, "step": 44010 }, { "epoch": 0.5364215811731442, "grad_norm": 2.5290346145629883, "learning_rate": 2.439961513790892e-06, "loss": 0.8615, "step": 44015 }, { "epoch": 0.536482517397292, "grad_norm": 3.522040367126465, "learning_rate": 2.439640795381655e-06, "loss": 0.804, "step": 44020 }, { "epoch": 0.5365434536214398, "grad_norm": 2.103273868560791, "learning_rate": 2.4393200769724185e-06, "loss": 0.8257, "step": 44025 }, { "epoch": 0.5366043898455876, "grad_norm": 1.928465723991394, "learning_rate": 2.4389993585631815e-06, "loss": 0.8051, "step": 44030 }, { "epoch": 0.5366653260697354, "grad_norm": 1.8658117055892944, "learning_rate": 2.438678640153945e-06, "loss": 0.9224, "step": 44035 }, { "epoch": 0.5367262622938832, "grad_norm": 2.161729574203491, "learning_rate": 2.4383579217447084e-06, "loss": 0.7426, "step": 44040 }, { "epoch": 0.536787198518031, "grad_norm": 1.876228928565979, "learning_rate": 2.4380372033354714e-06, "loss": 0.8549, "step": 44045 }, { "epoch": 0.5368481347421789, "grad_norm": 1.8142037391662598, "learning_rate": 2.437716484926235e-06, "loss": 0.9373, "step": 44050 }, { "epoch": 0.5369090709663267, "grad_norm": 1.8073369264602661, "learning_rate": 2.4373957665169983e-06, "loss": 0.8291, "step": 44055 }, { "epoch": 0.5369700071904745, "grad_norm": 1.8112040758132935, "learning_rate": 2.4370750481077617e-06, "loss": 0.8273, "step": 44060 }, { "epoch": 0.5370309434146222, "grad_norm": 2.495800018310547, "learning_rate": 2.436754329698525e-06, "loss": 0.8419, "step": 44065 }, { "epoch": 0.53709187963877, "grad_norm": 2.077495574951172, "learning_rate": 2.436433611289288e-06, "loss": 0.8048, "step": 44070 }, { "epoch": 0.5371528158629179, "grad_norm": 2.4564096927642822, "learning_rate": 2.4361128928800516e-06, "loss": 0.8391, "step": 44075 }, { "epoch": 0.5372137520870657, "grad_norm": 2.039534568786621, "learning_rate": 2.435792174470815e-06, "loss": 0.87, "step": 44080 }, { "epoch": 0.5372746883112135, "grad_norm": 2.0078530311584473, "learning_rate": 2.435471456061578e-06, "loss": 0.781, "step": 44085 }, { "epoch": 0.5373356245353613, "grad_norm": 1.888085126876831, "learning_rate": 2.4351507376523415e-06, "loss": 0.8407, "step": 44090 }, { "epoch": 0.5373965607595091, "grad_norm": 1.8435132503509521, "learning_rate": 2.434830019243105e-06, "loss": 0.8737, "step": 44095 }, { "epoch": 0.5374574969836569, "grad_norm": 1.8961116075515747, "learning_rate": 2.434509300833868e-06, "loss": 0.8474, "step": 44100 }, { "epoch": 0.5375184332078047, "grad_norm": 2.0829126834869385, "learning_rate": 2.4341885824246314e-06, "loss": 0.9059, "step": 44105 }, { "epoch": 0.5375793694319525, "grad_norm": 2.2527151107788086, "learning_rate": 2.433867864015395e-06, "loss": 0.8965, "step": 44110 }, { "epoch": 0.5376403056561003, "grad_norm": 1.778289556503296, "learning_rate": 2.433547145606158e-06, "loss": 0.8507, "step": 44115 }, { "epoch": 0.5377012418802481, "grad_norm": 2.098198175430298, "learning_rate": 2.4332264271969213e-06, "loss": 0.8619, "step": 44120 }, { "epoch": 0.537762178104396, "grad_norm": 1.7569035291671753, "learning_rate": 2.4329057087876843e-06, "loss": 0.8522, "step": 44125 }, { "epoch": 0.5378231143285438, "grad_norm": 1.979600191116333, "learning_rate": 2.4325849903784477e-06, "loss": 0.8909, "step": 44130 }, { "epoch": 0.5378840505526915, "grad_norm": 1.9202204942703247, "learning_rate": 2.432264271969211e-06, "loss": 0.8607, "step": 44135 }, { "epoch": 0.5379449867768393, "grad_norm": 1.7715299129486084, "learning_rate": 2.4319435535599746e-06, "loss": 0.812, "step": 44140 }, { "epoch": 0.5380059230009872, "grad_norm": 1.8060611486434937, "learning_rate": 2.431622835150738e-06, "loss": 0.8528, "step": 44145 }, { "epoch": 0.538066859225135, "grad_norm": 1.8090165853500366, "learning_rate": 2.4313021167415015e-06, "loss": 0.8329, "step": 44150 }, { "epoch": 0.5381277954492828, "grad_norm": 1.7791550159454346, "learning_rate": 2.4309813983322645e-06, "loss": 0.8622, "step": 44155 }, { "epoch": 0.5381887316734306, "grad_norm": 2.0026190280914307, "learning_rate": 2.430660679923028e-06, "loss": 0.7879, "step": 44160 }, { "epoch": 0.5382496678975784, "grad_norm": 2.189528703689575, "learning_rate": 2.430339961513791e-06, "loss": 0.8799, "step": 44165 }, { "epoch": 0.5383106041217262, "grad_norm": 2.2830183506011963, "learning_rate": 2.4300192431045544e-06, "loss": 0.7947, "step": 44170 }, { "epoch": 0.538371540345874, "grad_norm": 1.8853435516357422, "learning_rate": 2.429698524695318e-06, "loss": 0.8528, "step": 44175 }, { "epoch": 0.5384324765700218, "grad_norm": 1.8915035724639893, "learning_rate": 2.429377806286081e-06, "loss": 0.7884, "step": 44180 }, { "epoch": 0.5384934127941696, "grad_norm": 2.2758729457855225, "learning_rate": 2.4290570878768443e-06, "loss": 0.8071, "step": 44185 }, { "epoch": 0.5385543490183174, "grad_norm": 2.2182977199554443, "learning_rate": 2.4287363694676078e-06, "loss": 0.8749, "step": 44190 }, { "epoch": 0.5386152852424653, "grad_norm": 1.716638445854187, "learning_rate": 2.4284156510583708e-06, "loss": 0.8836, "step": 44195 }, { "epoch": 0.5386762214666131, "grad_norm": 1.827582597732544, "learning_rate": 2.428094932649134e-06, "loss": 0.8217, "step": 44200 }, { "epoch": 0.5387371576907608, "grad_norm": 1.7702471017837524, "learning_rate": 2.4277742142398977e-06, "loss": 0.8078, "step": 44205 }, { "epoch": 0.5387980939149086, "grad_norm": 1.8744617700576782, "learning_rate": 2.4274534958306607e-06, "loss": 0.8563, "step": 44210 }, { "epoch": 0.5388590301390564, "grad_norm": 2.073152542114258, "learning_rate": 2.427132777421424e-06, "loss": 0.8129, "step": 44215 }, { "epoch": 0.5389199663632043, "grad_norm": 2.587864875793457, "learning_rate": 2.4268120590121875e-06, "loss": 0.8578, "step": 44220 }, { "epoch": 0.5389809025873521, "grad_norm": 1.7208753824234009, "learning_rate": 2.426491340602951e-06, "loss": 0.7371, "step": 44225 }, { "epoch": 0.5390418388114999, "grad_norm": 1.9748855829238892, "learning_rate": 2.4261706221937144e-06, "loss": 0.8417, "step": 44230 }, { "epoch": 0.5391027750356477, "grad_norm": 1.828770637512207, "learning_rate": 2.4258499037844774e-06, "loss": 0.8052, "step": 44235 }, { "epoch": 0.5391637112597955, "grad_norm": 2.338413715362549, "learning_rate": 2.425529185375241e-06, "loss": 0.9011, "step": 44240 }, { "epoch": 0.5392246474839433, "grad_norm": 2.1371355056762695, "learning_rate": 2.425208466966004e-06, "loss": 0.8337, "step": 44245 }, { "epoch": 0.5392855837080911, "grad_norm": 1.694655418395996, "learning_rate": 2.4248877485567673e-06, "loss": 0.9066, "step": 44250 }, { "epoch": 0.5393465199322389, "grad_norm": 1.7093747854232788, "learning_rate": 2.4245670301475308e-06, "loss": 0.9191, "step": 44255 }, { "epoch": 0.5394074561563867, "grad_norm": 1.854987382888794, "learning_rate": 2.424246311738294e-06, "loss": 0.8556, "step": 44260 }, { "epoch": 0.5394683923805346, "grad_norm": 1.9818512201309204, "learning_rate": 2.4239255933290572e-06, "loss": 0.9111, "step": 44265 }, { "epoch": 0.5395293286046824, "grad_norm": 1.697043538093567, "learning_rate": 2.4236048749198207e-06, "loss": 0.7769, "step": 44270 }, { "epoch": 0.5395902648288301, "grad_norm": 2.4747283458709717, "learning_rate": 2.4232841565105837e-06, "loss": 0.7809, "step": 44275 }, { "epoch": 0.5396512010529779, "grad_norm": 1.9086793661117554, "learning_rate": 2.422963438101347e-06, "loss": 0.8636, "step": 44280 }, { "epoch": 0.5397121372771257, "grad_norm": 1.8145580291748047, "learning_rate": 2.4226427196921106e-06, "loss": 0.881, "step": 44285 }, { "epoch": 0.5397730735012736, "grad_norm": 2.763530969619751, "learning_rate": 2.422322001282874e-06, "loss": 0.7986, "step": 44290 }, { "epoch": 0.5398340097254214, "grad_norm": 1.9446009397506714, "learning_rate": 2.422001282873637e-06, "loss": 0.8799, "step": 44295 }, { "epoch": 0.5398949459495692, "grad_norm": 1.7016040086746216, "learning_rate": 2.4216805644644005e-06, "loss": 0.8998, "step": 44300 }, { "epoch": 0.539955882173717, "grad_norm": 1.8141558170318604, "learning_rate": 2.421359846055164e-06, "loss": 0.7919, "step": 44305 }, { "epoch": 0.5400168183978648, "grad_norm": 1.7576645612716675, "learning_rate": 2.4210391276459274e-06, "loss": 0.7934, "step": 44310 }, { "epoch": 0.5400777546220126, "grad_norm": 1.8746525049209595, "learning_rate": 2.4207184092366904e-06, "loss": 0.8016, "step": 44315 }, { "epoch": 0.5401386908461604, "grad_norm": 2.1668808460235596, "learning_rate": 2.420397690827454e-06, "loss": 0.8204, "step": 44320 }, { "epoch": 0.5401996270703082, "grad_norm": 1.8369146585464478, "learning_rate": 2.420076972418217e-06, "loss": 0.8319, "step": 44325 }, { "epoch": 0.540260563294456, "grad_norm": 1.829468011856079, "learning_rate": 2.4197562540089803e-06, "loss": 0.8379, "step": 44330 }, { "epoch": 0.5403214995186039, "grad_norm": 1.8142048120498657, "learning_rate": 2.4194355355997437e-06, "loss": 0.7767, "step": 44335 }, { "epoch": 0.5403824357427517, "grad_norm": 1.8255654573440552, "learning_rate": 2.4191148171905067e-06, "loss": 0.8161, "step": 44340 }, { "epoch": 0.5404433719668994, "grad_norm": 1.7660764455795288, "learning_rate": 2.41879409878127e-06, "loss": 0.866, "step": 44345 }, { "epoch": 0.5405043081910472, "grad_norm": 2.15256929397583, "learning_rate": 2.4184733803720336e-06, "loss": 0.8141, "step": 44350 }, { "epoch": 0.540565244415195, "grad_norm": 1.9256806373596191, "learning_rate": 2.4181526619627966e-06, "loss": 0.8707, "step": 44355 }, { "epoch": 0.5406261806393429, "grad_norm": 2.320513963699341, "learning_rate": 2.41783194355356e-06, "loss": 0.8714, "step": 44360 }, { "epoch": 0.5406871168634907, "grad_norm": 1.7028391361236572, "learning_rate": 2.4175112251443235e-06, "loss": 0.7574, "step": 44365 }, { "epoch": 0.5407480530876385, "grad_norm": 2.420243740081787, "learning_rate": 2.417190506735087e-06, "loss": 0.8005, "step": 44370 }, { "epoch": 0.5408089893117863, "grad_norm": 3.065863847732544, "learning_rate": 2.4168697883258504e-06, "loss": 0.804, "step": 44375 }, { "epoch": 0.540869925535934, "grad_norm": 1.9081910848617554, "learning_rate": 2.4165490699166134e-06, "loss": 0.8981, "step": 44380 }, { "epoch": 0.5409308617600819, "grad_norm": 2.0268166065216064, "learning_rate": 2.416228351507377e-06, "loss": 0.8222, "step": 44385 }, { "epoch": 0.5409917979842297, "grad_norm": 2.2131192684173584, "learning_rate": 2.4159076330981403e-06, "loss": 0.8343, "step": 44390 }, { "epoch": 0.5410527342083775, "grad_norm": 2.107769250869751, "learning_rate": 2.4155869146889033e-06, "loss": 0.8973, "step": 44395 }, { "epoch": 0.5411136704325253, "grad_norm": 1.9123082160949707, "learning_rate": 2.4152661962796667e-06, "loss": 0.7565, "step": 44400 }, { "epoch": 0.5411746066566732, "grad_norm": 1.7990691661834717, "learning_rate": 2.4149454778704298e-06, "loss": 0.8299, "step": 44405 }, { "epoch": 0.541235542880821, "grad_norm": 1.9051669836044312, "learning_rate": 2.414624759461193e-06, "loss": 0.8361, "step": 44410 }, { "epoch": 0.5412964791049687, "grad_norm": 1.6955926418304443, "learning_rate": 2.4143040410519566e-06, "loss": 0.8433, "step": 44415 }, { "epoch": 0.5413574153291165, "grad_norm": 2.0610337257385254, "learning_rate": 2.4139833226427197e-06, "loss": 0.7779, "step": 44420 }, { "epoch": 0.5414183515532643, "grad_norm": 1.9347341060638428, "learning_rate": 2.413662604233483e-06, "loss": 0.8648, "step": 44425 }, { "epoch": 0.5414792877774122, "grad_norm": 1.921974539756775, "learning_rate": 2.4133418858242465e-06, "loss": 0.8219, "step": 44430 }, { "epoch": 0.54154022400156, "grad_norm": 2.387429714202881, "learning_rate": 2.4130211674150096e-06, "loss": 0.8543, "step": 44435 }, { "epoch": 0.5416011602257078, "grad_norm": 1.8788117170333862, "learning_rate": 2.412700449005773e-06, "loss": 0.8665, "step": 44440 }, { "epoch": 0.5416620964498556, "grad_norm": 2.1689343452453613, "learning_rate": 2.4123797305965364e-06, "loss": 0.8015, "step": 44445 }, { "epoch": 0.5417230326740033, "grad_norm": 1.7311732769012451, "learning_rate": 2.4120590121873e-06, "loss": 0.8404, "step": 44450 }, { "epoch": 0.5417839688981512, "grad_norm": 2.0085608959198, "learning_rate": 2.4117382937780633e-06, "loss": 0.8115, "step": 44455 }, { "epoch": 0.541844905122299, "grad_norm": 2.006730318069458, "learning_rate": 2.4114175753688263e-06, "loss": 0.8295, "step": 44460 }, { "epoch": 0.5419058413464468, "grad_norm": 1.8796741962432861, "learning_rate": 2.4110968569595898e-06, "loss": 0.8143, "step": 44465 }, { "epoch": 0.5419667775705946, "grad_norm": 1.9250906705856323, "learning_rate": 2.410776138550353e-06, "loss": 0.8121, "step": 44470 }, { "epoch": 0.5420277137947425, "grad_norm": 2.024620294570923, "learning_rate": 2.4104554201411162e-06, "loss": 0.7704, "step": 44475 }, { "epoch": 0.5420886500188903, "grad_norm": 1.9540997743606567, "learning_rate": 2.4101347017318797e-06, "loss": 0.7819, "step": 44480 }, { "epoch": 0.542149586243038, "grad_norm": 1.7563316822052002, "learning_rate": 2.409813983322643e-06, "loss": 0.799, "step": 44485 }, { "epoch": 0.5422105224671858, "grad_norm": 2.297642469406128, "learning_rate": 2.409493264913406e-06, "loss": 0.7885, "step": 44490 }, { "epoch": 0.5422714586913336, "grad_norm": 2.4709887504577637, "learning_rate": 2.4091725465041696e-06, "loss": 0.8275, "step": 44495 }, { "epoch": 0.5423323949154815, "grad_norm": 1.6648536920547485, "learning_rate": 2.4088518280949326e-06, "loss": 0.7823, "step": 44500 }, { "epoch": 0.5423933311396293, "grad_norm": 1.9618812799453735, "learning_rate": 2.408531109685696e-06, "loss": 0.7858, "step": 44505 }, { "epoch": 0.5424542673637771, "grad_norm": 2.1348557472229004, "learning_rate": 2.4082103912764595e-06, "loss": 0.9078, "step": 44510 }, { "epoch": 0.5425152035879249, "grad_norm": 2.0620481967926025, "learning_rate": 2.407889672867223e-06, "loss": 0.8621, "step": 44515 }, { "epoch": 0.5425761398120726, "grad_norm": 1.4959650039672852, "learning_rate": 2.407568954457986e-06, "loss": 0.8371, "step": 44520 }, { "epoch": 0.5426370760362205, "grad_norm": 1.8058457374572754, "learning_rate": 2.4072482360487494e-06, "loss": 0.7555, "step": 44525 }, { "epoch": 0.5426980122603683, "grad_norm": 2.1532912254333496, "learning_rate": 2.406927517639513e-06, "loss": 0.8527, "step": 44530 }, { "epoch": 0.5427589484845161, "grad_norm": 1.7734158039093018, "learning_rate": 2.4066067992302762e-06, "loss": 0.8699, "step": 44535 }, { "epoch": 0.5428198847086639, "grad_norm": 1.9235615730285645, "learning_rate": 2.4062860808210393e-06, "loss": 0.7953, "step": 44540 }, { "epoch": 0.5428808209328118, "grad_norm": 2.0917913913726807, "learning_rate": 2.4059653624118027e-06, "loss": 0.8532, "step": 44545 }, { "epoch": 0.5429417571569596, "grad_norm": 2.010331869125366, "learning_rate": 2.405644644002566e-06, "loss": 0.7475, "step": 44550 }, { "epoch": 0.5430026933811073, "grad_norm": 1.8871740102767944, "learning_rate": 2.405323925593329e-06, "loss": 0.8525, "step": 44555 }, { "epoch": 0.5430636296052551, "grad_norm": 2.2304399013519287, "learning_rate": 2.4050032071840926e-06, "loss": 0.684, "step": 44560 }, { "epoch": 0.5431245658294029, "grad_norm": 2.234156608581543, "learning_rate": 2.404682488774856e-06, "loss": 0.8212, "step": 44565 }, { "epoch": 0.5431855020535508, "grad_norm": 2.187317132949829, "learning_rate": 2.404361770365619e-06, "loss": 0.7769, "step": 44570 }, { "epoch": 0.5432464382776986, "grad_norm": 2.0339393615722656, "learning_rate": 2.4040410519563825e-06, "loss": 0.7769, "step": 44575 }, { "epoch": 0.5433073745018464, "grad_norm": 1.8850181102752686, "learning_rate": 2.4037203335471455e-06, "loss": 0.8708, "step": 44580 }, { "epoch": 0.5433683107259941, "grad_norm": 2.1111316680908203, "learning_rate": 2.403399615137909e-06, "loss": 0.8587, "step": 44585 }, { "epoch": 0.5434292469501419, "grad_norm": 2.0456478595733643, "learning_rate": 2.4030788967286724e-06, "loss": 0.8129, "step": 44590 }, { "epoch": 0.5434901831742898, "grad_norm": 2.0965194702148438, "learning_rate": 2.402758178319436e-06, "loss": 0.82, "step": 44595 }, { "epoch": 0.5435511193984376, "grad_norm": 2.245239734649658, "learning_rate": 2.402437459910199e-06, "loss": 0.7992, "step": 44600 }, { "epoch": 0.5436120556225854, "grad_norm": 2.1062347888946533, "learning_rate": 2.4021167415009623e-06, "loss": 0.8173, "step": 44605 }, { "epoch": 0.5436729918467332, "grad_norm": 1.8109811544418335, "learning_rate": 2.4017960230917257e-06, "loss": 0.8116, "step": 44610 }, { "epoch": 0.5437339280708811, "grad_norm": 2.0422351360321045, "learning_rate": 2.401475304682489e-06, "loss": 0.8455, "step": 44615 }, { "epoch": 0.5437948642950288, "grad_norm": 2.5510647296905518, "learning_rate": 2.401154586273252e-06, "loss": 0.8391, "step": 44620 }, { "epoch": 0.5438558005191766, "grad_norm": 2.090306043624878, "learning_rate": 2.4008338678640156e-06, "loss": 0.8127, "step": 44625 }, { "epoch": 0.5439167367433244, "grad_norm": 1.749041199684143, "learning_rate": 2.400513149454779e-06, "loss": 0.8415, "step": 44630 }, { "epoch": 0.5439776729674722, "grad_norm": 1.8754475116729736, "learning_rate": 2.400192431045542e-06, "loss": 0.8058, "step": 44635 }, { "epoch": 0.5440386091916201, "grad_norm": 1.95705246925354, "learning_rate": 2.3998717126363055e-06, "loss": 0.8203, "step": 44640 }, { "epoch": 0.5440995454157679, "grad_norm": 1.785430669784546, "learning_rate": 2.399550994227069e-06, "loss": 0.8477, "step": 44645 }, { "epoch": 0.5441604816399157, "grad_norm": 1.9561231136322021, "learning_rate": 2.399230275817832e-06, "loss": 0.829, "step": 44650 }, { "epoch": 0.5442214178640634, "grad_norm": 1.6607396602630615, "learning_rate": 2.3989095574085954e-06, "loss": 0.8423, "step": 44655 }, { "epoch": 0.5442823540882112, "grad_norm": 1.985825538635254, "learning_rate": 2.3985888389993584e-06, "loss": 0.8172, "step": 44660 }, { "epoch": 0.5443432903123591, "grad_norm": 1.801403284072876, "learning_rate": 2.398268120590122e-06, "loss": 0.8081, "step": 44665 }, { "epoch": 0.5444042265365069, "grad_norm": 1.9377597570419312, "learning_rate": 2.3979474021808853e-06, "loss": 0.8024, "step": 44670 }, { "epoch": 0.5444651627606547, "grad_norm": 2.0813424587249756, "learning_rate": 2.3976266837716488e-06, "loss": 0.7592, "step": 44675 }, { "epoch": 0.5445260989848025, "grad_norm": 2.263387680053711, "learning_rate": 2.397305965362412e-06, "loss": 0.8205, "step": 44680 }, { "epoch": 0.5445870352089504, "grad_norm": 1.757854700088501, "learning_rate": 2.3969852469531752e-06, "loss": 0.8413, "step": 44685 }, { "epoch": 0.5446479714330981, "grad_norm": 2.210233449935913, "learning_rate": 2.3966645285439387e-06, "loss": 0.8826, "step": 44690 }, { "epoch": 0.5447089076572459, "grad_norm": 1.6954861879348755, "learning_rate": 2.396343810134702e-06, "loss": 0.851, "step": 44695 }, { "epoch": 0.5447698438813937, "grad_norm": 1.6636067628860474, "learning_rate": 2.396023091725465e-06, "loss": 0.8404, "step": 44700 }, { "epoch": 0.5448307801055415, "grad_norm": 1.891710638999939, "learning_rate": 2.3957023733162286e-06, "loss": 0.8298, "step": 44705 }, { "epoch": 0.5448917163296894, "grad_norm": 1.9108967781066895, "learning_rate": 2.395381654906992e-06, "loss": 0.871, "step": 44710 }, { "epoch": 0.5449526525538372, "grad_norm": 1.9808743000030518, "learning_rate": 2.395060936497755e-06, "loss": 0.9121, "step": 44715 }, { "epoch": 0.545013588777985, "grad_norm": 1.9337908029556274, "learning_rate": 2.3947402180885184e-06, "loss": 0.8329, "step": 44720 }, { "epoch": 0.5450745250021327, "grad_norm": 2.023019313812256, "learning_rate": 2.394419499679282e-06, "loss": 0.8887, "step": 44725 }, { "epoch": 0.5451354612262805, "grad_norm": 1.6978017091751099, "learning_rate": 2.394098781270045e-06, "loss": 0.8218, "step": 44730 }, { "epoch": 0.5451963974504284, "grad_norm": 2.2164652347564697, "learning_rate": 2.3937780628608083e-06, "loss": 0.7577, "step": 44735 }, { "epoch": 0.5452573336745762, "grad_norm": 2.3806350231170654, "learning_rate": 2.3934573444515718e-06, "loss": 0.8923, "step": 44740 }, { "epoch": 0.545318269898724, "grad_norm": 1.7265715599060059, "learning_rate": 2.393136626042335e-06, "loss": 0.8284, "step": 44745 }, { "epoch": 0.5453792061228718, "grad_norm": 2.486449956893921, "learning_rate": 2.3928159076330982e-06, "loss": 0.8772, "step": 44750 }, { "epoch": 0.5454401423470197, "grad_norm": 1.8586864471435547, "learning_rate": 2.3924951892238617e-06, "loss": 0.8003, "step": 44755 }, { "epoch": 0.5455010785711674, "grad_norm": 2.479482650756836, "learning_rate": 2.392174470814625e-06, "loss": 0.7799, "step": 44760 }, { "epoch": 0.5455620147953152, "grad_norm": 1.6239240169525146, "learning_rate": 2.3918537524053886e-06, "loss": 0.7768, "step": 44765 }, { "epoch": 0.545622951019463, "grad_norm": 2.1219725608825684, "learning_rate": 2.3915330339961516e-06, "loss": 0.7826, "step": 44770 }, { "epoch": 0.5456838872436108, "grad_norm": 1.722586750984192, "learning_rate": 2.391212315586915e-06, "loss": 0.8412, "step": 44775 }, { "epoch": 0.5457448234677587, "grad_norm": 2.0543291568756104, "learning_rate": 2.3908915971776785e-06, "loss": 0.8233, "step": 44780 }, { "epoch": 0.5458057596919065, "grad_norm": 1.6577130556106567, "learning_rate": 2.3905708787684415e-06, "loss": 0.7969, "step": 44785 }, { "epoch": 0.5458666959160543, "grad_norm": 1.716878056526184, "learning_rate": 2.390250160359205e-06, "loss": 0.8461, "step": 44790 }, { "epoch": 0.545927632140202, "grad_norm": 2.0238277912139893, "learning_rate": 2.389929441949968e-06, "loss": 0.8125, "step": 44795 }, { "epoch": 0.5459885683643498, "grad_norm": 1.728576898574829, "learning_rate": 2.3896087235407314e-06, "loss": 0.8647, "step": 44800 }, { "epoch": 0.5460495045884977, "grad_norm": 1.8121403455734253, "learning_rate": 2.389288005131495e-06, "loss": 0.8531, "step": 44805 }, { "epoch": 0.5461104408126455, "grad_norm": 1.4859224557876587, "learning_rate": 2.388967286722258e-06, "loss": 0.8095, "step": 44810 }, { "epoch": 0.5461713770367933, "grad_norm": 2.1332921981811523, "learning_rate": 2.3886465683130213e-06, "loss": 0.8386, "step": 44815 }, { "epoch": 0.5462323132609411, "grad_norm": 1.79073965549469, "learning_rate": 2.3883258499037847e-06, "loss": 0.8196, "step": 44820 }, { "epoch": 0.546293249485089, "grad_norm": 1.8534245491027832, "learning_rate": 2.3880051314945477e-06, "loss": 0.8081, "step": 44825 }, { "epoch": 0.5463541857092367, "grad_norm": 1.8777285814285278, "learning_rate": 2.387684413085311e-06, "loss": 0.9021, "step": 44830 }, { "epoch": 0.5464151219333845, "grad_norm": 2.3065621852874756, "learning_rate": 2.3873636946760746e-06, "loss": 0.7727, "step": 44835 }, { "epoch": 0.5464760581575323, "grad_norm": 1.809487223625183, "learning_rate": 2.387042976266838e-06, "loss": 0.8207, "step": 44840 }, { "epoch": 0.5465369943816801, "grad_norm": 1.8213284015655518, "learning_rate": 2.3867222578576015e-06, "loss": 0.816, "step": 44845 }, { "epoch": 0.546597930605828, "grad_norm": 1.8283703327178955, "learning_rate": 2.3864015394483645e-06, "loss": 0.8273, "step": 44850 }, { "epoch": 0.5466588668299758, "grad_norm": 1.8007527589797974, "learning_rate": 2.386080821039128e-06, "loss": 0.8364, "step": 44855 }, { "epoch": 0.5467198030541236, "grad_norm": 2.355865955352783, "learning_rate": 2.3857601026298914e-06, "loss": 0.8666, "step": 44860 }, { "epoch": 0.5467807392782713, "grad_norm": 2.1787188053131104, "learning_rate": 2.3854393842206544e-06, "loss": 0.7988, "step": 44865 }, { "epoch": 0.5468416755024191, "grad_norm": 1.7364026308059692, "learning_rate": 2.385118665811418e-06, "loss": 0.77, "step": 44870 }, { "epoch": 0.546902611726567, "grad_norm": 1.6841984987258911, "learning_rate": 2.384797947402181e-06, "loss": 0.804, "step": 44875 }, { "epoch": 0.5469635479507148, "grad_norm": 1.8067859411239624, "learning_rate": 2.3844772289929443e-06, "loss": 0.7915, "step": 44880 }, { "epoch": 0.5470244841748626, "grad_norm": 1.714808702468872, "learning_rate": 2.3841565105837077e-06, "loss": 0.8489, "step": 44885 }, { "epoch": 0.5470854203990104, "grad_norm": 1.942491054534912, "learning_rate": 2.3838357921744708e-06, "loss": 0.8206, "step": 44890 }, { "epoch": 0.5471463566231582, "grad_norm": 1.8905231952667236, "learning_rate": 2.383515073765234e-06, "loss": 0.8646, "step": 44895 }, { "epoch": 0.547207292847306, "grad_norm": 1.817240834236145, "learning_rate": 2.3831943553559976e-06, "loss": 0.8261, "step": 44900 }, { "epoch": 0.5472682290714538, "grad_norm": 2.5563032627105713, "learning_rate": 2.382873636946761e-06, "loss": 0.7826, "step": 44905 }, { "epoch": 0.5473291652956016, "grad_norm": 1.925000786781311, "learning_rate": 2.382552918537524e-06, "loss": 0.8435, "step": 44910 }, { "epoch": 0.5473901015197494, "grad_norm": 2.060703754425049, "learning_rate": 2.3822322001282875e-06, "loss": 0.839, "step": 44915 }, { "epoch": 0.5474510377438973, "grad_norm": 2.5697433948516846, "learning_rate": 2.381911481719051e-06, "loss": 0.8626, "step": 44920 }, { "epoch": 0.5475119739680451, "grad_norm": 2.1258034706115723, "learning_rate": 2.3815907633098144e-06, "loss": 0.9063, "step": 44925 }, { "epoch": 0.5475729101921929, "grad_norm": 1.9151690006256104, "learning_rate": 2.3812700449005774e-06, "loss": 0.83, "step": 44930 }, { "epoch": 0.5476338464163406, "grad_norm": 1.7267529964447021, "learning_rate": 2.380949326491341e-06, "loss": 0.8259, "step": 44935 }, { "epoch": 0.5476947826404884, "grad_norm": 2.2890355587005615, "learning_rate": 2.3806286080821043e-06, "loss": 0.8741, "step": 44940 }, { "epoch": 0.5477557188646363, "grad_norm": 2.031658411026001, "learning_rate": 2.3803078896728673e-06, "loss": 0.8101, "step": 44945 }, { "epoch": 0.5478166550887841, "grad_norm": 1.8519917726516724, "learning_rate": 2.3799871712636308e-06, "loss": 0.8662, "step": 44950 }, { "epoch": 0.5478775913129319, "grad_norm": 2.047792434692383, "learning_rate": 2.379666452854394e-06, "loss": 0.7964, "step": 44955 }, { "epoch": 0.5479385275370797, "grad_norm": 2.0527360439300537, "learning_rate": 2.3793457344451572e-06, "loss": 0.7679, "step": 44960 }, { "epoch": 0.5479994637612275, "grad_norm": 1.711995005607605, "learning_rate": 2.3790250160359207e-06, "loss": 0.7543, "step": 44965 }, { "epoch": 0.5480603999853753, "grad_norm": 1.7884019613265991, "learning_rate": 2.3787042976266837e-06, "loss": 0.8289, "step": 44970 }, { "epoch": 0.5481213362095231, "grad_norm": 2.203624725341797, "learning_rate": 2.378383579217447e-06, "loss": 0.8211, "step": 44975 }, { "epoch": 0.5481822724336709, "grad_norm": 1.796950340270996, "learning_rate": 2.3780628608082106e-06, "loss": 0.8057, "step": 44980 }, { "epoch": 0.5482432086578187, "grad_norm": 2.0468485355377197, "learning_rate": 2.377742142398974e-06, "loss": 0.902, "step": 44985 }, { "epoch": 0.5483041448819665, "grad_norm": 2.3246963024139404, "learning_rate": 2.3774214239897374e-06, "loss": 0.8387, "step": 44990 }, { "epoch": 0.5483650811061144, "grad_norm": 2.0040860176086426, "learning_rate": 2.3771007055805005e-06, "loss": 0.8139, "step": 44995 }, { "epoch": 0.5484260173302622, "grad_norm": 2.2866203784942627, "learning_rate": 2.376779987171264e-06, "loss": 0.8295, "step": 45000 }, { "epoch": 0.5484869535544099, "grad_norm": 1.9102685451507568, "learning_rate": 2.3764592687620273e-06, "loss": 0.8645, "step": 45005 }, { "epoch": 0.5485478897785577, "grad_norm": 1.8767575025558472, "learning_rate": 2.3761385503527904e-06, "loss": 0.7679, "step": 45010 }, { "epoch": 0.5486088260027056, "grad_norm": 2.2774100303649902, "learning_rate": 2.375817831943554e-06, "loss": 0.812, "step": 45015 }, { "epoch": 0.5486697622268534, "grad_norm": 1.8772932291030884, "learning_rate": 2.3754971135343172e-06, "loss": 0.8463, "step": 45020 }, { "epoch": 0.5487306984510012, "grad_norm": 2.0072567462921143, "learning_rate": 2.3751763951250803e-06, "loss": 0.8971, "step": 45025 }, { "epoch": 0.548791634675149, "grad_norm": 2.00248384475708, "learning_rate": 2.3748556767158437e-06, "loss": 0.8323, "step": 45030 }, { "epoch": 0.5488525708992968, "grad_norm": 2.0585997104644775, "learning_rate": 2.374534958306607e-06, "loss": 0.8032, "step": 45035 }, { "epoch": 0.5489135071234446, "grad_norm": 1.9720861911773682, "learning_rate": 2.37421423989737e-06, "loss": 0.828, "step": 45040 }, { "epoch": 0.5489744433475924, "grad_norm": 1.9957973957061768, "learning_rate": 2.3738935214881336e-06, "loss": 0.8391, "step": 45045 }, { "epoch": 0.5490353795717402, "grad_norm": 2.2287445068359375, "learning_rate": 2.3735728030788966e-06, "loss": 0.8718, "step": 45050 }, { "epoch": 0.549096315795888, "grad_norm": 2.0532846450805664, "learning_rate": 2.37325208466966e-06, "loss": 0.8439, "step": 45055 }, { "epoch": 0.5491572520200358, "grad_norm": 2.118520498275757, "learning_rate": 2.3729313662604235e-06, "loss": 0.7754, "step": 45060 }, { "epoch": 0.5492181882441837, "grad_norm": 1.9762499332427979, "learning_rate": 2.372610647851187e-06, "loss": 0.8477, "step": 45065 }, { "epoch": 0.5492791244683315, "grad_norm": 2.2381651401519775, "learning_rate": 2.3722899294419504e-06, "loss": 0.8485, "step": 45070 }, { "epoch": 0.5493400606924792, "grad_norm": 1.7181586027145386, "learning_rate": 2.3719692110327134e-06, "loss": 0.8951, "step": 45075 }, { "epoch": 0.549400996916627, "grad_norm": 1.843496322631836, "learning_rate": 2.371648492623477e-06, "loss": 0.8812, "step": 45080 }, { "epoch": 0.5494619331407748, "grad_norm": 1.8407189846038818, "learning_rate": 2.3713277742142403e-06, "loss": 0.7655, "step": 45085 }, { "epoch": 0.5495228693649227, "grad_norm": 1.6553621292114258, "learning_rate": 2.3710070558050033e-06, "loss": 0.8056, "step": 45090 }, { "epoch": 0.5495838055890705, "grad_norm": 1.7942004203796387, "learning_rate": 2.3706863373957667e-06, "loss": 0.842, "step": 45095 }, { "epoch": 0.5496447418132183, "grad_norm": 2.062793254852295, "learning_rate": 2.37036561898653e-06, "loss": 0.8539, "step": 45100 }, { "epoch": 0.5497056780373661, "grad_norm": 2.48677659034729, "learning_rate": 2.370044900577293e-06, "loss": 0.7785, "step": 45105 }, { "epoch": 0.5497666142615139, "grad_norm": 1.7894846200942993, "learning_rate": 2.3697241821680566e-06, "loss": 0.8647, "step": 45110 }, { "epoch": 0.5498275504856617, "grad_norm": 1.73958158493042, "learning_rate": 2.36940346375882e-06, "loss": 0.8445, "step": 45115 }, { "epoch": 0.5498884867098095, "grad_norm": 1.8338730335235596, "learning_rate": 2.369082745349583e-06, "loss": 0.8632, "step": 45120 }, { "epoch": 0.5499494229339573, "grad_norm": 1.6680700778961182, "learning_rate": 2.3687620269403465e-06, "loss": 0.826, "step": 45125 }, { "epoch": 0.5500103591581051, "grad_norm": 2.173065185546875, "learning_rate": 2.36844130853111e-06, "loss": 0.8744, "step": 45130 }, { "epoch": 0.550071295382253, "grad_norm": 1.8229727745056152, "learning_rate": 2.368120590121873e-06, "loss": 0.8269, "step": 45135 }, { "epoch": 0.5501322316064008, "grad_norm": 1.8734632730484009, "learning_rate": 2.3677998717126364e-06, "loss": 0.8156, "step": 45140 }, { "epoch": 0.5501931678305485, "grad_norm": 2.640204668045044, "learning_rate": 2.3674791533034e-06, "loss": 0.8448, "step": 45145 }, { "epoch": 0.5502541040546963, "grad_norm": 1.9199471473693848, "learning_rate": 2.3671584348941633e-06, "loss": 0.7783, "step": 45150 }, { "epoch": 0.5503150402788441, "grad_norm": 2.4321818351745605, "learning_rate": 2.3668377164849267e-06, "loss": 0.8273, "step": 45155 }, { "epoch": 0.550375976502992, "grad_norm": 1.7293730974197388, "learning_rate": 2.3665169980756898e-06, "loss": 0.7982, "step": 45160 }, { "epoch": 0.5504369127271398, "grad_norm": 1.8494441509246826, "learning_rate": 2.366196279666453e-06, "loss": 0.8679, "step": 45165 }, { "epoch": 0.5504978489512876, "grad_norm": 2.5163309574127197, "learning_rate": 2.3658755612572162e-06, "loss": 0.7652, "step": 45170 }, { "epoch": 0.5505587851754354, "grad_norm": 2.1020820140838623, "learning_rate": 2.3655548428479797e-06, "loss": 0.8827, "step": 45175 }, { "epoch": 0.5506197213995832, "grad_norm": 1.7813947200775146, "learning_rate": 2.365234124438743e-06, "loss": 0.7849, "step": 45180 }, { "epoch": 0.550680657623731, "grad_norm": 2.1551764011383057, "learning_rate": 2.364913406029506e-06, "loss": 0.8744, "step": 45185 }, { "epoch": 0.5507415938478788, "grad_norm": 1.7629855871200562, "learning_rate": 2.3645926876202696e-06, "loss": 0.8146, "step": 45190 }, { "epoch": 0.5508025300720266, "grad_norm": 2.031351089477539, "learning_rate": 2.364271969211033e-06, "loss": 0.8526, "step": 45195 }, { "epoch": 0.5508634662961744, "grad_norm": 2.014305830001831, "learning_rate": 2.363951250801796e-06, "loss": 0.8809, "step": 45200 }, { "epoch": 0.5509244025203223, "grad_norm": 2.0731260776519775, "learning_rate": 2.3636305323925595e-06, "loss": 0.7953, "step": 45205 }, { "epoch": 0.5509853387444701, "grad_norm": 2.1339643001556396, "learning_rate": 2.363309813983323e-06, "loss": 0.8633, "step": 45210 }, { "epoch": 0.5510462749686178, "grad_norm": 1.7146015167236328, "learning_rate": 2.3629890955740863e-06, "loss": 0.7873, "step": 45215 }, { "epoch": 0.5511072111927656, "grad_norm": 2.081221342086792, "learning_rate": 2.3626683771648493e-06, "loss": 0.8476, "step": 45220 }, { "epoch": 0.5511681474169134, "grad_norm": 2.315321922302246, "learning_rate": 2.362347658755613e-06, "loss": 0.8706, "step": 45225 }, { "epoch": 0.5512290836410613, "grad_norm": 1.674114465713501, "learning_rate": 2.3620269403463762e-06, "loss": 0.8585, "step": 45230 }, { "epoch": 0.5512900198652091, "grad_norm": 2.0150725841522217, "learning_rate": 2.3617062219371397e-06, "loss": 0.8348, "step": 45235 }, { "epoch": 0.5513509560893569, "grad_norm": 2.159503936767578, "learning_rate": 2.3613855035279027e-06, "loss": 0.8481, "step": 45240 }, { "epoch": 0.5514118923135047, "grad_norm": 1.8759406805038452, "learning_rate": 2.361064785118666e-06, "loss": 0.8417, "step": 45245 }, { "epoch": 0.5514728285376524, "grad_norm": 1.9626513719558716, "learning_rate": 2.360744066709429e-06, "loss": 0.8266, "step": 45250 }, { "epoch": 0.5515337647618003, "grad_norm": 1.6557917594909668, "learning_rate": 2.3604233483001926e-06, "loss": 0.814, "step": 45255 }, { "epoch": 0.5515947009859481, "grad_norm": 2.2401459217071533, "learning_rate": 2.360102629890956e-06, "loss": 0.8539, "step": 45260 }, { "epoch": 0.5516556372100959, "grad_norm": 1.8531965017318726, "learning_rate": 2.359781911481719e-06, "loss": 0.819, "step": 45265 }, { "epoch": 0.5517165734342437, "grad_norm": 1.91908597946167, "learning_rate": 2.3594611930724825e-06, "loss": 0.7862, "step": 45270 }, { "epoch": 0.5517775096583916, "grad_norm": 1.9887573719024658, "learning_rate": 2.359140474663246e-06, "loss": 0.8287, "step": 45275 }, { "epoch": 0.5518384458825394, "grad_norm": 1.7554677724838257, "learning_rate": 2.358819756254009e-06, "loss": 0.8505, "step": 45280 }, { "epoch": 0.5518993821066871, "grad_norm": 1.9502768516540527, "learning_rate": 2.3584990378447724e-06, "loss": 0.8854, "step": 45285 }, { "epoch": 0.5519603183308349, "grad_norm": 1.8006151914596558, "learning_rate": 2.358178319435536e-06, "loss": 0.8329, "step": 45290 }, { "epoch": 0.5520212545549827, "grad_norm": 1.958983063697815, "learning_rate": 2.3578576010262993e-06, "loss": 0.8795, "step": 45295 }, { "epoch": 0.5520821907791306, "grad_norm": 2.038245677947998, "learning_rate": 2.3575368826170623e-06, "loss": 0.8586, "step": 45300 }, { "epoch": 0.5521431270032784, "grad_norm": 1.7379497289657593, "learning_rate": 2.3572161642078257e-06, "loss": 0.838, "step": 45305 }, { "epoch": 0.5522040632274262, "grad_norm": 1.9848617315292358, "learning_rate": 2.356895445798589e-06, "loss": 0.8353, "step": 45310 }, { "epoch": 0.552264999451574, "grad_norm": 2.285611152648926, "learning_rate": 2.3565747273893526e-06, "loss": 0.8289, "step": 45315 }, { "epoch": 0.5523259356757217, "grad_norm": 2.1944432258605957, "learning_rate": 2.3562540089801156e-06, "loss": 0.8428, "step": 45320 }, { "epoch": 0.5523868718998696, "grad_norm": 1.9341238737106323, "learning_rate": 2.355933290570879e-06, "loss": 0.8149, "step": 45325 }, { "epoch": 0.5524478081240174, "grad_norm": 1.981696367263794, "learning_rate": 2.3556125721616425e-06, "loss": 0.8582, "step": 45330 }, { "epoch": 0.5525087443481652, "grad_norm": 2.023785352706909, "learning_rate": 2.3552918537524055e-06, "loss": 0.8349, "step": 45335 }, { "epoch": 0.552569680572313, "grad_norm": 1.9125196933746338, "learning_rate": 2.354971135343169e-06, "loss": 0.898, "step": 45340 }, { "epoch": 0.5526306167964609, "grad_norm": 2.1875033378601074, "learning_rate": 2.354650416933932e-06, "loss": 0.8504, "step": 45345 }, { "epoch": 0.5526915530206087, "grad_norm": 2.4063165187835693, "learning_rate": 2.3543296985246954e-06, "loss": 0.8562, "step": 45350 }, { "epoch": 0.5527524892447564, "grad_norm": 2.054025173187256, "learning_rate": 2.354008980115459e-06, "loss": 0.8011, "step": 45355 }, { "epoch": 0.5528134254689042, "grad_norm": 2.5567703247070312, "learning_rate": 2.353688261706222e-06, "loss": 0.8818, "step": 45360 }, { "epoch": 0.552874361693052, "grad_norm": 2.176480293273926, "learning_rate": 2.3533675432969853e-06, "loss": 0.8344, "step": 45365 }, { "epoch": 0.5529352979171999, "grad_norm": 2.0534729957580566, "learning_rate": 2.3530468248877487e-06, "loss": 0.8388, "step": 45370 }, { "epoch": 0.5529962341413477, "grad_norm": 1.9120858907699585, "learning_rate": 2.352726106478512e-06, "loss": 0.8182, "step": 45375 }, { "epoch": 0.5530571703654955, "grad_norm": 2.2603538036346436, "learning_rate": 2.3524053880692756e-06, "loss": 0.9562, "step": 45380 }, { "epoch": 0.5531181065896433, "grad_norm": 1.7960606813430786, "learning_rate": 2.3520846696600386e-06, "loss": 0.7924, "step": 45385 }, { "epoch": 0.553179042813791, "grad_norm": 1.5637569427490234, "learning_rate": 2.351763951250802e-06, "loss": 0.7915, "step": 45390 }, { "epoch": 0.5532399790379389, "grad_norm": 1.8946692943572998, "learning_rate": 2.3514432328415655e-06, "loss": 0.8508, "step": 45395 }, { "epoch": 0.5533009152620867, "grad_norm": 1.8551392555236816, "learning_rate": 2.3511225144323285e-06, "loss": 0.7878, "step": 45400 }, { "epoch": 0.5533618514862345, "grad_norm": 1.6884359121322632, "learning_rate": 2.350801796023092e-06, "loss": 0.8803, "step": 45405 }, { "epoch": 0.5534227877103823, "grad_norm": 1.805702567100525, "learning_rate": 2.3504810776138554e-06, "loss": 0.8396, "step": 45410 }, { "epoch": 0.5534837239345302, "grad_norm": 1.775997281074524, "learning_rate": 2.3501603592046184e-06, "loss": 0.861, "step": 45415 }, { "epoch": 0.553544660158678, "grad_norm": 2.365422248840332, "learning_rate": 2.349839640795382e-06, "loss": 0.8402, "step": 45420 }, { "epoch": 0.5536055963828257, "grad_norm": 1.8904836177825928, "learning_rate": 2.349518922386145e-06, "loss": 0.8205, "step": 45425 }, { "epoch": 0.5536665326069735, "grad_norm": 2.1393024921417236, "learning_rate": 2.3491982039769083e-06, "loss": 0.8121, "step": 45430 }, { "epoch": 0.5537274688311213, "grad_norm": 2.202533483505249, "learning_rate": 2.3488774855676718e-06, "loss": 0.8235, "step": 45435 }, { "epoch": 0.5537884050552692, "grad_norm": 1.9080536365509033, "learning_rate": 2.348556767158435e-06, "loss": 0.8468, "step": 45440 }, { "epoch": 0.553849341279417, "grad_norm": 2.2187423706054688, "learning_rate": 2.3482360487491982e-06, "loss": 0.8217, "step": 45445 }, { "epoch": 0.5539102775035648, "grad_norm": 1.702760934829712, "learning_rate": 2.3479153303399617e-06, "loss": 0.788, "step": 45450 }, { "epoch": 0.5539712137277126, "grad_norm": 2.017843008041382, "learning_rate": 2.347594611930725e-06, "loss": 0.75, "step": 45455 }, { "epoch": 0.5540321499518603, "grad_norm": 1.9291963577270508, "learning_rate": 2.3472738935214886e-06, "loss": 0.8397, "step": 45460 }, { "epoch": 0.5540930861760082, "grad_norm": 1.9516046047210693, "learning_rate": 2.3469531751122516e-06, "loss": 0.734, "step": 45465 }, { "epoch": 0.554154022400156, "grad_norm": 2.6190345287323, "learning_rate": 2.346632456703015e-06, "loss": 0.8538, "step": 45470 }, { "epoch": 0.5542149586243038, "grad_norm": 2.3736183643341064, "learning_rate": 2.3463117382937785e-06, "loss": 0.8557, "step": 45475 }, { "epoch": 0.5542758948484516, "grad_norm": 1.7443772554397583, "learning_rate": 2.3459910198845415e-06, "loss": 0.7916, "step": 45480 }, { "epoch": 0.5543368310725995, "grad_norm": 1.8493382930755615, "learning_rate": 2.345670301475305e-06, "loss": 0.7995, "step": 45485 }, { "epoch": 0.5543977672967473, "grad_norm": 2.182375431060791, "learning_rate": 2.3453495830660683e-06, "loss": 0.8523, "step": 45490 }, { "epoch": 0.554458703520895, "grad_norm": 1.7943311929702759, "learning_rate": 2.3450288646568314e-06, "loss": 0.8899, "step": 45495 }, { "epoch": 0.5545196397450428, "grad_norm": 2.194218397140503, "learning_rate": 2.344708146247595e-06, "loss": 0.8588, "step": 45500 }, { "epoch": 0.5545805759691906, "grad_norm": 1.7872743606567383, "learning_rate": 2.344387427838358e-06, "loss": 0.8533, "step": 45505 }, { "epoch": 0.5546415121933385, "grad_norm": 1.8666417598724365, "learning_rate": 2.3440667094291213e-06, "loss": 0.7698, "step": 45510 }, { "epoch": 0.5547024484174863, "grad_norm": 1.617026686668396, "learning_rate": 2.3437459910198847e-06, "loss": 0.8756, "step": 45515 }, { "epoch": 0.5547633846416341, "grad_norm": 1.846587896347046, "learning_rate": 2.343425272610648e-06, "loss": 0.8924, "step": 45520 }, { "epoch": 0.5548243208657819, "grad_norm": 1.6301249265670776, "learning_rate": 2.343104554201411e-06, "loss": 0.8259, "step": 45525 }, { "epoch": 0.5548852570899296, "grad_norm": 2.0008163452148438, "learning_rate": 2.3427838357921746e-06, "loss": 0.8991, "step": 45530 }, { "epoch": 0.5549461933140775, "grad_norm": 1.9622281789779663, "learning_rate": 2.342463117382938e-06, "loss": 0.8242, "step": 45535 }, { "epoch": 0.5550071295382253, "grad_norm": 2.27162766456604, "learning_rate": 2.3421423989737015e-06, "loss": 0.8151, "step": 45540 }, { "epoch": 0.5550680657623731, "grad_norm": 1.8842918872833252, "learning_rate": 2.3418216805644645e-06, "loss": 0.8588, "step": 45545 }, { "epoch": 0.5551290019865209, "grad_norm": 1.6459202766418457, "learning_rate": 2.341500962155228e-06, "loss": 0.7807, "step": 45550 }, { "epoch": 0.5551899382106688, "grad_norm": 2.0455071926116943, "learning_rate": 2.3411802437459914e-06, "loss": 0.8604, "step": 45555 }, { "epoch": 0.5552508744348165, "grad_norm": 1.9419240951538086, "learning_rate": 2.3408595253367544e-06, "loss": 0.8608, "step": 45560 }, { "epoch": 0.5553118106589643, "grad_norm": 1.8504624366760254, "learning_rate": 2.340538806927518e-06, "loss": 0.8321, "step": 45565 }, { "epoch": 0.5553727468831121, "grad_norm": 1.8120476007461548, "learning_rate": 2.3402180885182813e-06, "loss": 0.8659, "step": 45570 }, { "epoch": 0.5554336831072599, "grad_norm": 1.9297864437103271, "learning_rate": 2.3398973701090443e-06, "loss": 0.7976, "step": 45575 }, { "epoch": 0.5554946193314078, "grad_norm": 1.9490785598754883, "learning_rate": 2.3395766516998077e-06, "loss": 0.8644, "step": 45580 }, { "epoch": 0.5555555555555556, "grad_norm": 1.9935874938964844, "learning_rate": 2.3392559332905707e-06, "loss": 0.7763, "step": 45585 }, { "epoch": 0.5556164917797034, "grad_norm": 2.3118174076080322, "learning_rate": 2.338935214881334e-06, "loss": 0.921, "step": 45590 }, { "epoch": 0.5556774280038511, "grad_norm": 2.1808791160583496, "learning_rate": 2.3386144964720976e-06, "loss": 0.8677, "step": 45595 }, { "epoch": 0.5557383642279989, "grad_norm": 2.0182626247406006, "learning_rate": 2.338293778062861e-06, "loss": 0.8322, "step": 45600 }, { "epoch": 0.5557993004521468, "grad_norm": 1.9109476804733276, "learning_rate": 2.3379730596536245e-06, "loss": 0.8837, "step": 45605 }, { "epoch": 0.5558602366762946, "grad_norm": 1.8946406841278076, "learning_rate": 2.3376523412443875e-06, "loss": 0.7536, "step": 45610 }, { "epoch": 0.5559211729004424, "grad_norm": 1.656343936920166, "learning_rate": 2.337331622835151e-06, "loss": 0.7932, "step": 45615 }, { "epoch": 0.5559821091245902, "grad_norm": 2.412215232849121, "learning_rate": 2.3370109044259144e-06, "loss": 0.8277, "step": 45620 }, { "epoch": 0.556043045348738, "grad_norm": 2.08483624458313, "learning_rate": 2.336690186016678e-06, "loss": 0.8423, "step": 45625 }, { "epoch": 0.5561039815728858, "grad_norm": 2.0491480827331543, "learning_rate": 2.336369467607441e-06, "loss": 0.7579, "step": 45630 }, { "epoch": 0.5561649177970336, "grad_norm": 1.897796392440796, "learning_rate": 2.3360487491982043e-06, "loss": 0.8409, "step": 45635 }, { "epoch": 0.5562258540211814, "grad_norm": 2.003617763519287, "learning_rate": 2.3357280307889673e-06, "loss": 0.9103, "step": 45640 }, { "epoch": 0.5562867902453292, "grad_norm": 1.8995312452316284, "learning_rate": 2.3354073123797308e-06, "loss": 0.8416, "step": 45645 }, { "epoch": 0.556347726469477, "grad_norm": 1.831189751625061, "learning_rate": 2.335086593970494e-06, "loss": 0.8206, "step": 45650 }, { "epoch": 0.5564086626936249, "grad_norm": 2.0136923789978027, "learning_rate": 2.3347658755612572e-06, "loss": 0.8186, "step": 45655 }, { "epoch": 0.5564695989177727, "grad_norm": 1.9667494297027588, "learning_rate": 2.3344451571520207e-06, "loss": 0.7741, "step": 45660 }, { "epoch": 0.5565305351419204, "grad_norm": 1.9740864038467407, "learning_rate": 2.334124438742784e-06, "loss": 0.799, "step": 45665 }, { "epoch": 0.5565914713660682, "grad_norm": 1.6876977682113647, "learning_rate": 2.333803720333547e-06, "loss": 0.7274, "step": 45670 }, { "epoch": 0.5566524075902161, "grad_norm": 1.9392127990722656, "learning_rate": 2.3334830019243106e-06, "loss": 0.7888, "step": 45675 }, { "epoch": 0.5567133438143639, "grad_norm": 1.815091848373413, "learning_rate": 2.333162283515074e-06, "loss": 0.8116, "step": 45680 }, { "epoch": 0.5567742800385117, "grad_norm": 1.8875621557235718, "learning_rate": 2.3328415651058374e-06, "loss": 0.787, "step": 45685 }, { "epoch": 0.5568352162626595, "grad_norm": 2.1403424739837646, "learning_rate": 2.332520846696601e-06, "loss": 0.7686, "step": 45690 }, { "epoch": 0.5568961524868073, "grad_norm": 1.5820133686065674, "learning_rate": 2.332200128287364e-06, "loss": 0.861, "step": 45695 }, { "epoch": 0.5569570887109551, "grad_norm": 1.921158790588379, "learning_rate": 2.3318794098781273e-06, "loss": 0.9072, "step": 45700 }, { "epoch": 0.5570180249351029, "grad_norm": 1.843696117401123, "learning_rate": 2.3315586914688908e-06, "loss": 0.894, "step": 45705 }, { "epoch": 0.5570789611592507, "grad_norm": 1.8719087839126587, "learning_rate": 2.331237973059654e-06, "loss": 0.7532, "step": 45710 }, { "epoch": 0.5571398973833985, "grad_norm": 1.803598165512085, "learning_rate": 2.3309172546504172e-06, "loss": 0.8815, "step": 45715 }, { "epoch": 0.5572008336075464, "grad_norm": 2.085581064224243, "learning_rate": 2.3305965362411802e-06, "loss": 0.8138, "step": 45720 }, { "epoch": 0.5572617698316942, "grad_norm": 1.6930423974990845, "learning_rate": 2.3302758178319437e-06, "loss": 0.8701, "step": 45725 }, { "epoch": 0.557322706055842, "grad_norm": 2.1517815589904785, "learning_rate": 2.329955099422707e-06, "loss": 0.9157, "step": 45730 }, { "epoch": 0.5573836422799897, "grad_norm": 2.1040003299713135, "learning_rate": 2.32963438101347e-06, "loss": 0.7578, "step": 45735 }, { "epoch": 0.5574445785041375, "grad_norm": 2.033674478530884, "learning_rate": 2.3293136626042336e-06, "loss": 0.7943, "step": 45740 }, { "epoch": 0.5575055147282854, "grad_norm": 2.1038053035736084, "learning_rate": 2.328992944194997e-06, "loss": 0.9063, "step": 45745 }, { "epoch": 0.5575664509524332, "grad_norm": 1.9419893026351929, "learning_rate": 2.32867222578576e-06, "loss": 0.9033, "step": 45750 }, { "epoch": 0.557627387176581, "grad_norm": 2.1295623779296875, "learning_rate": 2.3283515073765235e-06, "loss": 0.8613, "step": 45755 }, { "epoch": 0.5576883234007288, "grad_norm": 2.1357953548431396, "learning_rate": 2.328030788967287e-06, "loss": 0.7707, "step": 45760 }, { "epoch": 0.5577492596248766, "grad_norm": 1.961262583732605, "learning_rate": 2.3277100705580504e-06, "loss": 0.8108, "step": 45765 }, { "epoch": 0.5578101958490244, "grad_norm": 2.1504948139190674, "learning_rate": 2.327389352148814e-06, "loss": 0.7817, "step": 45770 }, { "epoch": 0.5578711320731722, "grad_norm": 2.0487020015716553, "learning_rate": 2.327068633739577e-06, "loss": 0.8466, "step": 45775 }, { "epoch": 0.55793206829732, "grad_norm": 2.088578701019287, "learning_rate": 2.3267479153303403e-06, "loss": 0.9217, "step": 45780 }, { "epoch": 0.5579930045214678, "grad_norm": 2.0951597690582275, "learning_rate": 2.3264271969211037e-06, "loss": 0.8301, "step": 45785 }, { "epoch": 0.5580539407456157, "grad_norm": 1.7801073789596558, "learning_rate": 2.3261064785118667e-06, "loss": 0.8416, "step": 45790 }, { "epoch": 0.5581148769697635, "grad_norm": 2.261749267578125, "learning_rate": 2.32578576010263e-06, "loss": 0.9122, "step": 45795 }, { "epoch": 0.5581758131939113, "grad_norm": 1.7257394790649414, "learning_rate": 2.325465041693393e-06, "loss": 0.85, "step": 45800 }, { "epoch": 0.558236749418059, "grad_norm": 1.7975250482559204, "learning_rate": 2.3251443232841566e-06, "loss": 0.8347, "step": 45805 }, { "epoch": 0.5582976856422068, "grad_norm": 1.8574849367141724, "learning_rate": 2.32482360487492e-06, "loss": 0.759, "step": 45810 }, { "epoch": 0.5583586218663547, "grad_norm": 1.841125249862671, "learning_rate": 2.324502886465683e-06, "loss": 0.8547, "step": 45815 }, { "epoch": 0.5584195580905025, "grad_norm": 1.916663408279419, "learning_rate": 2.3241821680564465e-06, "loss": 0.8136, "step": 45820 }, { "epoch": 0.5584804943146503, "grad_norm": 2.704911470413208, "learning_rate": 2.32386144964721e-06, "loss": 0.885, "step": 45825 }, { "epoch": 0.5585414305387981, "grad_norm": 2.1110568046569824, "learning_rate": 2.3235407312379734e-06, "loss": 0.8982, "step": 45830 }, { "epoch": 0.558602366762946, "grad_norm": 1.5807976722717285, "learning_rate": 2.3232200128287364e-06, "loss": 0.8255, "step": 45835 }, { "epoch": 0.5586633029870937, "grad_norm": 2.7611281871795654, "learning_rate": 2.3228992944195e-06, "loss": 0.803, "step": 45840 }, { "epoch": 0.5587242392112415, "grad_norm": 1.8501120805740356, "learning_rate": 2.3225785760102633e-06, "loss": 0.7327, "step": 45845 }, { "epoch": 0.5587851754353893, "grad_norm": 1.9378618001937866, "learning_rate": 2.3222578576010267e-06, "loss": 0.8306, "step": 45850 }, { "epoch": 0.5588461116595371, "grad_norm": 2.324126958847046, "learning_rate": 2.3219371391917897e-06, "loss": 0.7913, "step": 45855 }, { "epoch": 0.558907047883685, "grad_norm": 1.9552764892578125, "learning_rate": 2.321616420782553e-06, "loss": 0.8952, "step": 45860 }, { "epoch": 0.5589679841078328, "grad_norm": 2.346935272216797, "learning_rate": 2.3212957023733166e-06, "loss": 0.8649, "step": 45865 }, { "epoch": 0.5590289203319806, "grad_norm": 2.128573179244995, "learning_rate": 2.3209749839640796e-06, "loss": 0.8957, "step": 45870 }, { "epoch": 0.5590898565561283, "grad_norm": 2.128260850906372, "learning_rate": 2.320654265554843e-06, "loss": 0.8788, "step": 45875 }, { "epoch": 0.5591507927802761, "grad_norm": 2.0858633518218994, "learning_rate": 2.320333547145606e-06, "loss": 0.817, "step": 45880 }, { "epoch": 0.559211729004424, "grad_norm": 2.0386126041412354, "learning_rate": 2.3200128287363695e-06, "loss": 0.8669, "step": 45885 }, { "epoch": 0.5592726652285718, "grad_norm": 1.998369574546814, "learning_rate": 2.319692110327133e-06, "loss": 0.8287, "step": 45890 }, { "epoch": 0.5593336014527196, "grad_norm": 2.482882261276245, "learning_rate": 2.319371391917896e-06, "loss": 0.8525, "step": 45895 }, { "epoch": 0.5593945376768674, "grad_norm": 1.9726756811141968, "learning_rate": 2.3190506735086594e-06, "loss": 0.8095, "step": 45900 }, { "epoch": 0.5594554739010152, "grad_norm": 1.6265935897827148, "learning_rate": 2.318729955099423e-06, "loss": 0.861, "step": 45905 }, { "epoch": 0.559516410125163, "grad_norm": 2.121853828430176, "learning_rate": 2.3184092366901863e-06, "loss": 0.8339, "step": 45910 }, { "epoch": 0.5595773463493108, "grad_norm": 2.073399543762207, "learning_rate": 2.3180885182809498e-06, "loss": 0.7697, "step": 45915 }, { "epoch": 0.5596382825734586, "grad_norm": 2.0407519340515137, "learning_rate": 2.3177677998717128e-06, "loss": 0.7003, "step": 45920 }, { "epoch": 0.5596992187976064, "grad_norm": 2.204632043838501, "learning_rate": 2.3174470814624762e-06, "loss": 0.8288, "step": 45925 }, { "epoch": 0.5597601550217542, "grad_norm": 1.868776798248291, "learning_rate": 2.3171263630532397e-06, "loss": 0.7445, "step": 45930 }, { "epoch": 0.5598210912459021, "grad_norm": 1.7458539009094238, "learning_rate": 2.3168056446440027e-06, "loss": 0.8144, "step": 45935 }, { "epoch": 0.5598820274700499, "grad_norm": 1.9416100978851318, "learning_rate": 2.316484926234766e-06, "loss": 0.8847, "step": 45940 }, { "epoch": 0.5599429636941976, "grad_norm": 1.9329073429107666, "learning_rate": 2.3161642078255296e-06, "loss": 0.7593, "step": 45945 }, { "epoch": 0.5600038999183454, "grad_norm": 1.8525187969207764, "learning_rate": 2.3158434894162926e-06, "loss": 0.8269, "step": 45950 }, { "epoch": 0.5600648361424932, "grad_norm": 2.2495720386505127, "learning_rate": 2.315522771007056e-06, "loss": 0.8254, "step": 45955 }, { "epoch": 0.5601257723666411, "grad_norm": 1.9241044521331787, "learning_rate": 2.3152020525978195e-06, "loss": 0.8411, "step": 45960 }, { "epoch": 0.5601867085907889, "grad_norm": 2.0402989387512207, "learning_rate": 2.3148813341885825e-06, "loss": 0.89, "step": 45965 }, { "epoch": 0.5602476448149367, "grad_norm": 1.9839510917663574, "learning_rate": 2.314560615779346e-06, "loss": 0.802, "step": 45970 }, { "epoch": 0.5603085810390845, "grad_norm": 1.7608016729354858, "learning_rate": 2.314239897370109e-06, "loss": 0.8146, "step": 45975 }, { "epoch": 0.5603695172632323, "grad_norm": 1.7119789123535156, "learning_rate": 2.3139191789608724e-06, "loss": 0.897, "step": 45980 }, { "epoch": 0.5604304534873801, "grad_norm": 1.6548329591751099, "learning_rate": 2.313598460551636e-06, "loss": 0.8451, "step": 45985 }, { "epoch": 0.5604913897115279, "grad_norm": 1.9795653820037842, "learning_rate": 2.3132777421423992e-06, "loss": 0.8751, "step": 45990 }, { "epoch": 0.5605523259356757, "grad_norm": 2.2262284755706787, "learning_rate": 2.3129570237331627e-06, "loss": 0.8026, "step": 45995 }, { "epoch": 0.5606132621598235, "grad_norm": 2.1406002044677734, "learning_rate": 2.3126363053239257e-06, "loss": 0.8148, "step": 46000 }, { "epoch": 0.5606741983839714, "grad_norm": 1.8847371339797974, "learning_rate": 2.312315586914689e-06, "loss": 0.8224, "step": 46005 }, { "epoch": 0.5607351346081192, "grad_norm": 1.9400990009307861, "learning_rate": 2.3119948685054526e-06, "loss": 0.8285, "step": 46010 }, { "epoch": 0.5607960708322669, "grad_norm": 2.2417285442352295, "learning_rate": 2.3116741500962156e-06, "loss": 0.8195, "step": 46015 }, { "epoch": 0.5608570070564147, "grad_norm": 1.7327135801315308, "learning_rate": 2.311353431686979e-06, "loss": 0.7878, "step": 46020 }, { "epoch": 0.5609179432805625, "grad_norm": 1.9499143362045288, "learning_rate": 2.3110327132777425e-06, "loss": 0.8416, "step": 46025 }, { "epoch": 0.5609788795047104, "grad_norm": 2.0513534545898438, "learning_rate": 2.3107119948685055e-06, "loss": 0.9155, "step": 46030 }, { "epoch": 0.5610398157288582, "grad_norm": 2.1333565711975098, "learning_rate": 2.310391276459269e-06, "loss": 0.8976, "step": 46035 }, { "epoch": 0.561100751953006, "grad_norm": 2.134705066680908, "learning_rate": 2.3100705580500324e-06, "loss": 0.8354, "step": 46040 }, { "epoch": 0.5611616881771538, "grad_norm": 2.1176624298095703, "learning_rate": 2.3097498396407954e-06, "loss": 0.8045, "step": 46045 }, { "epoch": 0.5612226244013016, "grad_norm": 1.8354586362838745, "learning_rate": 2.309429121231559e-06, "loss": 0.8288, "step": 46050 }, { "epoch": 0.5612835606254494, "grad_norm": 2.1976308822631836, "learning_rate": 2.3091084028223223e-06, "loss": 0.8231, "step": 46055 }, { "epoch": 0.5613444968495972, "grad_norm": 1.9108844995498657, "learning_rate": 2.3087876844130853e-06, "loss": 0.8105, "step": 46060 }, { "epoch": 0.561405433073745, "grad_norm": 1.8200278282165527, "learning_rate": 2.3084669660038487e-06, "loss": 0.7572, "step": 46065 }, { "epoch": 0.5614663692978928, "grad_norm": 1.9448752403259277, "learning_rate": 2.308146247594612e-06, "loss": 0.809, "step": 46070 }, { "epoch": 0.5615273055220407, "grad_norm": 2.0614876747131348, "learning_rate": 2.3078255291853756e-06, "loss": 0.7882, "step": 46075 }, { "epoch": 0.5615882417461885, "grad_norm": 1.9173833131790161, "learning_rate": 2.307504810776139e-06, "loss": 0.7527, "step": 46080 }, { "epoch": 0.5616491779703362, "grad_norm": 1.8074816465377808, "learning_rate": 2.307184092366902e-06, "loss": 0.745, "step": 46085 }, { "epoch": 0.561710114194484, "grad_norm": 1.8413612842559814, "learning_rate": 2.3068633739576655e-06, "loss": 0.7531, "step": 46090 }, { "epoch": 0.5617710504186318, "grad_norm": 1.7916253805160522, "learning_rate": 2.3065426555484285e-06, "loss": 0.8184, "step": 46095 }, { "epoch": 0.5618319866427797, "grad_norm": 2.185619831085205, "learning_rate": 2.306221937139192e-06, "loss": 0.7998, "step": 46100 }, { "epoch": 0.5618929228669275, "grad_norm": 1.6981314420700073, "learning_rate": 2.3059012187299554e-06, "loss": 0.7616, "step": 46105 }, { "epoch": 0.5619538590910753, "grad_norm": 1.8310949802398682, "learning_rate": 2.3055805003207184e-06, "loss": 0.8353, "step": 46110 }, { "epoch": 0.5620147953152231, "grad_norm": 2.166937828063965, "learning_rate": 2.305259781911482e-06, "loss": 0.8405, "step": 46115 }, { "epoch": 0.5620757315393708, "grad_norm": 2.093472480773926, "learning_rate": 2.3049390635022453e-06, "loss": 0.899, "step": 46120 }, { "epoch": 0.5621366677635187, "grad_norm": 2.5271902084350586, "learning_rate": 2.3046183450930083e-06, "loss": 0.7938, "step": 46125 }, { "epoch": 0.5621976039876665, "grad_norm": 1.925055980682373, "learning_rate": 2.3042976266837718e-06, "loss": 0.8827, "step": 46130 }, { "epoch": 0.5622585402118143, "grad_norm": 1.8499130010604858, "learning_rate": 2.303976908274535e-06, "loss": 0.8105, "step": 46135 }, { "epoch": 0.5623194764359621, "grad_norm": 1.9681559801101685, "learning_rate": 2.3036561898652982e-06, "loss": 0.8407, "step": 46140 }, { "epoch": 0.56238041266011, "grad_norm": 2.066394090652466, "learning_rate": 2.3033354714560617e-06, "loss": 0.8421, "step": 46145 }, { "epoch": 0.5624413488842578, "grad_norm": 1.8569434881210327, "learning_rate": 2.303014753046825e-06, "loss": 0.8407, "step": 46150 }, { "epoch": 0.5625022851084055, "grad_norm": 1.9619042873382568, "learning_rate": 2.3026940346375885e-06, "loss": 0.8677, "step": 46155 }, { "epoch": 0.5625632213325533, "grad_norm": 1.8538730144500732, "learning_rate": 2.302373316228352e-06, "loss": 0.8217, "step": 46160 }, { "epoch": 0.5626241575567011, "grad_norm": 2.00103759765625, "learning_rate": 2.302052597819115e-06, "loss": 0.7585, "step": 46165 }, { "epoch": 0.562685093780849, "grad_norm": 2.0412678718566895, "learning_rate": 2.3017318794098784e-06, "loss": 0.8725, "step": 46170 }, { "epoch": 0.5627460300049968, "grad_norm": 2.0221643447875977, "learning_rate": 2.3014111610006415e-06, "loss": 0.777, "step": 46175 }, { "epoch": 0.5628069662291446, "grad_norm": 2.1486620903015137, "learning_rate": 2.301090442591405e-06, "loss": 0.8457, "step": 46180 }, { "epoch": 0.5628679024532924, "grad_norm": 1.9290030002593994, "learning_rate": 2.3007697241821683e-06, "loss": 0.7703, "step": 46185 }, { "epoch": 0.5629288386774401, "grad_norm": 2.290616989135742, "learning_rate": 2.3004490057729314e-06, "loss": 0.8115, "step": 46190 }, { "epoch": 0.562989774901588, "grad_norm": 1.917252779006958, "learning_rate": 2.300128287363695e-06, "loss": 0.8301, "step": 46195 }, { "epoch": 0.5630507111257358, "grad_norm": 2.0055394172668457, "learning_rate": 2.2998075689544582e-06, "loss": 0.8547, "step": 46200 }, { "epoch": 0.5631116473498836, "grad_norm": 1.9594309329986572, "learning_rate": 2.2994868505452213e-06, "loss": 0.8043, "step": 46205 }, { "epoch": 0.5631725835740314, "grad_norm": 1.8712058067321777, "learning_rate": 2.2991661321359847e-06, "loss": 0.8896, "step": 46210 }, { "epoch": 0.5632335197981793, "grad_norm": 2.8447225093841553, "learning_rate": 2.298845413726748e-06, "loss": 0.8502, "step": 46215 }, { "epoch": 0.5632944560223271, "grad_norm": 2.0340497493743896, "learning_rate": 2.2985246953175116e-06, "loss": 0.8281, "step": 46220 }, { "epoch": 0.5633553922464748, "grad_norm": 1.995797872543335, "learning_rate": 2.2982039769082746e-06, "loss": 0.8994, "step": 46225 }, { "epoch": 0.5634163284706226, "grad_norm": 1.8667889833450317, "learning_rate": 2.297883258499038e-06, "loss": 0.8244, "step": 46230 }, { "epoch": 0.5634772646947704, "grad_norm": 2.076775074005127, "learning_rate": 2.2975625400898015e-06, "loss": 0.7396, "step": 46235 }, { "epoch": 0.5635382009189183, "grad_norm": 2.036071538925171, "learning_rate": 2.297241821680565e-06, "loss": 0.7742, "step": 46240 }, { "epoch": 0.5635991371430661, "grad_norm": 2.0028903484344482, "learning_rate": 2.296921103271328e-06, "loss": 0.7854, "step": 46245 }, { "epoch": 0.5636600733672139, "grad_norm": 1.959503412246704, "learning_rate": 2.2966003848620914e-06, "loss": 0.9296, "step": 46250 }, { "epoch": 0.5637210095913617, "grad_norm": 2.1580963134765625, "learning_rate": 2.296279666452855e-06, "loss": 0.7588, "step": 46255 }, { "epoch": 0.5637819458155094, "grad_norm": 1.7939701080322266, "learning_rate": 2.295958948043618e-06, "loss": 0.8662, "step": 46260 }, { "epoch": 0.5638428820396573, "grad_norm": 2.2166430950164795, "learning_rate": 2.2956382296343813e-06, "loss": 0.7778, "step": 46265 }, { "epoch": 0.5639038182638051, "grad_norm": 2.061724901199341, "learning_rate": 2.2953175112251443e-06, "loss": 0.8177, "step": 46270 }, { "epoch": 0.5639647544879529, "grad_norm": 1.862247347831726, "learning_rate": 2.2949967928159077e-06, "loss": 0.8582, "step": 46275 }, { "epoch": 0.5640256907121007, "grad_norm": 1.9684573411941528, "learning_rate": 2.294676074406671e-06, "loss": 0.8263, "step": 46280 }, { "epoch": 0.5640866269362486, "grad_norm": 2.0299015045166016, "learning_rate": 2.294355355997434e-06, "loss": 0.8305, "step": 46285 }, { "epoch": 0.5641475631603964, "grad_norm": 1.6387616395950317, "learning_rate": 2.2940346375881976e-06, "loss": 0.8207, "step": 46290 }, { "epoch": 0.5642084993845441, "grad_norm": 2.0412232875823975, "learning_rate": 2.293713919178961e-06, "loss": 0.854, "step": 46295 }, { "epoch": 0.5642694356086919, "grad_norm": 1.8202550411224365, "learning_rate": 2.2933932007697245e-06, "loss": 0.8807, "step": 46300 }, { "epoch": 0.5643303718328397, "grad_norm": 1.8717809915542603, "learning_rate": 2.293072482360488e-06, "loss": 0.9079, "step": 46305 }, { "epoch": 0.5643913080569876, "grad_norm": 2.530818462371826, "learning_rate": 2.292751763951251e-06, "loss": 0.8508, "step": 46310 }, { "epoch": 0.5644522442811354, "grad_norm": 1.8676635026931763, "learning_rate": 2.2924310455420144e-06, "loss": 0.8224, "step": 46315 }, { "epoch": 0.5645131805052832, "grad_norm": 1.8057811260223389, "learning_rate": 2.292110327132778e-06, "loss": 0.9111, "step": 46320 }, { "epoch": 0.564574116729431, "grad_norm": 2.041585683822632, "learning_rate": 2.291789608723541e-06, "loss": 0.8269, "step": 46325 }, { "epoch": 0.5646350529535787, "grad_norm": 2.151745319366455, "learning_rate": 2.2914688903143043e-06, "loss": 0.895, "step": 46330 }, { "epoch": 0.5646959891777266, "grad_norm": 1.7347826957702637, "learning_rate": 2.2911481719050677e-06, "loss": 0.836, "step": 46335 }, { "epoch": 0.5647569254018744, "grad_norm": 2.4861552715301514, "learning_rate": 2.2908274534958308e-06, "loss": 0.9132, "step": 46340 }, { "epoch": 0.5648178616260222, "grad_norm": 2.175511121749878, "learning_rate": 2.290506735086594e-06, "loss": 0.8206, "step": 46345 }, { "epoch": 0.56487879785017, "grad_norm": 2.2489190101623535, "learning_rate": 2.290186016677357e-06, "loss": 0.8588, "step": 46350 }, { "epoch": 0.5649397340743179, "grad_norm": 2.111039876937866, "learning_rate": 2.2898652982681206e-06, "loss": 0.7933, "step": 46355 }, { "epoch": 0.5650006702984657, "grad_norm": 2.126167058944702, "learning_rate": 2.289544579858884e-06, "loss": 0.7886, "step": 46360 }, { "epoch": 0.5650616065226134, "grad_norm": 2.1345574855804443, "learning_rate": 2.289223861449647e-06, "loss": 0.8865, "step": 46365 }, { "epoch": 0.5651225427467612, "grad_norm": 2.356712818145752, "learning_rate": 2.2889031430404105e-06, "loss": 0.8805, "step": 46370 }, { "epoch": 0.565183478970909, "grad_norm": 2.0466527938842773, "learning_rate": 2.288582424631174e-06, "loss": 0.883, "step": 46375 }, { "epoch": 0.5652444151950569, "grad_norm": 1.8059167861938477, "learning_rate": 2.2882617062219374e-06, "loss": 0.8614, "step": 46380 }, { "epoch": 0.5653053514192047, "grad_norm": 1.8739526271820068, "learning_rate": 2.287940987812701e-06, "loss": 0.8394, "step": 46385 }, { "epoch": 0.5653662876433525, "grad_norm": 2.099910259246826, "learning_rate": 2.287620269403464e-06, "loss": 0.7658, "step": 46390 }, { "epoch": 0.5654272238675003, "grad_norm": 2.2353596687316895, "learning_rate": 2.2872995509942273e-06, "loss": 0.813, "step": 46395 }, { "epoch": 0.565488160091648, "grad_norm": 2.379608392715454, "learning_rate": 2.2869788325849908e-06, "loss": 0.8298, "step": 46400 }, { "epoch": 0.5655490963157959, "grad_norm": 2.3201828002929688, "learning_rate": 2.2866581141757538e-06, "loss": 0.8742, "step": 46405 }, { "epoch": 0.5656100325399437, "grad_norm": 2.141094446182251, "learning_rate": 2.2863373957665172e-06, "loss": 0.8544, "step": 46410 }, { "epoch": 0.5656709687640915, "grad_norm": 2.0215225219726562, "learning_rate": 2.2860166773572807e-06, "loss": 0.8384, "step": 46415 }, { "epoch": 0.5657319049882393, "grad_norm": 2.343435764312744, "learning_rate": 2.2856959589480437e-06, "loss": 0.8667, "step": 46420 }, { "epoch": 0.5657928412123872, "grad_norm": 1.9007800817489624, "learning_rate": 2.285375240538807e-06, "loss": 0.8659, "step": 46425 }, { "epoch": 0.565853777436535, "grad_norm": 2.300551414489746, "learning_rate": 2.28505452212957e-06, "loss": 0.8354, "step": 46430 }, { "epoch": 0.5659147136606827, "grad_norm": 2.0175201892852783, "learning_rate": 2.2847338037203336e-06, "loss": 0.8676, "step": 46435 }, { "epoch": 0.5659756498848305, "grad_norm": 1.740336537361145, "learning_rate": 2.284413085311097e-06, "loss": 0.9211, "step": 46440 }, { "epoch": 0.5660365861089783, "grad_norm": 2.9356815814971924, "learning_rate": 2.2840923669018605e-06, "loss": 0.8971, "step": 46445 }, { "epoch": 0.5660975223331262, "grad_norm": 1.7870770692825317, "learning_rate": 2.2837716484926235e-06, "loss": 0.7658, "step": 46450 }, { "epoch": 0.566158458557274, "grad_norm": 1.865891456604004, "learning_rate": 2.283450930083387e-06, "loss": 0.8163, "step": 46455 }, { "epoch": 0.5662193947814218, "grad_norm": 1.9844205379486084, "learning_rate": 2.2831302116741504e-06, "loss": 0.7829, "step": 46460 }, { "epoch": 0.5662803310055696, "grad_norm": 1.9811265468597412, "learning_rate": 2.282809493264914e-06, "loss": 0.8373, "step": 46465 }, { "epoch": 0.5663412672297173, "grad_norm": 1.8060705661773682, "learning_rate": 2.282488774855677e-06, "loss": 0.8684, "step": 46470 }, { "epoch": 0.5664022034538652, "grad_norm": 1.967437505722046, "learning_rate": 2.2821680564464403e-06, "loss": 0.8036, "step": 46475 }, { "epoch": 0.566463139678013, "grad_norm": 1.8149967193603516, "learning_rate": 2.2818473380372037e-06, "loss": 0.8961, "step": 46480 }, { "epoch": 0.5665240759021608, "grad_norm": 1.7696951627731323, "learning_rate": 2.2815266196279667e-06, "loss": 0.8087, "step": 46485 }, { "epoch": 0.5665850121263086, "grad_norm": 1.7125446796417236, "learning_rate": 2.28120590121873e-06, "loss": 0.7925, "step": 46490 }, { "epoch": 0.5666459483504565, "grad_norm": 2.1699318885803223, "learning_rate": 2.2808851828094936e-06, "loss": 0.8385, "step": 46495 }, { "epoch": 0.5667068845746042, "grad_norm": 2.1030702590942383, "learning_rate": 2.2805644644002566e-06, "loss": 0.8186, "step": 46500 }, { "epoch": 0.566767820798752, "grad_norm": 1.8917150497436523, "learning_rate": 2.28024374599102e-06, "loss": 0.8254, "step": 46505 }, { "epoch": 0.5668287570228998, "grad_norm": 1.968541145324707, "learning_rate": 2.279923027581783e-06, "loss": 0.8264, "step": 46510 }, { "epoch": 0.5668896932470476, "grad_norm": 1.8344743251800537, "learning_rate": 2.2796023091725465e-06, "loss": 0.7986, "step": 46515 }, { "epoch": 0.5669506294711955, "grad_norm": 1.7767547369003296, "learning_rate": 2.27928159076331e-06, "loss": 0.8689, "step": 46520 }, { "epoch": 0.5670115656953433, "grad_norm": 2.311560869216919, "learning_rate": 2.2789608723540734e-06, "loss": 0.8386, "step": 46525 }, { "epoch": 0.5670725019194911, "grad_norm": 1.9315528869628906, "learning_rate": 2.278640153944837e-06, "loss": 0.8127, "step": 46530 }, { "epoch": 0.5671334381436388, "grad_norm": 1.8654605150222778, "learning_rate": 2.2783194355356e-06, "loss": 0.8861, "step": 46535 }, { "epoch": 0.5671943743677866, "grad_norm": 1.9708895683288574, "learning_rate": 2.2779987171263633e-06, "loss": 0.7698, "step": 46540 }, { "epoch": 0.5672553105919345, "grad_norm": 1.8565754890441895, "learning_rate": 2.2776779987171267e-06, "loss": 0.8305, "step": 46545 }, { "epoch": 0.5673162468160823, "grad_norm": 2.1865856647491455, "learning_rate": 2.27735728030789e-06, "loss": 0.8099, "step": 46550 }, { "epoch": 0.5673771830402301, "grad_norm": 2.2691667079925537, "learning_rate": 2.277036561898653e-06, "loss": 0.7286, "step": 46555 }, { "epoch": 0.5674381192643779, "grad_norm": 2.1157052516937256, "learning_rate": 2.2767158434894166e-06, "loss": 0.7812, "step": 46560 }, { "epoch": 0.5674990554885257, "grad_norm": 2.001523017883301, "learning_rate": 2.2763951250801796e-06, "loss": 0.8089, "step": 46565 }, { "epoch": 0.5675599917126735, "grad_norm": 1.8953379392623901, "learning_rate": 2.276074406670943e-06, "loss": 0.8103, "step": 46570 }, { "epoch": 0.5676209279368213, "grad_norm": 1.8938158750534058, "learning_rate": 2.2757536882617065e-06, "loss": 0.8979, "step": 46575 }, { "epoch": 0.5676818641609691, "grad_norm": 2.01163911819458, "learning_rate": 2.2754329698524695e-06, "loss": 0.8662, "step": 46580 }, { "epoch": 0.5677428003851169, "grad_norm": 2.0067007541656494, "learning_rate": 2.275112251443233e-06, "loss": 0.7916, "step": 46585 }, { "epoch": 0.5678037366092648, "grad_norm": 2.2273194789886475, "learning_rate": 2.2747915330339964e-06, "loss": 0.8444, "step": 46590 }, { "epoch": 0.5678646728334126, "grad_norm": 2.020576000213623, "learning_rate": 2.2744708146247594e-06, "loss": 0.8085, "step": 46595 }, { "epoch": 0.5679256090575604, "grad_norm": 1.8877657651901245, "learning_rate": 2.274150096215523e-06, "loss": 0.8609, "step": 46600 }, { "epoch": 0.5679865452817081, "grad_norm": 1.7087597846984863, "learning_rate": 2.2738293778062863e-06, "loss": 0.8615, "step": 46605 }, { "epoch": 0.5680474815058559, "grad_norm": 1.8873320817947388, "learning_rate": 2.2735086593970498e-06, "loss": 0.7925, "step": 46610 }, { "epoch": 0.5681084177300038, "grad_norm": 1.9817808866500854, "learning_rate": 2.2731879409878128e-06, "loss": 0.8218, "step": 46615 }, { "epoch": 0.5681693539541516, "grad_norm": 2.0957753658294678, "learning_rate": 2.272867222578576e-06, "loss": 0.856, "step": 46620 }, { "epoch": 0.5682302901782994, "grad_norm": 2.2385830879211426, "learning_rate": 2.2725465041693396e-06, "loss": 0.8115, "step": 46625 }, { "epoch": 0.5682912264024472, "grad_norm": 1.8160760402679443, "learning_rate": 2.272225785760103e-06, "loss": 0.8605, "step": 46630 }, { "epoch": 0.568352162626595, "grad_norm": 2.168236494064331, "learning_rate": 2.271905067350866e-06, "loss": 0.8297, "step": 46635 }, { "epoch": 0.5684130988507428, "grad_norm": 1.9635993242263794, "learning_rate": 2.2715843489416295e-06, "loss": 0.8265, "step": 46640 }, { "epoch": 0.5684740350748906, "grad_norm": 1.974095106124878, "learning_rate": 2.2712636305323926e-06, "loss": 0.8341, "step": 46645 }, { "epoch": 0.5685349712990384, "grad_norm": 1.9961345195770264, "learning_rate": 2.270942912123156e-06, "loss": 0.8531, "step": 46650 }, { "epoch": 0.5685959075231862, "grad_norm": 1.824700951576233, "learning_rate": 2.2706221937139194e-06, "loss": 0.8357, "step": 46655 }, { "epoch": 0.568656843747334, "grad_norm": 2.0215470790863037, "learning_rate": 2.2703014753046825e-06, "loss": 0.8918, "step": 46660 }, { "epoch": 0.5687177799714819, "grad_norm": 1.8589445352554321, "learning_rate": 2.269980756895446e-06, "loss": 0.8129, "step": 46665 }, { "epoch": 0.5687787161956297, "grad_norm": 1.9964656829833984, "learning_rate": 2.2696600384862093e-06, "loss": 0.7762, "step": 46670 }, { "epoch": 0.5688396524197774, "grad_norm": 2.010902166366577, "learning_rate": 2.2693393200769724e-06, "loss": 0.8178, "step": 46675 }, { "epoch": 0.5689005886439252, "grad_norm": 1.8275527954101562, "learning_rate": 2.269018601667736e-06, "loss": 0.856, "step": 46680 }, { "epoch": 0.568961524868073, "grad_norm": 1.863747477531433, "learning_rate": 2.2686978832584992e-06, "loss": 0.9357, "step": 46685 }, { "epoch": 0.5690224610922209, "grad_norm": 1.6387330293655396, "learning_rate": 2.2683771648492627e-06, "loss": 0.8463, "step": 46690 }, { "epoch": 0.5690833973163687, "grad_norm": 1.9971039295196533, "learning_rate": 2.268056446440026e-06, "loss": 0.8705, "step": 46695 }, { "epoch": 0.5691443335405165, "grad_norm": 1.9459129571914673, "learning_rate": 2.267735728030789e-06, "loss": 0.8177, "step": 46700 }, { "epoch": 0.5692052697646643, "grad_norm": 2.183166027069092, "learning_rate": 2.2674150096215526e-06, "loss": 0.8192, "step": 46705 }, { "epoch": 0.5692662059888121, "grad_norm": 1.9363521337509155, "learning_rate": 2.267094291212316e-06, "loss": 0.7995, "step": 46710 }, { "epoch": 0.5693271422129599, "grad_norm": 1.6907241344451904, "learning_rate": 2.266773572803079e-06, "loss": 0.8217, "step": 46715 }, { "epoch": 0.5693880784371077, "grad_norm": 2.000201463699341, "learning_rate": 2.2664528543938425e-06, "loss": 0.7741, "step": 46720 }, { "epoch": 0.5694490146612555, "grad_norm": 1.8298461437225342, "learning_rate": 2.2661321359846055e-06, "loss": 0.832, "step": 46725 }, { "epoch": 0.5695099508854033, "grad_norm": 1.830040454864502, "learning_rate": 2.265811417575369e-06, "loss": 0.8155, "step": 46730 }, { "epoch": 0.5695708871095512, "grad_norm": 1.9514943361282349, "learning_rate": 2.2654906991661324e-06, "loss": 0.7997, "step": 46735 }, { "epoch": 0.569631823333699, "grad_norm": 1.8308566808700562, "learning_rate": 2.2651699807568954e-06, "loss": 0.8303, "step": 46740 }, { "epoch": 0.5696927595578467, "grad_norm": 2.262059450149536, "learning_rate": 2.264849262347659e-06, "loss": 0.7902, "step": 46745 }, { "epoch": 0.5697536957819945, "grad_norm": 2.2694900035858154, "learning_rate": 2.2645285439384223e-06, "loss": 0.8063, "step": 46750 }, { "epoch": 0.5698146320061424, "grad_norm": 1.5728579759597778, "learning_rate": 2.2642078255291857e-06, "loss": 0.8475, "step": 46755 }, { "epoch": 0.5698755682302902, "grad_norm": 1.8736475706100464, "learning_rate": 2.2638871071199487e-06, "loss": 0.8041, "step": 46760 }, { "epoch": 0.569936504454438, "grad_norm": 1.9677778482437134, "learning_rate": 2.263566388710712e-06, "loss": 0.8501, "step": 46765 }, { "epoch": 0.5699974406785858, "grad_norm": 2.1361734867095947, "learning_rate": 2.2632456703014756e-06, "loss": 0.85, "step": 46770 }, { "epoch": 0.5700583769027336, "grad_norm": 1.9978231191635132, "learning_rate": 2.262924951892239e-06, "loss": 0.8403, "step": 46775 }, { "epoch": 0.5701193131268814, "grad_norm": 2.3618991374969482, "learning_rate": 2.262604233483002e-06, "loss": 0.7774, "step": 46780 }, { "epoch": 0.5701802493510292, "grad_norm": 1.8087958097457886, "learning_rate": 2.2622835150737655e-06, "loss": 0.8601, "step": 46785 }, { "epoch": 0.570241185575177, "grad_norm": 2.0303378105163574, "learning_rate": 2.261962796664529e-06, "loss": 0.8251, "step": 46790 }, { "epoch": 0.5703021217993248, "grad_norm": 2.199971914291382, "learning_rate": 2.261642078255292e-06, "loss": 0.864, "step": 46795 }, { "epoch": 0.5703630580234726, "grad_norm": 1.8235567808151245, "learning_rate": 2.2613213598460554e-06, "loss": 0.8269, "step": 46800 }, { "epoch": 0.5704239942476205, "grad_norm": 2.4812655448913574, "learning_rate": 2.2610006414368184e-06, "loss": 0.8658, "step": 46805 }, { "epoch": 0.5704849304717683, "grad_norm": 2.022707223892212, "learning_rate": 2.260679923027582e-06, "loss": 0.7791, "step": 46810 }, { "epoch": 0.570545866695916, "grad_norm": 1.9376939535140991, "learning_rate": 2.2603592046183453e-06, "loss": 0.8552, "step": 46815 }, { "epoch": 0.5706068029200638, "grad_norm": 1.7951838970184326, "learning_rate": 2.2600384862091083e-06, "loss": 0.7726, "step": 46820 }, { "epoch": 0.5706677391442116, "grad_norm": 1.861332893371582, "learning_rate": 2.2597177677998718e-06, "loss": 0.8177, "step": 46825 }, { "epoch": 0.5707286753683595, "grad_norm": 1.9197980165481567, "learning_rate": 2.259397049390635e-06, "loss": 0.879, "step": 46830 }, { "epoch": 0.5707896115925073, "grad_norm": 1.8239407539367676, "learning_rate": 2.2590763309813986e-06, "loss": 0.8495, "step": 46835 }, { "epoch": 0.5708505478166551, "grad_norm": 2.011939764022827, "learning_rate": 2.2587556125721617e-06, "loss": 0.8512, "step": 46840 }, { "epoch": 0.5709114840408029, "grad_norm": 2.190605640411377, "learning_rate": 2.258434894162925e-06, "loss": 0.7687, "step": 46845 }, { "epoch": 0.5709724202649507, "grad_norm": 2.0564942359924316, "learning_rate": 2.2581141757536885e-06, "loss": 0.8728, "step": 46850 }, { "epoch": 0.5710333564890985, "grad_norm": 2.443004608154297, "learning_rate": 2.257793457344452e-06, "loss": 0.8792, "step": 46855 }, { "epoch": 0.5710942927132463, "grad_norm": 2.226377487182617, "learning_rate": 2.257472738935215e-06, "loss": 0.808, "step": 46860 }, { "epoch": 0.5711552289373941, "grad_norm": 2.3630199432373047, "learning_rate": 2.2571520205259784e-06, "loss": 0.8347, "step": 46865 }, { "epoch": 0.5712161651615419, "grad_norm": 1.8423051834106445, "learning_rate": 2.256831302116742e-06, "loss": 0.9657, "step": 46870 }, { "epoch": 0.5712771013856898, "grad_norm": 1.8526979684829712, "learning_rate": 2.256510583707505e-06, "loss": 0.8028, "step": 46875 }, { "epoch": 0.5713380376098376, "grad_norm": 1.932946801185608, "learning_rate": 2.2561898652982683e-06, "loss": 0.8761, "step": 46880 }, { "epoch": 0.5713989738339853, "grad_norm": 1.853652000427246, "learning_rate": 2.2558691468890318e-06, "loss": 0.8295, "step": 46885 }, { "epoch": 0.5714599100581331, "grad_norm": 1.7751116752624512, "learning_rate": 2.2555484284797948e-06, "loss": 0.9092, "step": 46890 }, { "epoch": 0.571520846282281, "grad_norm": 1.9651793241500854, "learning_rate": 2.2552277100705582e-06, "loss": 0.8603, "step": 46895 }, { "epoch": 0.5715817825064288, "grad_norm": 1.7361146211624146, "learning_rate": 2.2549069916613212e-06, "loss": 0.7742, "step": 46900 }, { "epoch": 0.5716427187305766, "grad_norm": 1.9053022861480713, "learning_rate": 2.2545862732520847e-06, "loss": 0.7608, "step": 46905 }, { "epoch": 0.5717036549547244, "grad_norm": 1.6853506565093994, "learning_rate": 2.254265554842848e-06, "loss": 0.7995, "step": 46910 }, { "epoch": 0.5717645911788722, "grad_norm": 1.9526848793029785, "learning_rate": 2.2539448364336116e-06, "loss": 0.8366, "step": 46915 }, { "epoch": 0.57182552740302, "grad_norm": 1.9302555322647095, "learning_rate": 2.253624118024375e-06, "loss": 0.8065, "step": 46920 }, { "epoch": 0.5718864636271678, "grad_norm": 1.7620511054992676, "learning_rate": 2.253303399615138e-06, "loss": 0.7866, "step": 46925 }, { "epoch": 0.5719473998513156, "grad_norm": 1.6860451698303223, "learning_rate": 2.2529826812059015e-06, "loss": 0.8785, "step": 46930 }, { "epoch": 0.5720083360754634, "grad_norm": 1.9931939840316772, "learning_rate": 2.252661962796665e-06, "loss": 0.8763, "step": 46935 }, { "epoch": 0.5720692722996112, "grad_norm": 2.0207600593566895, "learning_rate": 2.252341244387428e-06, "loss": 0.8033, "step": 46940 }, { "epoch": 0.5721302085237591, "grad_norm": 2.0463786125183105, "learning_rate": 2.2520205259781914e-06, "loss": 0.8542, "step": 46945 }, { "epoch": 0.5721911447479069, "grad_norm": 2.2896475791931152, "learning_rate": 2.251699807568955e-06, "loss": 0.9288, "step": 46950 }, { "epoch": 0.5722520809720546, "grad_norm": 2.0175085067749023, "learning_rate": 2.251379089159718e-06, "loss": 0.9429, "step": 46955 }, { "epoch": 0.5723130171962024, "grad_norm": 1.9715676307678223, "learning_rate": 2.2510583707504813e-06, "loss": 0.7155, "step": 46960 }, { "epoch": 0.5723739534203502, "grad_norm": 2.138716697692871, "learning_rate": 2.2507376523412447e-06, "loss": 0.8424, "step": 46965 }, { "epoch": 0.5724348896444981, "grad_norm": 1.9950231313705444, "learning_rate": 2.2504169339320077e-06, "loss": 0.8209, "step": 46970 }, { "epoch": 0.5724958258686459, "grad_norm": 1.878995656967163, "learning_rate": 2.250096215522771e-06, "loss": 0.8425, "step": 46975 }, { "epoch": 0.5725567620927937, "grad_norm": 1.688011646270752, "learning_rate": 2.249775497113534e-06, "loss": 0.7556, "step": 46980 }, { "epoch": 0.5726176983169415, "grad_norm": 1.770126461982727, "learning_rate": 2.2494547787042976e-06, "loss": 0.8773, "step": 46985 }, { "epoch": 0.5726786345410892, "grad_norm": 1.9343688488006592, "learning_rate": 2.249134060295061e-06, "loss": 0.8847, "step": 46990 }, { "epoch": 0.5727395707652371, "grad_norm": 2.2087414264678955, "learning_rate": 2.2488133418858245e-06, "loss": 0.8959, "step": 46995 }, { "epoch": 0.5728005069893849, "grad_norm": 2.166537284851074, "learning_rate": 2.248492623476588e-06, "loss": 0.8751, "step": 47000 }, { "epoch": 0.5728614432135327, "grad_norm": 1.8498363494873047, "learning_rate": 2.2481719050673514e-06, "loss": 0.8367, "step": 47005 }, { "epoch": 0.5729223794376805, "grad_norm": 2.030846118927002, "learning_rate": 2.2478511866581144e-06, "loss": 0.745, "step": 47010 }, { "epoch": 0.5729833156618284, "grad_norm": 2.0171289443969727, "learning_rate": 2.247530468248878e-06, "loss": 0.7367, "step": 47015 }, { "epoch": 0.5730442518859762, "grad_norm": 1.8859187364578247, "learning_rate": 2.247209749839641e-06, "loss": 0.8541, "step": 47020 }, { "epoch": 0.5731051881101239, "grad_norm": 2.0178492069244385, "learning_rate": 2.2468890314304043e-06, "loss": 0.8509, "step": 47025 }, { "epoch": 0.5731661243342717, "grad_norm": 2.0977041721343994, "learning_rate": 2.2465683130211677e-06, "loss": 0.822, "step": 47030 }, { "epoch": 0.5732270605584195, "grad_norm": 1.8739315271377563, "learning_rate": 2.2462475946119307e-06, "loss": 0.8349, "step": 47035 }, { "epoch": 0.5732879967825674, "grad_norm": 2.1766788959503174, "learning_rate": 2.245926876202694e-06, "loss": 0.9613, "step": 47040 }, { "epoch": 0.5733489330067152, "grad_norm": 1.921674132347107, "learning_rate": 2.2456061577934576e-06, "loss": 0.8086, "step": 47045 }, { "epoch": 0.573409869230863, "grad_norm": 2.2979559898376465, "learning_rate": 2.2452854393842206e-06, "loss": 0.9016, "step": 47050 }, { "epoch": 0.5734708054550108, "grad_norm": 1.975894570350647, "learning_rate": 2.244964720974984e-06, "loss": 0.8117, "step": 47055 }, { "epoch": 0.5735317416791585, "grad_norm": 1.8237676620483398, "learning_rate": 2.2446440025657475e-06, "loss": 0.8504, "step": 47060 }, { "epoch": 0.5735926779033064, "grad_norm": 1.6467922925949097, "learning_rate": 2.2443232841565105e-06, "loss": 0.801, "step": 47065 }, { "epoch": 0.5736536141274542, "grad_norm": 2.0245041847229004, "learning_rate": 2.244002565747274e-06, "loss": 0.8429, "step": 47070 }, { "epoch": 0.573714550351602, "grad_norm": 1.8421663045883179, "learning_rate": 2.2436818473380374e-06, "loss": 0.8135, "step": 47075 }, { "epoch": 0.5737754865757498, "grad_norm": 1.8572884798049927, "learning_rate": 2.243361128928801e-06, "loss": 0.8764, "step": 47080 }, { "epoch": 0.5738364227998977, "grad_norm": 1.6472606658935547, "learning_rate": 2.2430404105195643e-06, "loss": 0.8374, "step": 47085 }, { "epoch": 0.5738973590240455, "grad_norm": 2.0874061584472656, "learning_rate": 2.2427196921103273e-06, "loss": 0.864, "step": 47090 }, { "epoch": 0.5739582952481932, "grad_norm": 1.9083690643310547, "learning_rate": 2.2423989737010908e-06, "loss": 0.8294, "step": 47095 }, { "epoch": 0.574019231472341, "grad_norm": 2.3386428356170654, "learning_rate": 2.2420782552918538e-06, "loss": 0.7647, "step": 47100 }, { "epoch": 0.5740801676964888, "grad_norm": 2.0831923484802246, "learning_rate": 2.241757536882617e-06, "loss": 0.9681, "step": 47105 }, { "epoch": 0.5741411039206367, "grad_norm": 2.359963893890381, "learning_rate": 2.2414368184733807e-06, "loss": 0.7585, "step": 47110 }, { "epoch": 0.5742020401447845, "grad_norm": 2.0803136825561523, "learning_rate": 2.2411161000641437e-06, "loss": 0.7982, "step": 47115 }, { "epoch": 0.5742629763689323, "grad_norm": 1.8286765813827515, "learning_rate": 2.240795381654907e-06, "loss": 0.8388, "step": 47120 }, { "epoch": 0.5743239125930801, "grad_norm": 2.1023318767547607, "learning_rate": 2.2404746632456705e-06, "loss": 0.8066, "step": 47125 }, { "epoch": 0.5743848488172278, "grad_norm": 1.9148752689361572, "learning_rate": 2.2401539448364336e-06, "loss": 0.8249, "step": 47130 }, { "epoch": 0.5744457850413757, "grad_norm": 1.8564373254776, "learning_rate": 2.239833226427197e-06, "loss": 0.7633, "step": 47135 }, { "epoch": 0.5745067212655235, "grad_norm": 1.7652723789215088, "learning_rate": 2.2395125080179604e-06, "loss": 0.8182, "step": 47140 }, { "epoch": 0.5745676574896713, "grad_norm": 1.9555878639221191, "learning_rate": 2.239191789608724e-06, "loss": 0.8213, "step": 47145 }, { "epoch": 0.5746285937138191, "grad_norm": 1.914434552192688, "learning_rate": 2.238871071199487e-06, "loss": 0.8554, "step": 47150 }, { "epoch": 0.574689529937967, "grad_norm": 2.1274077892303467, "learning_rate": 2.2385503527902503e-06, "loss": 0.7578, "step": 47155 }, { "epoch": 0.5747504661621148, "grad_norm": 2.1723697185516357, "learning_rate": 2.2382296343810138e-06, "loss": 0.821, "step": 47160 }, { "epoch": 0.5748114023862625, "grad_norm": 1.842780351638794, "learning_rate": 2.2379089159717772e-06, "loss": 0.8191, "step": 47165 }, { "epoch": 0.5748723386104103, "grad_norm": 1.9004045724868774, "learning_rate": 2.2375881975625402e-06, "loss": 0.8963, "step": 47170 }, { "epoch": 0.5749332748345581, "grad_norm": 1.838196873664856, "learning_rate": 2.2372674791533037e-06, "loss": 0.8201, "step": 47175 }, { "epoch": 0.574994211058706, "grad_norm": 1.955945372581482, "learning_rate": 2.236946760744067e-06, "loss": 0.8565, "step": 47180 }, { "epoch": 0.5750551472828538, "grad_norm": 2.3400254249572754, "learning_rate": 2.23662604233483e-06, "loss": 0.8736, "step": 47185 }, { "epoch": 0.5751160835070016, "grad_norm": 1.8657890558242798, "learning_rate": 2.2363053239255936e-06, "loss": 0.7912, "step": 47190 }, { "epoch": 0.5751770197311494, "grad_norm": 2.0548813343048096, "learning_rate": 2.2359846055163566e-06, "loss": 0.8507, "step": 47195 }, { "epoch": 0.5752379559552971, "grad_norm": 1.9043481349945068, "learning_rate": 2.23566388710712e-06, "loss": 0.8024, "step": 47200 }, { "epoch": 0.575298892179445, "grad_norm": 2.0938541889190674, "learning_rate": 2.2353431686978835e-06, "loss": 0.8281, "step": 47205 }, { "epoch": 0.5753598284035928, "grad_norm": 1.6476184129714966, "learning_rate": 2.2350224502886465e-06, "loss": 0.8085, "step": 47210 }, { "epoch": 0.5754207646277406, "grad_norm": 1.8270889520645142, "learning_rate": 2.23470173187941e-06, "loss": 0.7562, "step": 47215 }, { "epoch": 0.5754817008518884, "grad_norm": 2.0024240016937256, "learning_rate": 2.2343810134701734e-06, "loss": 0.7394, "step": 47220 }, { "epoch": 0.5755426370760363, "grad_norm": 2.024219512939453, "learning_rate": 2.234060295060937e-06, "loss": 0.8305, "step": 47225 }, { "epoch": 0.5756035733001841, "grad_norm": 2.0654146671295166, "learning_rate": 2.2337395766517003e-06, "loss": 0.8305, "step": 47230 }, { "epoch": 0.5756645095243318, "grad_norm": 1.9824291467666626, "learning_rate": 2.2334188582424633e-06, "loss": 0.8303, "step": 47235 }, { "epoch": 0.5757254457484796, "grad_norm": 2.0320374965667725, "learning_rate": 2.2330981398332267e-06, "loss": 0.8488, "step": 47240 }, { "epoch": 0.5757863819726274, "grad_norm": 1.9795598983764648, "learning_rate": 2.23277742142399e-06, "loss": 0.8502, "step": 47245 }, { "epoch": 0.5758473181967753, "grad_norm": 1.8377946615219116, "learning_rate": 2.232456703014753e-06, "loss": 0.8015, "step": 47250 }, { "epoch": 0.5759082544209231, "grad_norm": 2.112751007080078, "learning_rate": 2.2321359846055166e-06, "loss": 0.8333, "step": 47255 }, { "epoch": 0.5759691906450709, "grad_norm": 1.96861732006073, "learning_rate": 2.23181526619628e-06, "loss": 0.8762, "step": 47260 }, { "epoch": 0.5760301268692187, "grad_norm": 1.738682746887207, "learning_rate": 2.231494547787043e-06, "loss": 0.8545, "step": 47265 }, { "epoch": 0.5760910630933664, "grad_norm": 2.0450868606567383, "learning_rate": 2.2311738293778065e-06, "loss": 0.7853, "step": 47270 }, { "epoch": 0.5761519993175143, "grad_norm": 2.089764356613159, "learning_rate": 2.2308531109685695e-06, "loss": 0.8228, "step": 47275 }, { "epoch": 0.5762129355416621, "grad_norm": 1.6390517950057983, "learning_rate": 2.230532392559333e-06, "loss": 0.7961, "step": 47280 }, { "epoch": 0.5762738717658099, "grad_norm": 1.8355305194854736, "learning_rate": 2.2302116741500964e-06, "loss": 0.8118, "step": 47285 }, { "epoch": 0.5763348079899577, "grad_norm": 1.9058408737182617, "learning_rate": 2.2298909557408594e-06, "loss": 0.7608, "step": 47290 }, { "epoch": 0.5763957442141056, "grad_norm": 2.055328130722046, "learning_rate": 2.229570237331623e-06, "loss": 0.8407, "step": 47295 }, { "epoch": 0.5764566804382534, "grad_norm": 1.9798731803894043, "learning_rate": 2.2292495189223863e-06, "loss": 0.7571, "step": 47300 }, { "epoch": 0.5765176166624011, "grad_norm": 1.754415512084961, "learning_rate": 2.2289288005131497e-06, "loss": 0.8168, "step": 47305 }, { "epoch": 0.5765785528865489, "grad_norm": 1.9932535886764526, "learning_rate": 2.228608082103913e-06, "loss": 0.7362, "step": 47310 }, { "epoch": 0.5766394891106967, "grad_norm": 1.5491033792495728, "learning_rate": 2.228287363694676e-06, "loss": 0.7896, "step": 47315 }, { "epoch": 0.5767004253348446, "grad_norm": 2.0623703002929688, "learning_rate": 2.2279666452854396e-06, "loss": 0.8031, "step": 47320 }, { "epoch": 0.5767613615589924, "grad_norm": 2.2944705486297607, "learning_rate": 2.227645926876203e-06, "loss": 0.9114, "step": 47325 }, { "epoch": 0.5768222977831402, "grad_norm": 1.934117078781128, "learning_rate": 2.227325208466966e-06, "loss": 0.8613, "step": 47330 }, { "epoch": 0.576883234007288, "grad_norm": 1.6713606119155884, "learning_rate": 2.2270044900577295e-06, "loss": 0.8098, "step": 47335 }, { "epoch": 0.5769441702314357, "grad_norm": 1.8501057624816895, "learning_rate": 2.226683771648493e-06, "loss": 0.7728, "step": 47340 }, { "epoch": 0.5770051064555836, "grad_norm": 2.007781505584717, "learning_rate": 2.226363053239256e-06, "loss": 0.8503, "step": 47345 }, { "epoch": 0.5770660426797314, "grad_norm": 2.0234384536743164, "learning_rate": 2.2260423348300194e-06, "loss": 0.7751, "step": 47350 }, { "epoch": 0.5771269789038792, "grad_norm": 1.8817061185836792, "learning_rate": 2.2257216164207824e-06, "loss": 0.8708, "step": 47355 }, { "epoch": 0.577187915128027, "grad_norm": 1.8325806856155396, "learning_rate": 2.225400898011546e-06, "loss": 0.764, "step": 47360 }, { "epoch": 0.5772488513521749, "grad_norm": 1.9466146230697632, "learning_rate": 2.2250801796023093e-06, "loss": 0.7911, "step": 47365 }, { "epoch": 0.5773097875763227, "grad_norm": 1.965692162513733, "learning_rate": 2.2247594611930728e-06, "loss": 0.8449, "step": 47370 }, { "epoch": 0.5773707238004704, "grad_norm": 1.8637094497680664, "learning_rate": 2.2244387427838358e-06, "loss": 0.8356, "step": 47375 }, { "epoch": 0.5774316600246182, "grad_norm": 2.048768997192383, "learning_rate": 2.2241180243745992e-06, "loss": 0.8518, "step": 47380 }, { "epoch": 0.577492596248766, "grad_norm": 2.0186266899108887, "learning_rate": 2.2237973059653627e-06, "loss": 0.9011, "step": 47385 }, { "epoch": 0.5775535324729139, "grad_norm": 1.8164805173873901, "learning_rate": 2.223476587556126e-06, "loss": 0.8015, "step": 47390 }, { "epoch": 0.5776144686970617, "grad_norm": 2.0468575954437256, "learning_rate": 2.223155869146889e-06, "loss": 0.8995, "step": 47395 }, { "epoch": 0.5776754049212095, "grad_norm": 1.8515559434890747, "learning_rate": 2.2228351507376526e-06, "loss": 0.8703, "step": 47400 }, { "epoch": 0.5777363411453573, "grad_norm": 2.0408289432525635, "learning_rate": 2.222514432328416e-06, "loss": 0.7995, "step": 47405 }, { "epoch": 0.577797277369505, "grad_norm": 2.051377773284912, "learning_rate": 2.222193713919179e-06, "loss": 0.7773, "step": 47410 }, { "epoch": 0.5778582135936529, "grad_norm": 2.0462841987609863, "learning_rate": 2.2218729955099425e-06, "loss": 0.8596, "step": 47415 }, { "epoch": 0.5779191498178007, "grad_norm": 2.0481953620910645, "learning_rate": 2.221552277100706e-06, "loss": 0.828, "step": 47420 }, { "epoch": 0.5779800860419485, "grad_norm": 1.959604024887085, "learning_rate": 2.221231558691469e-06, "loss": 0.7999, "step": 47425 }, { "epoch": 0.5780410222660963, "grad_norm": 1.9885107278823853, "learning_rate": 2.2209108402822324e-06, "loss": 0.7557, "step": 47430 }, { "epoch": 0.5781019584902441, "grad_norm": 1.6273484230041504, "learning_rate": 2.220590121872996e-06, "loss": 0.7518, "step": 47435 }, { "epoch": 0.578162894714392, "grad_norm": 1.8373719453811646, "learning_rate": 2.220269403463759e-06, "loss": 0.8379, "step": 47440 }, { "epoch": 0.5782238309385397, "grad_norm": 1.7499154806137085, "learning_rate": 2.2199486850545223e-06, "loss": 0.835, "step": 47445 }, { "epoch": 0.5782847671626875, "grad_norm": 1.960491418838501, "learning_rate": 2.2196279666452857e-06, "loss": 0.8559, "step": 47450 }, { "epoch": 0.5783457033868353, "grad_norm": 1.9043909311294556, "learning_rate": 2.219307248236049e-06, "loss": 0.8267, "step": 47455 }, { "epoch": 0.5784066396109832, "grad_norm": 2.0400452613830566, "learning_rate": 2.218986529826812e-06, "loss": 0.8549, "step": 47460 }, { "epoch": 0.578467575835131, "grad_norm": 1.9763110876083374, "learning_rate": 2.2186658114175756e-06, "loss": 0.7272, "step": 47465 }, { "epoch": 0.5785285120592788, "grad_norm": 1.8999052047729492, "learning_rate": 2.218345093008339e-06, "loss": 0.845, "step": 47470 }, { "epoch": 0.5785894482834265, "grad_norm": 1.733727216720581, "learning_rate": 2.2180243745991025e-06, "loss": 0.7975, "step": 47475 }, { "epoch": 0.5786503845075743, "grad_norm": 1.8920211791992188, "learning_rate": 2.2177036561898655e-06, "loss": 0.8147, "step": 47480 }, { "epoch": 0.5787113207317222, "grad_norm": 2.000849962234497, "learning_rate": 2.217382937780629e-06, "loss": 0.8415, "step": 47485 }, { "epoch": 0.57877225695587, "grad_norm": 1.716424822807312, "learning_rate": 2.217062219371392e-06, "loss": 0.7732, "step": 47490 }, { "epoch": 0.5788331931800178, "grad_norm": 2.079484224319458, "learning_rate": 2.2167415009621554e-06, "loss": 0.8268, "step": 47495 }, { "epoch": 0.5788941294041656, "grad_norm": 1.8641210794448853, "learning_rate": 2.216420782552919e-06, "loss": 0.8113, "step": 47500 }, { "epoch": 0.5789550656283134, "grad_norm": 2.740100145339966, "learning_rate": 2.216100064143682e-06, "loss": 0.8838, "step": 47505 }, { "epoch": 0.5790160018524612, "grad_norm": 1.8019206523895264, "learning_rate": 2.2157793457344453e-06, "loss": 0.8093, "step": 47510 }, { "epoch": 0.579076938076609, "grad_norm": 1.8012003898620605, "learning_rate": 2.2154586273252087e-06, "loss": 0.7965, "step": 47515 }, { "epoch": 0.5791378743007568, "grad_norm": 2.253786563873291, "learning_rate": 2.2151379089159717e-06, "loss": 0.8427, "step": 47520 }, { "epoch": 0.5791988105249046, "grad_norm": 2.2950332164764404, "learning_rate": 2.214817190506735e-06, "loss": 0.8395, "step": 47525 }, { "epoch": 0.5792597467490525, "grad_norm": 2.3005902767181396, "learning_rate": 2.2144964720974986e-06, "loss": 0.7773, "step": 47530 }, { "epoch": 0.5793206829732003, "grad_norm": 1.7432873249053955, "learning_rate": 2.214175753688262e-06, "loss": 0.8741, "step": 47535 }, { "epoch": 0.5793816191973481, "grad_norm": 1.8649972677230835, "learning_rate": 2.213855035279025e-06, "loss": 0.875, "step": 47540 }, { "epoch": 0.5794425554214958, "grad_norm": 1.8104170560836792, "learning_rate": 2.2135343168697885e-06, "loss": 0.8288, "step": 47545 }, { "epoch": 0.5795034916456436, "grad_norm": 1.8259307146072388, "learning_rate": 2.213213598460552e-06, "loss": 0.851, "step": 47550 }, { "epoch": 0.5795644278697915, "grad_norm": 1.878832221031189, "learning_rate": 2.2128928800513154e-06, "loss": 0.8921, "step": 47555 }, { "epoch": 0.5796253640939393, "grad_norm": 1.9053013324737549, "learning_rate": 2.2125721616420784e-06, "loss": 0.849, "step": 47560 }, { "epoch": 0.5796863003180871, "grad_norm": 2.1526682376861572, "learning_rate": 2.212251443232842e-06, "loss": 0.8508, "step": 47565 }, { "epoch": 0.5797472365422349, "grad_norm": 1.9771778583526611, "learning_rate": 2.211930724823605e-06, "loss": 0.7894, "step": 47570 }, { "epoch": 0.5798081727663827, "grad_norm": 1.6250038146972656, "learning_rate": 2.2116100064143683e-06, "loss": 0.7689, "step": 47575 }, { "epoch": 0.5798691089905305, "grad_norm": 1.896871566772461, "learning_rate": 2.2112892880051318e-06, "loss": 0.8612, "step": 47580 }, { "epoch": 0.5799300452146783, "grad_norm": 2.4305341243743896, "learning_rate": 2.2109685695958948e-06, "loss": 0.8347, "step": 47585 }, { "epoch": 0.5799909814388261, "grad_norm": 1.7370744943618774, "learning_rate": 2.2106478511866582e-06, "loss": 0.7808, "step": 47590 }, { "epoch": 0.5800519176629739, "grad_norm": 2.367168664932251, "learning_rate": 2.2103271327774217e-06, "loss": 0.799, "step": 47595 }, { "epoch": 0.5801128538871217, "grad_norm": 2.0895814895629883, "learning_rate": 2.2100064143681847e-06, "loss": 0.8729, "step": 47600 }, { "epoch": 0.5801737901112696, "grad_norm": 1.7973419427871704, "learning_rate": 2.209685695958948e-06, "loss": 0.7821, "step": 47605 }, { "epoch": 0.5802347263354174, "grad_norm": 1.6964384317398071, "learning_rate": 2.2093649775497116e-06, "loss": 0.906, "step": 47610 }, { "epoch": 0.5802956625595651, "grad_norm": 2.3063769340515137, "learning_rate": 2.209044259140475e-06, "loss": 0.8093, "step": 47615 }, { "epoch": 0.5803565987837129, "grad_norm": 1.657038688659668, "learning_rate": 2.2087235407312384e-06, "loss": 0.8851, "step": 47620 }, { "epoch": 0.5804175350078608, "grad_norm": 1.8951719999313354, "learning_rate": 2.2084028223220014e-06, "loss": 0.8606, "step": 47625 }, { "epoch": 0.5804784712320086, "grad_norm": 1.9659771919250488, "learning_rate": 2.208082103912765e-06, "loss": 0.8454, "step": 47630 }, { "epoch": 0.5805394074561564, "grad_norm": 1.7445183992385864, "learning_rate": 2.2077613855035283e-06, "loss": 0.8337, "step": 47635 }, { "epoch": 0.5806003436803042, "grad_norm": 2.044426679611206, "learning_rate": 2.2074406670942913e-06, "loss": 0.8121, "step": 47640 }, { "epoch": 0.580661279904452, "grad_norm": 2.2425789833068848, "learning_rate": 2.2071199486850548e-06, "loss": 0.8678, "step": 47645 }, { "epoch": 0.5807222161285998, "grad_norm": 1.9096548557281494, "learning_rate": 2.206799230275818e-06, "loss": 0.844, "step": 47650 }, { "epoch": 0.5807831523527476, "grad_norm": 1.8057054281234741, "learning_rate": 2.2064785118665812e-06, "loss": 0.7266, "step": 47655 }, { "epoch": 0.5808440885768954, "grad_norm": 1.7670516967773438, "learning_rate": 2.2061577934573447e-06, "loss": 0.7832, "step": 47660 }, { "epoch": 0.5809050248010432, "grad_norm": 1.7588320970535278, "learning_rate": 2.2058370750481077e-06, "loss": 0.8589, "step": 47665 }, { "epoch": 0.580965961025191, "grad_norm": 1.9507850408554077, "learning_rate": 2.205516356638871e-06, "loss": 0.8473, "step": 47670 }, { "epoch": 0.5810268972493389, "grad_norm": 1.7736799716949463, "learning_rate": 2.2051956382296346e-06, "loss": 0.8632, "step": 47675 }, { "epoch": 0.5810878334734867, "grad_norm": 1.9905402660369873, "learning_rate": 2.2048749198203976e-06, "loss": 0.8082, "step": 47680 }, { "epoch": 0.5811487696976344, "grad_norm": 2.050323724746704, "learning_rate": 2.204554201411161e-06, "loss": 0.8104, "step": 47685 }, { "epoch": 0.5812097059217822, "grad_norm": 2.2816526889801025, "learning_rate": 2.2042334830019245e-06, "loss": 0.8068, "step": 47690 }, { "epoch": 0.58127064214593, "grad_norm": 1.8541682958602905, "learning_rate": 2.203912764592688e-06, "loss": 0.7847, "step": 47695 }, { "epoch": 0.5813315783700779, "grad_norm": 1.8056284189224243, "learning_rate": 2.2035920461834514e-06, "loss": 0.7852, "step": 47700 }, { "epoch": 0.5813925145942257, "grad_norm": 2.245718002319336, "learning_rate": 2.2032713277742144e-06, "loss": 0.794, "step": 47705 }, { "epoch": 0.5814534508183735, "grad_norm": 1.9299086332321167, "learning_rate": 2.202950609364978e-06, "loss": 0.8159, "step": 47710 }, { "epoch": 0.5815143870425213, "grad_norm": 1.662161111831665, "learning_rate": 2.2026298909557413e-06, "loss": 0.8083, "step": 47715 }, { "epoch": 0.581575323266669, "grad_norm": 2.0942294597625732, "learning_rate": 2.2023091725465043e-06, "loss": 0.8297, "step": 47720 }, { "epoch": 0.5816362594908169, "grad_norm": 1.6196235418319702, "learning_rate": 2.2019884541372677e-06, "loss": 0.8474, "step": 47725 }, { "epoch": 0.5816971957149647, "grad_norm": 1.8257399797439575, "learning_rate": 2.2016677357280307e-06, "loss": 0.7765, "step": 47730 }, { "epoch": 0.5817581319391125, "grad_norm": 1.9009860754013062, "learning_rate": 2.201347017318794e-06, "loss": 0.8194, "step": 47735 }, { "epoch": 0.5818190681632603, "grad_norm": 2.0163724422454834, "learning_rate": 2.2010262989095576e-06, "loss": 0.8021, "step": 47740 }, { "epoch": 0.5818800043874082, "grad_norm": 2.016103982925415, "learning_rate": 2.2007055805003206e-06, "loss": 0.8251, "step": 47745 }, { "epoch": 0.581940940611556, "grad_norm": 2.3830244541168213, "learning_rate": 2.200384862091084e-06, "loss": 0.9198, "step": 47750 }, { "epoch": 0.5820018768357037, "grad_norm": 1.9028129577636719, "learning_rate": 2.2000641436818475e-06, "loss": 0.7688, "step": 47755 }, { "epoch": 0.5820628130598515, "grad_norm": 2.062317371368408, "learning_rate": 2.199743425272611e-06, "loss": 0.8865, "step": 47760 }, { "epoch": 0.5821237492839993, "grad_norm": 1.8161214590072632, "learning_rate": 2.199422706863374e-06, "loss": 0.9117, "step": 47765 }, { "epoch": 0.5821846855081472, "grad_norm": 1.9047292470932007, "learning_rate": 2.1991019884541374e-06, "loss": 0.8584, "step": 47770 }, { "epoch": 0.582245621732295, "grad_norm": 2.16149640083313, "learning_rate": 2.198781270044901e-06, "loss": 0.8352, "step": 47775 }, { "epoch": 0.5823065579564428, "grad_norm": 2.482001781463623, "learning_rate": 2.1984605516356643e-06, "loss": 0.886, "step": 47780 }, { "epoch": 0.5823674941805906, "grad_norm": 2.1401262283325195, "learning_rate": 2.1981398332264273e-06, "loss": 0.7075, "step": 47785 }, { "epoch": 0.5824284304047384, "grad_norm": 2.2962255477905273, "learning_rate": 2.1978191148171907e-06, "loss": 0.8901, "step": 47790 }, { "epoch": 0.5824893666288862, "grad_norm": 1.9351845979690552, "learning_rate": 2.197498396407954e-06, "loss": 0.84, "step": 47795 }, { "epoch": 0.582550302853034, "grad_norm": 1.8575737476348877, "learning_rate": 2.197177677998717e-06, "loss": 0.8376, "step": 47800 }, { "epoch": 0.5826112390771818, "grad_norm": 2.0164663791656494, "learning_rate": 2.1968569595894806e-06, "loss": 0.8666, "step": 47805 }, { "epoch": 0.5826721753013296, "grad_norm": 1.8157246112823486, "learning_rate": 2.196536241180244e-06, "loss": 0.7763, "step": 47810 }, { "epoch": 0.5827331115254775, "grad_norm": 1.9029147624969482, "learning_rate": 2.196215522771007e-06, "loss": 0.8341, "step": 47815 }, { "epoch": 0.5827940477496253, "grad_norm": 2.1492557525634766, "learning_rate": 2.1958948043617705e-06, "loss": 0.8021, "step": 47820 }, { "epoch": 0.582854983973773, "grad_norm": 2.1153297424316406, "learning_rate": 2.1955740859525336e-06, "loss": 0.85, "step": 47825 }, { "epoch": 0.5829159201979208, "grad_norm": 2.0049831867218018, "learning_rate": 2.195253367543297e-06, "loss": 0.8549, "step": 47830 }, { "epoch": 0.5829768564220686, "grad_norm": 1.9396716356277466, "learning_rate": 2.1949326491340604e-06, "loss": 0.8207, "step": 47835 }, { "epoch": 0.5830377926462165, "grad_norm": 1.9667595624923706, "learning_rate": 2.194611930724824e-06, "loss": 0.8544, "step": 47840 }, { "epoch": 0.5830987288703643, "grad_norm": 1.859063982963562, "learning_rate": 2.1942912123155873e-06, "loss": 0.8084, "step": 47845 }, { "epoch": 0.5831596650945121, "grad_norm": 1.8097535371780396, "learning_rate": 2.1939704939063503e-06, "loss": 0.7982, "step": 47850 }, { "epoch": 0.5832206013186599, "grad_norm": 2.12465763092041, "learning_rate": 2.1936497754971138e-06, "loss": 0.8297, "step": 47855 }, { "epoch": 0.5832815375428076, "grad_norm": 2.0584909915924072, "learning_rate": 2.1933290570878772e-06, "loss": 0.8397, "step": 47860 }, { "epoch": 0.5833424737669555, "grad_norm": 1.9805301427841187, "learning_rate": 2.1930083386786402e-06, "loss": 0.8255, "step": 47865 }, { "epoch": 0.5834034099911033, "grad_norm": 2.056560754776001, "learning_rate": 2.1926876202694037e-06, "loss": 0.7838, "step": 47870 }, { "epoch": 0.5834643462152511, "grad_norm": 2.116581916809082, "learning_rate": 2.192366901860167e-06, "loss": 0.825, "step": 47875 }, { "epoch": 0.5835252824393989, "grad_norm": 2.217182159423828, "learning_rate": 2.19204618345093e-06, "loss": 0.8054, "step": 47880 }, { "epoch": 0.5835862186635468, "grad_norm": 1.8826680183410645, "learning_rate": 2.1917254650416936e-06, "loss": 0.8903, "step": 47885 }, { "epoch": 0.5836471548876946, "grad_norm": 1.7716206312179565, "learning_rate": 2.191404746632457e-06, "loss": 0.8418, "step": 47890 }, { "epoch": 0.5837080911118423, "grad_norm": 1.7687005996704102, "learning_rate": 2.19108402822322e-06, "loss": 0.7751, "step": 47895 }, { "epoch": 0.5837690273359901, "grad_norm": 1.9099822044372559, "learning_rate": 2.1907633098139835e-06, "loss": 0.8926, "step": 47900 }, { "epoch": 0.5838299635601379, "grad_norm": 1.8877382278442383, "learning_rate": 2.1904425914047465e-06, "loss": 0.8081, "step": 47905 }, { "epoch": 0.5838908997842858, "grad_norm": 1.9422825574874878, "learning_rate": 2.19012187299551e-06, "loss": 0.9118, "step": 47910 }, { "epoch": 0.5839518360084336, "grad_norm": 2.073680877685547, "learning_rate": 2.1898011545862734e-06, "loss": 0.8694, "step": 47915 }, { "epoch": 0.5840127722325814, "grad_norm": 1.9620157480239868, "learning_rate": 2.189480436177037e-06, "loss": 0.7547, "step": 47920 }, { "epoch": 0.5840737084567292, "grad_norm": 2.3266985416412354, "learning_rate": 2.1891597177678002e-06, "loss": 0.86, "step": 47925 }, { "epoch": 0.584134644680877, "grad_norm": 1.813309907913208, "learning_rate": 2.1888389993585637e-06, "loss": 0.8355, "step": 47930 }, { "epoch": 0.5841955809050248, "grad_norm": 1.8635395765304565, "learning_rate": 2.1885182809493267e-06, "loss": 0.8755, "step": 47935 }, { "epoch": 0.5842565171291726, "grad_norm": 1.7341877222061157, "learning_rate": 2.18819756254009e-06, "loss": 0.8125, "step": 47940 }, { "epoch": 0.5843174533533204, "grad_norm": 2.001211643218994, "learning_rate": 2.187876844130853e-06, "loss": 0.8399, "step": 47945 }, { "epoch": 0.5843783895774682, "grad_norm": 1.9632161855697632, "learning_rate": 2.1875561257216166e-06, "loss": 0.7927, "step": 47950 }, { "epoch": 0.5844393258016161, "grad_norm": 1.8996407985687256, "learning_rate": 2.18723540731238e-06, "loss": 0.7376, "step": 47955 }, { "epoch": 0.5845002620257639, "grad_norm": 1.6181706190109253, "learning_rate": 2.186914688903143e-06, "loss": 0.8011, "step": 47960 }, { "epoch": 0.5845611982499116, "grad_norm": 2.671811580657959, "learning_rate": 2.1865939704939065e-06, "loss": 0.8668, "step": 47965 }, { "epoch": 0.5846221344740594, "grad_norm": 1.9630006551742554, "learning_rate": 2.18627325208467e-06, "loss": 0.7761, "step": 47970 }, { "epoch": 0.5846830706982072, "grad_norm": 1.8315750360488892, "learning_rate": 2.185952533675433e-06, "loss": 0.7542, "step": 47975 }, { "epoch": 0.5847440069223551, "grad_norm": 2.127570867538452, "learning_rate": 2.1856318152661964e-06, "loss": 0.8546, "step": 47980 }, { "epoch": 0.5848049431465029, "grad_norm": 2.109947681427002, "learning_rate": 2.18531109685696e-06, "loss": 0.8046, "step": 47985 }, { "epoch": 0.5848658793706507, "grad_norm": 1.781523585319519, "learning_rate": 2.184990378447723e-06, "loss": 0.896, "step": 47990 }, { "epoch": 0.5849268155947985, "grad_norm": 1.9781181812286377, "learning_rate": 2.1846696600384863e-06, "loss": 0.8711, "step": 47995 }, { "epoch": 0.5849877518189462, "grad_norm": 1.885941505432129, "learning_rate": 2.1843489416292497e-06, "loss": 0.8131, "step": 48000 }, { "epoch": 0.5850486880430941, "grad_norm": 1.8197468519210815, "learning_rate": 2.184028223220013e-06, "loss": 0.8466, "step": 48005 }, { "epoch": 0.5851096242672419, "grad_norm": 1.871848464012146, "learning_rate": 2.1837075048107766e-06, "loss": 0.8669, "step": 48010 }, { "epoch": 0.5851705604913897, "grad_norm": 1.9530713558197021, "learning_rate": 2.1833867864015396e-06, "loss": 0.802, "step": 48015 }, { "epoch": 0.5852314967155375, "grad_norm": 1.7627134323120117, "learning_rate": 2.183066067992303e-06, "loss": 0.8454, "step": 48020 }, { "epoch": 0.5852924329396854, "grad_norm": 1.9303723573684692, "learning_rate": 2.182745349583066e-06, "loss": 0.8368, "step": 48025 }, { "epoch": 0.5853533691638332, "grad_norm": 2.575087070465088, "learning_rate": 2.1824246311738295e-06, "loss": 0.8471, "step": 48030 }, { "epoch": 0.5854143053879809, "grad_norm": 1.8722716569900513, "learning_rate": 2.182103912764593e-06, "loss": 0.7882, "step": 48035 }, { "epoch": 0.5854752416121287, "grad_norm": 2.2576255798339844, "learning_rate": 2.181783194355356e-06, "loss": 0.8131, "step": 48040 }, { "epoch": 0.5855361778362765, "grad_norm": 1.995415449142456, "learning_rate": 2.1814624759461194e-06, "loss": 0.8611, "step": 48045 }, { "epoch": 0.5855971140604244, "grad_norm": 1.8833032846450806, "learning_rate": 2.181141757536883e-06, "loss": 0.8005, "step": 48050 }, { "epoch": 0.5856580502845722, "grad_norm": 1.9398304224014282, "learning_rate": 2.180821039127646e-06, "loss": 0.8554, "step": 48055 }, { "epoch": 0.58571898650872, "grad_norm": 2.0272324085235596, "learning_rate": 2.1805003207184093e-06, "loss": 0.8588, "step": 48060 }, { "epoch": 0.5857799227328678, "grad_norm": 1.9503097534179688, "learning_rate": 2.1801796023091728e-06, "loss": 0.8856, "step": 48065 }, { "epoch": 0.5858408589570155, "grad_norm": 1.6682758331298828, "learning_rate": 2.179858883899936e-06, "loss": 0.8054, "step": 48070 }, { "epoch": 0.5859017951811634, "grad_norm": 1.8454949855804443, "learning_rate": 2.1795381654906992e-06, "loss": 0.7708, "step": 48075 }, { "epoch": 0.5859627314053112, "grad_norm": 1.6804901361465454, "learning_rate": 2.1792174470814627e-06, "loss": 0.8224, "step": 48080 }, { "epoch": 0.586023667629459, "grad_norm": 1.8404148817062378, "learning_rate": 2.178896728672226e-06, "loss": 0.8096, "step": 48085 }, { "epoch": 0.5860846038536068, "grad_norm": 2.4447903633117676, "learning_rate": 2.1785760102629895e-06, "loss": 0.8735, "step": 48090 }, { "epoch": 0.5861455400777547, "grad_norm": 1.9291926622390747, "learning_rate": 2.1782552918537526e-06, "loss": 0.7816, "step": 48095 }, { "epoch": 0.5862064763019025, "grad_norm": 2.1582324504852295, "learning_rate": 2.177934573444516e-06, "loss": 0.8105, "step": 48100 }, { "epoch": 0.5862674125260502, "grad_norm": 1.6295485496520996, "learning_rate": 2.1776138550352794e-06, "loss": 0.7497, "step": 48105 }, { "epoch": 0.586328348750198, "grad_norm": 2.025611639022827, "learning_rate": 2.1772931366260425e-06, "loss": 0.7699, "step": 48110 }, { "epoch": 0.5863892849743458, "grad_norm": 1.7358454465866089, "learning_rate": 2.176972418216806e-06, "loss": 0.8226, "step": 48115 }, { "epoch": 0.5864502211984937, "grad_norm": 1.9216142892837524, "learning_rate": 2.176651699807569e-06, "loss": 0.8445, "step": 48120 }, { "epoch": 0.5865111574226415, "grad_norm": 2.2237870693206787, "learning_rate": 2.1763309813983323e-06, "loss": 0.7635, "step": 48125 }, { "epoch": 0.5865720936467893, "grad_norm": 2.1213362216949463, "learning_rate": 2.176010262989096e-06, "loss": 0.8125, "step": 48130 }, { "epoch": 0.5866330298709371, "grad_norm": 2.4632208347320557, "learning_rate": 2.175689544579859e-06, "loss": 0.7775, "step": 48135 }, { "epoch": 0.5866939660950848, "grad_norm": 1.8925175666809082, "learning_rate": 2.1753688261706222e-06, "loss": 0.7792, "step": 48140 }, { "epoch": 0.5867549023192327, "grad_norm": 1.9245128631591797, "learning_rate": 2.1750481077613857e-06, "loss": 0.785, "step": 48145 }, { "epoch": 0.5868158385433805, "grad_norm": 1.711973786354065, "learning_rate": 2.174727389352149e-06, "loss": 0.8125, "step": 48150 }, { "epoch": 0.5868767747675283, "grad_norm": 1.972066879272461, "learning_rate": 2.1744066709429126e-06, "loss": 0.8919, "step": 48155 }, { "epoch": 0.5869377109916761, "grad_norm": 2.2176544666290283, "learning_rate": 2.1740859525336756e-06, "loss": 0.8384, "step": 48160 }, { "epoch": 0.586998647215824, "grad_norm": 2.0106873512268066, "learning_rate": 2.173765234124439e-06, "loss": 0.8486, "step": 48165 }, { "epoch": 0.5870595834399718, "grad_norm": 1.8593631982803345, "learning_rate": 2.1734445157152025e-06, "loss": 0.7846, "step": 48170 }, { "epoch": 0.5871205196641195, "grad_norm": 1.8427835702896118, "learning_rate": 2.1731237973059655e-06, "loss": 0.8001, "step": 48175 }, { "epoch": 0.5871814558882673, "grad_norm": 2.2189464569091797, "learning_rate": 2.172803078896729e-06, "loss": 0.7905, "step": 48180 }, { "epoch": 0.5872423921124151, "grad_norm": 1.748752236366272, "learning_rate": 2.1724823604874924e-06, "loss": 0.7714, "step": 48185 }, { "epoch": 0.587303328336563, "grad_norm": 2.0280332565307617, "learning_rate": 2.1721616420782554e-06, "loss": 0.8347, "step": 48190 }, { "epoch": 0.5873642645607108, "grad_norm": 1.938143014907837, "learning_rate": 2.171840923669019e-06, "loss": 0.8316, "step": 48195 }, { "epoch": 0.5874252007848586, "grad_norm": 2.555931806564331, "learning_rate": 2.171520205259782e-06, "loss": 0.7984, "step": 48200 }, { "epoch": 0.5874861370090064, "grad_norm": 2.042846441268921, "learning_rate": 2.1711994868505453e-06, "loss": 0.8621, "step": 48205 }, { "epoch": 0.5875470732331541, "grad_norm": 1.9376275539398193, "learning_rate": 2.1708787684413087e-06, "loss": 0.9231, "step": 48210 }, { "epoch": 0.587608009457302, "grad_norm": 1.7918589115142822, "learning_rate": 2.1705580500320717e-06, "loss": 0.8073, "step": 48215 }, { "epoch": 0.5876689456814498, "grad_norm": 4.006993770599365, "learning_rate": 2.170237331622835e-06, "loss": 0.93, "step": 48220 }, { "epoch": 0.5877298819055976, "grad_norm": 1.8284876346588135, "learning_rate": 2.1699166132135986e-06, "loss": 0.8747, "step": 48225 }, { "epoch": 0.5877908181297454, "grad_norm": 1.8115755319595337, "learning_rate": 2.169595894804362e-06, "loss": 0.7718, "step": 48230 }, { "epoch": 0.5878517543538933, "grad_norm": 2.0806539058685303, "learning_rate": 2.1692751763951255e-06, "loss": 0.8752, "step": 48235 }, { "epoch": 0.5879126905780411, "grad_norm": 1.998307228088379, "learning_rate": 2.1689544579858885e-06, "loss": 0.8527, "step": 48240 }, { "epoch": 0.5879736268021888, "grad_norm": 2.1178629398345947, "learning_rate": 2.168633739576652e-06, "loss": 0.8419, "step": 48245 }, { "epoch": 0.5880345630263366, "grad_norm": 1.9024261236190796, "learning_rate": 2.1683130211674154e-06, "loss": 0.8631, "step": 48250 }, { "epoch": 0.5880954992504844, "grad_norm": 2.0403707027435303, "learning_rate": 2.1679923027581784e-06, "loss": 0.8644, "step": 48255 }, { "epoch": 0.5881564354746323, "grad_norm": 1.9892600774765015, "learning_rate": 2.167671584348942e-06, "loss": 0.8053, "step": 48260 }, { "epoch": 0.5882173716987801, "grad_norm": 2.497462272644043, "learning_rate": 2.1673508659397053e-06, "loss": 0.8131, "step": 48265 }, { "epoch": 0.5882783079229279, "grad_norm": 2.1796796321868896, "learning_rate": 2.1670301475304683e-06, "loss": 0.7582, "step": 48270 }, { "epoch": 0.5883392441470757, "grad_norm": 1.7778518199920654, "learning_rate": 2.1667094291212317e-06, "loss": 0.8616, "step": 48275 }, { "epoch": 0.5884001803712234, "grad_norm": 1.9142162799835205, "learning_rate": 2.1663887107119948e-06, "loss": 0.875, "step": 48280 }, { "epoch": 0.5884611165953713, "grad_norm": 2.13497257232666, "learning_rate": 2.166067992302758e-06, "loss": 0.8523, "step": 48285 }, { "epoch": 0.5885220528195191, "grad_norm": 1.9872097969055176, "learning_rate": 2.1657472738935216e-06, "loss": 0.834, "step": 48290 }, { "epoch": 0.5885829890436669, "grad_norm": 1.868018388748169, "learning_rate": 2.165426555484285e-06, "loss": 0.8272, "step": 48295 }, { "epoch": 0.5886439252678147, "grad_norm": 1.8593637943267822, "learning_rate": 2.165105837075048e-06, "loss": 0.8372, "step": 48300 }, { "epoch": 0.5887048614919625, "grad_norm": 2.0264909267425537, "learning_rate": 2.1647851186658115e-06, "loss": 0.7819, "step": 48305 }, { "epoch": 0.5887657977161104, "grad_norm": 1.7492477893829346, "learning_rate": 2.164464400256575e-06, "loss": 0.8724, "step": 48310 }, { "epoch": 0.5888267339402581, "grad_norm": 1.6796696186065674, "learning_rate": 2.1641436818473384e-06, "loss": 0.7789, "step": 48315 }, { "epoch": 0.5888876701644059, "grad_norm": 1.8160591125488281, "learning_rate": 2.1638229634381014e-06, "loss": 0.7889, "step": 48320 }, { "epoch": 0.5889486063885537, "grad_norm": 1.9772765636444092, "learning_rate": 2.163502245028865e-06, "loss": 0.8542, "step": 48325 }, { "epoch": 0.5890095426127016, "grad_norm": 1.9234728813171387, "learning_rate": 2.1631815266196283e-06, "loss": 0.8047, "step": 48330 }, { "epoch": 0.5890704788368494, "grad_norm": 2.1471972465515137, "learning_rate": 2.1628608082103913e-06, "loss": 0.7733, "step": 48335 }, { "epoch": 0.5891314150609972, "grad_norm": 1.8904341459274292, "learning_rate": 2.1625400898011548e-06, "loss": 0.8252, "step": 48340 }, { "epoch": 0.589192351285145, "grad_norm": 2.0045888423919678, "learning_rate": 2.1622193713919182e-06, "loss": 0.8025, "step": 48345 }, { "epoch": 0.5892532875092927, "grad_norm": 1.8750709295272827, "learning_rate": 2.1618986529826812e-06, "loss": 0.8733, "step": 48350 }, { "epoch": 0.5893142237334406, "grad_norm": 2.399587869644165, "learning_rate": 2.1615779345734447e-06, "loss": 0.8616, "step": 48355 }, { "epoch": 0.5893751599575884, "grad_norm": 1.8391740322113037, "learning_rate": 2.161257216164208e-06, "loss": 0.7215, "step": 48360 }, { "epoch": 0.5894360961817362, "grad_norm": 1.9545634984970093, "learning_rate": 2.160936497754971e-06, "loss": 0.7935, "step": 48365 }, { "epoch": 0.589497032405884, "grad_norm": 2.109278440475464, "learning_rate": 2.1606157793457346e-06, "loss": 0.8259, "step": 48370 }, { "epoch": 0.5895579686300318, "grad_norm": 1.9877874851226807, "learning_rate": 2.160295060936498e-06, "loss": 0.7934, "step": 48375 }, { "epoch": 0.5896189048541797, "grad_norm": 2.0200138092041016, "learning_rate": 2.159974342527261e-06, "loss": 0.8147, "step": 48380 }, { "epoch": 0.5896798410783274, "grad_norm": 2.015944242477417, "learning_rate": 2.1596536241180245e-06, "loss": 0.8206, "step": 48385 }, { "epoch": 0.5897407773024752, "grad_norm": 2.0693318843841553, "learning_rate": 2.159332905708788e-06, "loss": 0.8387, "step": 48390 }, { "epoch": 0.589801713526623, "grad_norm": 1.8064793348312378, "learning_rate": 2.1590121872995513e-06, "loss": 0.785, "step": 48395 }, { "epoch": 0.5898626497507709, "grad_norm": 1.7636311054229736, "learning_rate": 2.158691468890315e-06, "loss": 0.7952, "step": 48400 }, { "epoch": 0.5899235859749187, "grad_norm": 1.7567014694213867, "learning_rate": 2.158370750481078e-06, "loss": 0.7943, "step": 48405 }, { "epoch": 0.5899845221990665, "grad_norm": 1.9755300283432007, "learning_rate": 2.1580500320718412e-06, "loss": 0.836, "step": 48410 }, { "epoch": 0.5900454584232142, "grad_norm": 1.9129269123077393, "learning_rate": 2.1577293136626043e-06, "loss": 0.8385, "step": 48415 }, { "epoch": 0.590106394647362, "grad_norm": 1.8172839879989624, "learning_rate": 2.1574085952533677e-06, "loss": 0.8076, "step": 48420 }, { "epoch": 0.5901673308715099, "grad_norm": 2.436785936355591, "learning_rate": 2.157087876844131e-06, "loss": 0.7773, "step": 48425 }, { "epoch": 0.5902282670956577, "grad_norm": 2.1185169219970703, "learning_rate": 2.156767158434894e-06, "loss": 0.7721, "step": 48430 }, { "epoch": 0.5902892033198055, "grad_norm": 1.5482879877090454, "learning_rate": 2.1564464400256576e-06, "loss": 0.7994, "step": 48435 }, { "epoch": 0.5903501395439533, "grad_norm": 1.713510513305664, "learning_rate": 2.156125721616421e-06, "loss": 0.8318, "step": 48440 }, { "epoch": 0.5904110757681011, "grad_norm": 1.7054438591003418, "learning_rate": 2.155805003207184e-06, "loss": 0.7801, "step": 48445 }, { "epoch": 0.5904720119922489, "grad_norm": 2.065678358078003, "learning_rate": 2.1554842847979475e-06, "loss": 0.8388, "step": 48450 }, { "epoch": 0.5905329482163967, "grad_norm": 1.9923889636993408, "learning_rate": 2.155163566388711e-06, "loss": 0.8498, "step": 48455 }, { "epoch": 0.5905938844405445, "grad_norm": 1.8285467624664307, "learning_rate": 2.1548428479794744e-06, "loss": 0.8515, "step": 48460 }, { "epoch": 0.5906548206646923, "grad_norm": 2.1745893955230713, "learning_rate": 2.1545221295702374e-06, "loss": 0.8322, "step": 48465 }, { "epoch": 0.5907157568888401, "grad_norm": 2.3330235481262207, "learning_rate": 2.154201411161001e-06, "loss": 0.9331, "step": 48470 }, { "epoch": 0.590776693112988, "grad_norm": 1.7495886087417603, "learning_rate": 2.1538806927517643e-06, "loss": 0.8833, "step": 48475 }, { "epoch": 0.5908376293371358, "grad_norm": 1.9329882860183716, "learning_rate": 2.1535599743425277e-06, "loss": 0.7849, "step": 48480 }, { "epoch": 0.5908985655612835, "grad_norm": 2.032027244567871, "learning_rate": 2.1532392559332907e-06, "loss": 0.8597, "step": 48485 }, { "epoch": 0.5909595017854313, "grad_norm": 2.0941081047058105, "learning_rate": 2.152918537524054e-06, "loss": 0.8318, "step": 48490 }, { "epoch": 0.5910204380095792, "grad_norm": 1.8783940076828003, "learning_rate": 2.152597819114817e-06, "loss": 0.8265, "step": 48495 }, { "epoch": 0.591081374233727, "grad_norm": 1.9724431037902832, "learning_rate": 2.1522771007055806e-06, "loss": 0.7885, "step": 48500 }, { "epoch": 0.5911423104578748, "grad_norm": 1.7794607877731323, "learning_rate": 2.151956382296344e-06, "loss": 0.8103, "step": 48505 }, { "epoch": 0.5912032466820226, "grad_norm": 1.9528734683990479, "learning_rate": 2.151635663887107e-06, "loss": 0.7639, "step": 48510 }, { "epoch": 0.5912641829061704, "grad_norm": 1.7767528295516968, "learning_rate": 2.1513149454778705e-06, "loss": 0.8179, "step": 48515 }, { "epoch": 0.5913251191303182, "grad_norm": 2.081644058227539, "learning_rate": 2.150994227068634e-06, "loss": 0.7156, "step": 48520 }, { "epoch": 0.591386055354466, "grad_norm": 1.9393256902694702, "learning_rate": 2.150673508659397e-06, "loss": 0.8546, "step": 48525 }, { "epoch": 0.5914469915786138, "grad_norm": 1.6791110038757324, "learning_rate": 2.1503527902501604e-06, "loss": 0.8078, "step": 48530 }, { "epoch": 0.5915079278027616, "grad_norm": 1.9245957136154175, "learning_rate": 2.150032071840924e-06, "loss": 0.8053, "step": 48535 }, { "epoch": 0.5915688640269094, "grad_norm": 1.960952877998352, "learning_rate": 2.1497113534316873e-06, "loss": 0.8388, "step": 48540 }, { "epoch": 0.5916298002510573, "grad_norm": 1.9457842111587524, "learning_rate": 2.1493906350224507e-06, "loss": 0.8419, "step": 48545 }, { "epoch": 0.5916907364752051, "grad_norm": 2.773043155670166, "learning_rate": 2.1490699166132138e-06, "loss": 0.8438, "step": 48550 }, { "epoch": 0.5917516726993528, "grad_norm": 2.021402597427368, "learning_rate": 2.148749198203977e-06, "loss": 0.893, "step": 48555 }, { "epoch": 0.5918126089235006, "grad_norm": 1.585318684577942, "learning_rate": 2.1484284797947406e-06, "loss": 0.8988, "step": 48560 }, { "epoch": 0.5918735451476484, "grad_norm": 1.9963452816009521, "learning_rate": 2.1481077613855037e-06, "loss": 0.7982, "step": 48565 }, { "epoch": 0.5919344813717963, "grad_norm": 1.6666964292526245, "learning_rate": 2.147787042976267e-06, "loss": 0.7967, "step": 48570 }, { "epoch": 0.5919954175959441, "grad_norm": 1.8844960927963257, "learning_rate": 2.14746632456703e-06, "loss": 0.798, "step": 48575 }, { "epoch": 0.5920563538200919, "grad_norm": 1.9360896348953247, "learning_rate": 2.1471456061577936e-06, "loss": 0.8394, "step": 48580 }, { "epoch": 0.5921172900442397, "grad_norm": 1.733461856842041, "learning_rate": 2.146824887748557e-06, "loss": 0.89, "step": 48585 }, { "epoch": 0.5921782262683875, "grad_norm": 1.8481173515319824, "learning_rate": 2.14650416933932e-06, "loss": 0.8152, "step": 48590 }, { "epoch": 0.5922391624925353, "grad_norm": 1.9713153839111328, "learning_rate": 2.1461834509300835e-06, "loss": 0.834, "step": 48595 }, { "epoch": 0.5923000987166831, "grad_norm": 1.8751614093780518, "learning_rate": 2.145862732520847e-06, "loss": 0.8333, "step": 48600 }, { "epoch": 0.5923610349408309, "grad_norm": 2.238957405090332, "learning_rate": 2.14554201411161e-06, "loss": 0.8115, "step": 48605 }, { "epoch": 0.5924219711649787, "grad_norm": 1.885645866394043, "learning_rate": 2.1452212957023734e-06, "loss": 0.8053, "step": 48610 }, { "epoch": 0.5924829073891266, "grad_norm": 1.8289486169815063, "learning_rate": 2.144900577293137e-06, "loss": 0.7852, "step": 48615 }, { "epoch": 0.5925438436132744, "grad_norm": 1.8271232843399048, "learning_rate": 2.1445798588839002e-06, "loss": 0.8588, "step": 48620 }, { "epoch": 0.5926047798374221, "grad_norm": 2.034266710281372, "learning_rate": 2.1442591404746637e-06, "loss": 0.8485, "step": 48625 }, { "epoch": 0.5926657160615699, "grad_norm": 1.9707067012786865, "learning_rate": 2.1439384220654267e-06, "loss": 0.863, "step": 48630 }, { "epoch": 0.5927266522857177, "grad_norm": 1.7154055833816528, "learning_rate": 2.14361770365619e-06, "loss": 0.7813, "step": 48635 }, { "epoch": 0.5927875885098656, "grad_norm": 2.1291708946228027, "learning_rate": 2.1432969852469536e-06, "loss": 0.807, "step": 48640 }, { "epoch": 0.5928485247340134, "grad_norm": 2.1943297386169434, "learning_rate": 2.1429762668377166e-06, "loss": 0.8217, "step": 48645 }, { "epoch": 0.5929094609581612, "grad_norm": 1.8205815553665161, "learning_rate": 2.14265554842848e-06, "loss": 0.8103, "step": 48650 }, { "epoch": 0.592970397182309, "grad_norm": 1.8201760053634644, "learning_rate": 2.1423348300192435e-06, "loss": 0.7621, "step": 48655 }, { "epoch": 0.5930313334064568, "grad_norm": 2.642246723175049, "learning_rate": 2.1420141116100065e-06, "loss": 0.927, "step": 48660 }, { "epoch": 0.5930922696306046, "grad_norm": 1.9421666860580444, "learning_rate": 2.14169339320077e-06, "loss": 0.7961, "step": 48665 }, { "epoch": 0.5931532058547524, "grad_norm": 1.9896297454833984, "learning_rate": 2.141372674791533e-06, "loss": 0.8604, "step": 48670 }, { "epoch": 0.5932141420789002, "grad_norm": 2.369932174682617, "learning_rate": 2.1410519563822964e-06, "loss": 0.8302, "step": 48675 }, { "epoch": 0.593275078303048, "grad_norm": 1.9064582586288452, "learning_rate": 2.14073123797306e-06, "loss": 0.8395, "step": 48680 }, { "epoch": 0.5933360145271959, "grad_norm": 1.8932722806930542, "learning_rate": 2.1404105195638233e-06, "loss": 0.8174, "step": 48685 }, { "epoch": 0.5933969507513437, "grad_norm": 2.1079788208007812, "learning_rate": 2.1400898011545863e-06, "loss": 0.8589, "step": 48690 }, { "epoch": 0.5934578869754914, "grad_norm": 1.8694915771484375, "learning_rate": 2.1397690827453497e-06, "loss": 0.8992, "step": 48695 }, { "epoch": 0.5935188231996392, "grad_norm": 1.918027639389038, "learning_rate": 2.139448364336113e-06, "loss": 0.8604, "step": 48700 }, { "epoch": 0.593579759423787, "grad_norm": 1.9862028360366821, "learning_rate": 2.1391276459268766e-06, "loss": 0.8387, "step": 48705 }, { "epoch": 0.5936406956479349, "grad_norm": 1.7005048990249634, "learning_rate": 2.1388069275176396e-06, "loss": 0.855, "step": 48710 }, { "epoch": 0.5937016318720827, "grad_norm": 1.8862971067428589, "learning_rate": 2.138486209108403e-06, "loss": 0.8252, "step": 48715 }, { "epoch": 0.5937625680962305, "grad_norm": 1.824944019317627, "learning_rate": 2.1381654906991665e-06, "loss": 0.8095, "step": 48720 }, { "epoch": 0.5938235043203783, "grad_norm": 2.081313371658325, "learning_rate": 2.1378447722899295e-06, "loss": 0.798, "step": 48725 }, { "epoch": 0.593884440544526, "grad_norm": 1.9084155559539795, "learning_rate": 2.137524053880693e-06, "loss": 0.8475, "step": 48730 }, { "epoch": 0.5939453767686739, "grad_norm": 2.0669004917144775, "learning_rate": 2.1372033354714564e-06, "loss": 0.8395, "step": 48735 }, { "epoch": 0.5940063129928217, "grad_norm": 2.1678988933563232, "learning_rate": 2.1368826170622194e-06, "loss": 0.7797, "step": 48740 }, { "epoch": 0.5940672492169695, "grad_norm": 1.5977274179458618, "learning_rate": 2.136561898652983e-06, "loss": 0.7783, "step": 48745 }, { "epoch": 0.5941281854411173, "grad_norm": 1.8950726985931396, "learning_rate": 2.136241180243746e-06, "loss": 0.8784, "step": 48750 }, { "epoch": 0.5941891216652652, "grad_norm": 2.1130599975585938, "learning_rate": 2.1359204618345093e-06, "loss": 0.8833, "step": 48755 }, { "epoch": 0.594250057889413, "grad_norm": 1.7752162218093872, "learning_rate": 2.1355997434252727e-06, "loss": 0.8303, "step": 48760 }, { "epoch": 0.5943109941135607, "grad_norm": 1.8304150104522705, "learning_rate": 2.135279025016036e-06, "loss": 0.8324, "step": 48765 }, { "epoch": 0.5943719303377085, "grad_norm": 1.8499886989593506, "learning_rate": 2.1349583066067996e-06, "loss": 0.9047, "step": 48770 }, { "epoch": 0.5944328665618563, "grad_norm": 2.0612471103668213, "learning_rate": 2.1346375881975626e-06, "loss": 0.8013, "step": 48775 }, { "epoch": 0.5944938027860042, "grad_norm": 1.9521428346633911, "learning_rate": 2.134316869788326e-06, "loss": 0.7654, "step": 48780 }, { "epoch": 0.594554739010152, "grad_norm": 1.8690049648284912, "learning_rate": 2.1339961513790895e-06, "loss": 0.8439, "step": 48785 }, { "epoch": 0.5946156752342998, "grad_norm": 2.0175602436065674, "learning_rate": 2.1336754329698525e-06, "loss": 0.7443, "step": 48790 }, { "epoch": 0.5946766114584476, "grad_norm": 1.8489738702774048, "learning_rate": 2.133354714560616e-06, "loss": 0.8338, "step": 48795 }, { "epoch": 0.5947375476825953, "grad_norm": 1.9140571355819702, "learning_rate": 2.1330339961513794e-06, "loss": 0.8621, "step": 48800 }, { "epoch": 0.5947984839067432, "grad_norm": 2.1045031547546387, "learning_rate": 2.1327132777421424e-06, "loss": 0.8078, "step": 48805 }, { "epoch": 0.594859420130891, "grad_norm": 2.0937745571136475, "learning_rate": 2.132392559332906e-06, "loss": 0.8259, "step": 48810 }, { "epoch": 0.5949203563550388, "grad_norm": 2.0184922218322754, "learning_rate": 2.1320718409236693e-06, "loss": 0.8759, "step": 48815 }, { "epoch": 0.5949812925791866, "grad_norm": 2.132995367050171, "learning_rate": 2.1317511225144323e-06, "loss": 0.8136, "step": 48820 }, { "epoch": 0.5950422288033345, "grad_norm": 1.9200636148452759, "learning_rate": 2.1314304041051958e-06, "loss": 0.9, "step": 48825 }, { "epoch": 0.5951031650274823, "grad_norm": 1.9131687879562378, "learning_rate": 2.131109685695959e-06, "loss": 0.7811, "step": 48830 }, { "epoch": 0.59516410125163, "grad_norm": 2.615809202194214, "learning_rate": 2.1307889672867222e-06, "loss": 0.8407, "step": 48835 }, { "epoch": 0.5952250374757778, "grad_norm": 1.8819303512573242, "learning_rate": 2.1304682488774857e-06, "loss": 0.794, "step": 48840 }, { "epoch": 0.5952859736999256, "grad_norm": 1.9177591800689697, "learning_rate": 2.130147530468249e-06, "loss": 0.7688, "step": 48845 }, { "epoch": 0.5953469099240735, "grad_norm": 1.8246159553527832, "learning_rate": 2.1298268120590126e-06, "loss": 0.7968, "step": 48850 }, { "epoch": 0.5954078461482213, "grad_norm": 2.0647642612457275, "learning_rate": 2.1295060936497756e-06, "loss": 0.8908, "step": 48855 }, { "epoch": 0.5954687823723691, "grad_norm": 2.1448440551757812, "learning_rate": 2.129185375240539e-06, "loss": 0.8467, "step": 48860 }, { "epoch": 0.5955297185965169, "grad_norm": 1.870600700378418, "learning_rate": 2.1288646568313025e-06, "loss": 0.8474, "step": 48865 }, { "epoch": 0.5955906548206646, "grad_norm": 2.2542123794555664, "learning_rate": 2.1285439384220655e-06, "loss": 0.8319, "step": 48870 }, { "epoch": 0.5956515910448125, "grad_norm": 1.7611441612243652, "learning_rate": 2.128223220012829e-06, "loss": 0.8043, "step": 48875 }, { "epoch": 0.5957125272689603, "grad_norm": 1.9769171476364136, "learning_rate": 2.1279025016035924e-06, "loss": 0.8476, "step": 48880 }, { "epoch": 0.5957734634931081, "grad_norm": 2.106572151184082, "learning_rate": 2.1275817831943554e-06, "loss": 0.8179, "step": 48885 }, { "epoch": 0.5958343997172559, "grad_norm": 2.157602071762085, "learning_rate": 2.127261064785119e-06, "loss": 0.841, "step": 48890 }, { "epoch": 0.5958953359414038, "grad_norm": 1.9414407014846802, "learning_rate": 2.1269403463758822e-06, "loss": 0.7822, "step": 48895 }, { "epoch": 0.5959562721655516, "grad_norm": 2.3080129623413086, "learning_rate": 2.1266196279666453e-06, "loss": 0.8349, "step": 48900 }, { "epoch": 0.5960172083896993, "grad_norm": 1.8429700136184692, "learning_rate": 2.1262989095574087e-06, "loss": 0.8052, "step": 48905 }, { "epoch": 0.5960781446138471, "grad_norm": 2.0106871128082275, "learning_rate": 2.125978191148172e-06, "loss": 0.8571, "step": 48910 }, { "epoch": 0.5961390808379949, "grad_norm": 1.9178744554519653, "learning_rate": 2.125657472738935e-06, "loss": 0.7947, "step": 48915 }, { "epoch": 0.5962000170621428, "grad_norm": 1.8500670194625854, "learning_rate": 2.1253367543296986e-06, "loss": 0.8016, "step": 48920 }, { "epoch": 0.5962609532862906, "grad_norm": 1.7448687553405762, "learning_rate": 2.125016035920462e-06, "loss": 0.8153, "step": 48925 }, { "epoch": 0.5963218895104384, "grad_norm": 2.1368865966796875, "learning_rate": 2.1246953175112255e-06, "loss": 0.8042, "step": 48930 }, { "epoch": 0.5963828257345862, "grad_norm": 2.330148935317993, "learning_rate": 2.124374599101989e-06, "loss": 0.8419, "step": 48935 }, { "epoch": 0.5964437619587339, "grad_norm": 2.03971004486084, "learning_rate": 2.124053880692752e-06, "loss": 0.8294, "step": 48940 }, { "epoch": 0.5965046981828818, "grad_norm": 2.1539013385772705, "learning_rate": 2.1237331622835154e-06, "loss": 0.8051, "step": 48945 }, { "epoch": 0.5965656344070296, "grad_norm": 1.9605557918548584, "learning_rate": 2.123412443874279e-06, "loss": 0.8823, "step": 48950 }, { "epoch": 0.5966265706311774, "grad_norm": 1.8364603519439697, "learning_rate": 2.123091725465042e-06, "loss": 0.7848, "step": 48955 }, { "epoch": 0.5966875068553252, "grad_norm": 1.9448363780975342, "learning_rate": 2.1227710070558053e-06, "loss": 0.792, "step": 48960 }, { "epoch": 0.5967484430794731, "grad_norm": 2.0030791759490967, "learning_rate": 2.1224502886465683e-06, "loss": 0.8023, "step": 48965 }, { "epoch": 0.5968093793036209, "grad_norm": 2.1758553981781006, "learning_rate": 2.1221295702373317e-06, "loss": 0.7829, "step": 48970 }, { "epoch": 0.5968703155277686, "grad_norm": 1.9426404237747192, "learning_rate": 2.121808851828095e-06, "loss": 0.6783, "step": 48975 }, { "epoch": 0.5969312517519164, "grad_norm": 2.1631829738616943, "learning_rate": 2.121488133418858e-06, "loss": 0.8717, "step": 48980 }, { "epoch": 0.5969921879760642, "grad_norm": 1.7509431838989258, "learning_rate": 2.1211674150096216e-06, "loss": 0.798, "step": 48985 }, { "epoch": 0.5970531242002121, "grad_norm": 2.124818801879883, "learning_rate": 2.120846696600385e-06, "loss": 0.8432, "step": 48990 }, { "epoch": 0.5971140604243599, "grad_norm": 1.8382757902145386, "learning_rate": 2.1205259781911485e-06, "loss": 0.8272, "step": 48995 }, { "epoch": 0.5971749966485077, "grad_norm": 1.9252582788467407, "learning_rate": 2.1202052597819115e-06, "loss": 0.7935, "step": 49000 }, { "epoch": 0.5972359328726555, "grad_norm": 2.1255204677581787, "learning_rate": 2.119884541372675e-06, "loss": 0.8453, "step": 49005 }, { "epoch": 0.5972968690968032, "grad_norm": 1.871579885482788, "learning_rate": 2.1195638229634384e-06, "loss": 0.8863, "step": 49010 }, { "epoch": 0.5973578053209511, "grad_norm": 1.9202470779418945, "learning_rate": 2.119243104554202e-06, "loss": 0.8135, "step": 49015 }, { "epoch": 0.5974187415450989, "grad_norm": 1.8901227712631226, "learning_rate": 2.118922386144965e-06, "loss": 0.8766, "step": 49020 }, { "epoch": 0.5974796777692467, "grad_norm": 1.999495506286621, "learning_rate": 2.1186016677357283e-06, "loss": 0.8036, "step": 49025 }, { "epoch": 0.5975406139933945, "grad_norm": 2.141659736633301, "learning_rate": 2.1182809493264917e-06, "loss": 0.8184, "step": 49030 }, { "epoch": 0.5976015502175424, "grad_norm": 1.646027684211731, "learning_rate": 2.1179602309172548e-06, "loss": 0.848, "step": 49035 }, { "epoch": 0.5976624864416902, "grad_norm": 1.7864829301834106, "learning_rate": 2.117639512508018e-06, "loss": 0.8546, "step": 49040 }, { "epoch": 0.5977234226658379, "grad_norm": 1.773301362991333, "learning_rate": 2.1173187940987812e-06, "loss": 0.7874, "step": 49045 }, { "epoch": 0.5977843588899857, "grad_norm": 2.0745973587036133, "learning_rate": 2.1169980756895447e-06, "loss": 0.854, "step": 49050 }, { "epoch": 0.5978452951141335, "grad_norm": 2.300102710723877, "learning_rate": 2.116677357280308e-06, "loss": 0.8177, "step": 49055 }, { "epoch": 0.5979062313382814, "grad_norm": 2.083278179168701, "learning_rate": 2.116356638871071e-06, "loss": 0.8531, "step": 49060 }, { "epoch": 0.5979671675624292, "grad_norm": 1.9418530464172363, "learning_rate": 2.1160359204618346e-06, "loss": 0.8659, "step": 49065 }, { "epoch": 0.598028103786577, "grad_norm": 2.121229410171509, "learning_rate": 2.115715202052598e-06, "loss": 0.793, "step": 49070 }, { "epoch": 0.5980890400107248, "grad_norm": 2.000382900238037, "learning_rate": 2.1153944836433614e-06, "loss": 0.7457, "step": 49075 }, { "epoch": 0.5981499762348725, "grad_norm": 1.7658756971359253, "learning_rate": 2.1150737652341245e-06, "loss": 0.808, "step": 49080 }, { "epoch": 0.5982109124590204, "grad_norm": 1.9610445499420166, "learning_rate": 2.114753046824888e-06, "loss": 0.7958, "step": 49085 }, { "epoch": 0.5982718486831682, "grad_norm": 2.1284563541412354, "learning_rate": 2.1144323284156513e-06, "loss": 0.7982, "step": 49090 }, { "epoch": 0.598332784907316, "grad_norm": 2.1601364612579346, "learning_rate": 2.1141116100064148e-06, "loss": 0.8831, "step": 49095 }, { "epoch": 0.5983937211314638, "grad_norm": 1.8787765502929688, "learning_rate": 2.113790891597178e-06, "loss": 0.836, "step": 49100 }, { "epoch": 0.5984546573556117, "grad_norm": 2.137017250061035, "learning_rate": 2.1134701731879412e-06, "loss": 0.7604, "step": 49105 }, { "epoch": 0.5985155935797595, "grad_norm": 2.131420135498047, "learning_rate": 2.1131494547787047e-06, "loss": 0.8093, "step": 49110 }, { "epoch": 0.5985765298039072, "grad_norm": 1.9810823202133179, "learning_rate": 2.1128287363694677e-06, "loss": 0.8629, "step": 49115 }, { "epoch": 0.598637466028055, "grad_norm": 1.9077690839767456, "learning_rate": 2.112508017960231e-06, "loss": 0.8018, "step": 49120 }, { "epoch": 0.5986984022522028, "grad_norm": 1.7445541620254517, "learning_rate": 2.112187299550994e-06, "loss": 0.7557, "step": 49125 }, { "epoch": 0.5987593384763507, "grad_norm": 2.0450785160064697, "learning_rate": 2.1118665811417576e-06, "loss": 0.8079, "step": 49130 }, { "epoch": 0.5988202747004985, "grad_norm": 2.326707601547241, "learning_rate": 2.111545862732521e-06, "loss": 0.8326, "step": 49135 }, { "epoch": 0.5988812109246463, "grad_norm": 1.9195497035980225, "learning_rate": 2.111225144323284e-06, "loss": 0.7726, "step": 49140 }, { "epoch": 0.5989421471487941, "grad_norm": 1.883426547050476, "learning_rate": 2.1109044259140475e-06, "loss": 0.8413, "step": 49145 }, { "epoch": 0.5990030833729418, "grad_norm": 2.0401668548583984, "learning_rate": 2.110583707504811e-06, "loss": 0.8415, "step": 49150 }, { "epoch": 0.5990640195970897, "grad_norm": 1.7750554084777832, "learning_rate": 2.1102629890955744e-06, "loss": 0.8755, "step": 49155 }, { "epoch": 0.5991249558212375, "grad_norm": 2.091947317123413, "learning_rate": 2.109942270686338e-06, "loss": 0.8799, "step": 49160 }, { "epoch": 0.5991858920453853, "grad_norm": 1.9992965459823608, "learning_rate": 2.109621552277101e-06, "loss": 0.8131, "step": 49165 }, { "epoch": 0.5992468282695331, "grad_norm": 2.09401273727417, "learning_rate": 2.1093008338678643e-06, "loss": 0.8254, "step": 49170 }, { "epoch": 0.599307764493681, "grad_norm": 2.298750638961792, "learning_rate": 2.1089801154586277e-06, "loss": 0.8586, "step": 49175 }, { "epoch": 0.5993687007178288, "grad_norm": 1.8838399648666382, "learning_rate": 2.1086593970493907e-06, "loss": 0.8436, "step": 49180 }, { "epoch": 0.5994296369419765, "grad_norm": 2.059652805328369, "learning_rate": 2.108338678640154e-06, "loss": 0.8658, "step": 49185 }, { "epoch": 0.5994905731661243, "grad_norm": 2.13873028755188, "learning_rate": 2.1080179602309176e-06, "loss": 0.8366, "step": 49190 }, { "epoch": 0.5995515093902721, "grad_norm": 1.998255729675293, "learning_rate": 2.1076972418216806e-06, "loss": 0.8374, "step": 49195 }, { "epoch": 0.59961244561442, "grad_norm": 1.6334856748580933, "learning_rate": 2.107376523412444e-06, "loss": 0.8461, "step": 49200 }, { "epoch": 0.5996733818385678, "grad_norm": 2.645552396774292, "learning_rate": 2.107055805003207e-06, "loss": 0.8504, "step": 49205 }, { "epoch": 0.5997343180627156, "grad_norm": 1.827043056488037, "learning_rate": 2.1067350865939705e-06, "loss": 0.7758, "step": 49210 }, { "epoch": 0.5997952542868634, "grad_norm": 1.7691949605941772, "learning_rate": 2.106414368184734e-06, "loss": 0.8483, "step": 49215 }, { "epoch": 0.5998561905110111, "grad_norm": 2.0006370544433594, "learning_rate": 2.106093649775497e-06, "loss": 0.81, "step": 49220 }, { "epoch": 0.599917126735159, "grad_norm": 2.151719331741333, "learning_rate": 2.1057729313662604e-06, "loss": 0.7787, "step": 49225 }, { "epoch": 0.5999780629593068, "grad_norm": 2.021322011947632, "learning_rate": 2.105452212957024e-06, "loss": 0.8667, "step": 49230 }, { "epoch": 0.6000389991834546, "grad_norm": 2.2163987159729004, "learning_rate": 2.1051314945477873e-06, "loss": 0.8611, "step": 49235 }, { "epoch": 0.6000999354076024, "grad_norm": 2.208620071411133, "learning_rate": 2.1048107761385507e-06, "loss": 0.8394, "step": 49240 }, { "epoch": 0.6001608716317502, "grad_norm": 1.7888134717941284, "learning_rate": 2.104490057729314e-06, "loss": 0.8065, "step": 49245 }, { "epoch": 0.6002218078558981, "grad_norm": 2.7186062335968018, "learning_rate": 2.104169339320077e-06, "loss": 0.8786, "step": 49250 }, { "epoch": 0.6002827440800458, "grad_norm": 1.6708406209945679, "learning_rate": 2.1038486209108406e-06, "loss": 0.8501, "step": 49255 }, { "epoch": 0.6003436803041936, "grad_norm": 1.996686577796936, "learning_rate": 2.1035279025016036e-06, "loss": 0.8152, "step": 49260 }, { "epoch": 0.6004046165283414, "grad_norm": 2.0211641788482666, "learning_rate": 2.103207184092367e-06, "loss": 0.8599, "step": 49265 }, { "epoch": 0.6004655527524893, "grad_norm": 2.097921133041382, "learning_rate": 2.1028864656831305e-06, "loss": 0.7692, "step": 49270 }, { "epoch": 0.6005264889766371, "grad_norm": 1.8699787855148315, "learning_rate": 2.1025657472738935e-06, "loss": 0.8497, "step": 49275 }, { "epoch": 0.6005874252007849, "grad_norm": 1.9965598583221436, "learning_rate": 2.102245028864657e-06, "loss": 0.8347, "step": 49280 }, { "epoch": 0.6006483614249327, "grad_norm": 2.226442575454712, "learning_rate": 2.1019243104554204e-06, "loss": 0.827, "step": 49285 }, { "epoch": 0.6007092976490804, "grad_norm": 2.430048704147339, "learning_rate": 2.1016035920461834e-06, "loss": 0.8218, "step": 49290 }, { "epoch": 0.6007702338732283, "grad_norm": 2.2746822834014893, "learning_rate": 2.101282873636947e-06, "loss": 0.8227, "step": 49295 }, { "epoch": 0.6008311700973761, "grad_norm": 1.8204485177993774, "learning_rate": 2.1009621552277103e-06, "loss": 0.8594, "step": 49300 }, { "epoch": 0.6008921063215239, "grad_norm": 1.964516043663025, "learning_rate": 2.1006414368184733e-06, "loss": 0.7935, "step": 49305 }, { "epoch": 0.6009530425456717, "grad_norm": 2.691189765930176, "learning_rate": 2.1003207184092368e-06, "loss": 0.8766, "step": 49310 }, { "epoch": 0.6010139787698195, "grad_norm": 2.0104076862335205, "learning_rate": 2.1000000000000002e-06, "loss": 0.8323, "step": 49315 }, { "epoch": 0.6010749149939674, "grad_norm": 1.8387112617492676, "learning_rate": 2.0996792815907637e-06, "loss": 0.7946, "step": 49320 }, { "epoch": 0.6011358512181151, "grad_norm": 2.031686782836914, "learning_rate": 2.099358563181527e-06, "loss": 0.7682, "step": 49325 }, { "epoch": 0.6011967874422629, "grad_norm": 1.8626859188079834, "learning_rate": 2.09903784477229e-06, "loss": 0.8174, "step": 49330 }, { "epoch": 0.6012577236664107, "grad_norm": 1.8134868144989014, "learning_rate": 2.0987171263630536e-06, "loss": 0.7824, "step": 49335 }, { "epoch": 0.6013186598905585, "grad_norm": 1.9137613773345947, "learning_rate": 2.0983964079538166e-06, "loss": 0.8042, "step": 49340 }, { "epoch": 0.6013795961147064, "grad_norm": 1.7741069793701172, "learning_rate": 2.09807568954458e-06, "loss": 0.9044, "step": 49345 }, { "epoch": 0.6014405323388542, "grad_norm": 2.099705219268799, "learning_rate": 2.0977549711353435e-06, "loss": 0.8767, "step": 49350 }, { "epoch": 0.601501468563002, "grad_norm": 1.9301338195800781, "learning_rate": 2.0974342527261065e-06, "loss": 0.8506, "step": 49355 }, { "epoch": 0.6015624047871497, "grad_norm": 1.8725298643112183, "learning_rate": 2.09711353431687e-06, "loss": 0.868, "step": 49360 }, { "epoch": 0.6016233410112976, "grad_norm": 2.1530957221984863, "learning_rate": 2.0967928159076334e-06, "loss": 0.8942, "step": 49365 }, { "epoch": 0.6016842772354454, "grad_norm": 2.025691032409668, "learning_rate": 2.0964720974983964e-06, "loss": 0.8018, "step": 49370 }, { "epoch": 0.6017452134595932, "grad_norm": 2.009093999862671, "learning_rate": 2.09615137908916e-06, "loss": 0.802, "step": 49375 }, { "epoch": 0.601806149683741, "grad_norm": 1.9785040616989136, "learning_rate": 2.0958306606799233e-06, "loss": 0.8732, "step": 49380 }, { "epoch": 0.6018670859078888, "grad_norm": 2.555464506149292, "learning_rate": 2.0955099422706867e-06, "loss": 0.8691, "step": 49385 }, { "epoch": 0.6019280221320366, "grad_norm": 1.9310979843139648, "learning_rate": 2.0951892238614497e-06, "loss": 0.8487, "step": 49390 }, { "epoch": 0.6019889583561844, "grad_norm": 2.134640693664551, "learning_rate": 2.094868505452213e-06, "loss": 0.876, "step": 49395 }, { "epoch": 0.6020498945803322, "grad_norm": 2.1640965938568115, "learning_rate": 2.0945477870429766e-06, "loss": 0.8083, "step": 49400 }, { "epoch": 0.60211083080448, "grad_norm": 1.6755194664001465, "learning_rate": 2.09422706863374e-06, "loss": 0.8858, "step": 49405 }, { "epoch": 0.6021717670286278, "grad_norm": 1.789576530456543, "learning_rate": 2.093906350224503e-06, "loss": 0.8123, "step": 49410 }, { "epoch": 0.6022327032527757, "grad_norm": 1.7178939580917358, "learning_rate": 2.0935856318152665e-06, "loss": 0.7518, "step": 49415 }, { "epoch": 0.6022936394769235, "grad_norm": 1.8491592407226562, "learning_rate": 2.0932649134060295e-06, "loss": 0.855, "step": 49420 }, { "epoch": 0.6023545757010712, "grad_norm": 1.9437440633773804, "learning_rate": 2.092944194996793e-06, "loss": 0.7889, "step": 49425 }, { "epoch": 0.602415511925219, "grad_norm": 1.6649205684661865, "learning_rate": 2.0926234765875564e-06, "loss": 0.7552, "step": 49430 }, { "epoch": 0.6024764481493668, "grad_norm": 1.8808293342590332, "learning_rate": 2.0923027581783194e-06, "loss": 0.8944, "step": 49435 }, { "epoch": 0.6025373843735147, "grad_norm": 1.7946680784225464, "learning_rate": 2.091982039769083e-06, "loss": 0.7774, "step": 49440 }, { "epoch": 0.6025983205976625, "grad_norm": 1.7790882587432861, "learning_rate": 2.0916613213598463e-06, "loss": 0.809, "step": 49445 }, { "epoch": 0.6026592568218103, "grad_norm": 2.048551321029663, "learning_rate": 2.0913406029506093e-06, "loss": 0.793, "step": 49450 }, { "epoch": 0.6027201930459581, "grad_norm": 1.6755255460739136, "learning_rate": 2.0910198845413727e-06, "loss": 0.8006, "step": 49455 }, { "epoch": 0.6027811292701059, "grad_norm": 1.7579777240753174, "learning_rate": 2.090699166132136e-06, "loss": 0.8452, "step": 49460 }, { "epoch": 0.6028420654942537, "grad_norm": 1.8494526147842407, "learning_rate": 2.0903784477228996e-06, "loss": 0.7865, "step": 49465 }, { "epoch": 0.6029030017184015, "grad_norm": 1.9228458404541016, "learning_rate": 2.090057729313663e-06, "loss": 0.8217, "step": 49470 }, { "epoch": 0.6029639379425493, "grad_norm": 2.0669758319854736, "learning_rate": 2.089737010904426e-06, "loss": 0.8377, "step": 49475 }, { "epoch": 0.6030248741666971, "grad_norm": 1.9485135078430176, "learning_rate": 2.0894162924951895e-06, "loss": 0.8426, "step": 49480 }, { "epoch": 0.603085810390845, "grad_norm": 1.9282655715942383, "learning_rate": 2.089095574085953e-06, "loss": 0.78, "step": 49485 }, { "epoch": 0.6031467466149928, "grad_norm": 1.9714722633361816, "learning_rate": 2.088774855676716e-06, "loss": 0.8366, "step": 49490 }, { "epoch": 0.6032076828391405, "grad_norm": 2.2300782203674316, "learning_rate": 2.0884541372674794e-06, "loss": 0.8351, "step": 49495 }, { "epoch": 0.6032686190632883, "grad_norm": 1.9817723035812378, "learning_rate": 2.0881334188582424e-06, "loss": 0.8558, "step": 49500 }, { "epoch": 0.6033295552874361, "grad_norm": 1.7962294816970825, "learning_rate": 2.087812700449006e-06, "loss": 0.8223, "step": 49505 }, { "epoch": 0.603390491511584, "grad_norm": 2.046787738800049, "learning_rate": 2.0874919820397693e-06, "loss": 0.8708, "step": 49510 }, { "epoch": 0.6034514277357318, "grad_norm": 1.8996093273162842, "learning_rate": 2.0871712636305323e-06, "loss": 0.8251, "step": 49515 }, { "epoch": 0.6035123639598796, "grad_norm": 2.0181736946105957, "learning_rate": 2.0868505452212958e-06, "loss": 0.8857, "step": 49520 }, { "epoch": 0.6035733001840274, "grad_norm": 2.2426400184631348, "learning_rate": 2.086529826812059e-06, "loss": 0.8418, "step": 49525 }, { "epoch": 0.6036342364081752, "grad_norm": 1.8332481384277344, "learning_rate": 2.0862091084028222e-06, "loss": 0.7714, "step": 49530 }, { "epoch": 0.603695172632323, "grad_norm": 1.7640206813812256, "learning_rate": 2.0858883899935857e-06, "loss": 0.85, "step": 49535 }, { "epoch": 0.6037561088564708, "grad_norm": 1.9482475519180298, "learning_rate": 2.085567671584349e-06, "loss": 0.8446, "step": 49540 }, { "epoch": 0.6038170450806186, "grad_norm": 2.2607367038726807, "learning_rate": 2.0852469531751125e-06, "loss": 0.8617, "step": 49545 }, { "epoch": 0.6038779813047664, "grad_norm": 1.7330448627471924, "learning_rate": 2.084926234765876e-06, "loss": 0.8221, "step": 49550 }, { "epoch": 0.6039389175289143, "grad_norm": 1.7914788722991943, "learning_rate": 2.084605516356639e-06, "loss": 0.8291, "step": 49555 }, { "epoch": 0.6039998537530621, "grad_norm": 1.7790250778198242, "learning_rate": 2.0842847979474024e-06, "loss": 0.7897, "step": 49560 }, { "epoch": 0.6040607899772098, "grad_norm": 1.89496648311615, "learning_rate": 2.083964079538166e-06, "loss": 0.8469, "step": 49565 }, { "epoch": 0.6041217262013576, "grad_norm": 1.6705379486083984, "learning_rate": 2.083643361128929e-06, "loss": 0.8925, "step": 49570 }, { "epoch": 0.6041826624255054, "grad_norm": 2.431924819946289, "learning_rate": 2.0833226427196923e-06, "loss": 0.8262, "step": 49575 }, { "epoch": 0.6042435986496533, "grad_norm": 1.8291850090026855, "learning_rate": 2.0830019243104558e-06, "loss": 0.8194, "step": 49580 }, { "epoch": 0.6043045348738011, "grad_norm": 1.855124592781067, "learning_rate": 2.082681205901219e-06, "loss": 0.7928, "step": 49585 }, { "epoch": 0.6043654710979489, "grad_norm": 2.1268136501312256, "learning_rate": 2.0823604874919822e-06, "loss": 0.8285, "step": 49590 }, { "epoch": 0.6044264073220967, "grad_norm": 2.031900405883789, "learning_rate": 2.0820397690827453e-06, "loss": 0.7573, "step": 49595 }, { "epoch": 0.6044873435462444, "grad_norm": 1.9012541770935059, "learning_rate": 2.0817190506735087e-06, "loss": 0.8546, "step": 49600 }, { "epoch": 0.6045482797703923, "grad_norm": 2.178337335586548, "learning_rate": 2.081398332264272e-06, "loss": 0.8056, "step": 49605 }, { "epoch": 0.6046092159945401, "grad_norm": 1.9775195121765137, "learning_rate": 2.0810776138550356e-06, "loss": 0.8426, "step": 49610 }, { "epoch": 0.6046701522186879, "grad_norm": 2.1763932704925537, "learning_rate": 2.0807568954457986e-06, "loss": 0.7695, "step": 49615 }, { "epoch": 0.6047310884428357, "grad_norm": 2.123218297958374, "learning_rate": 2.080436177036562e-06, "loss": 0.8705, "step": 49620 }, { "epoch": 0.6047920246669836, "grad_norm": 2.1224589347839355, "learning_rate": 2.0801154586273255e-06, "loss": 0.8022, "step": 49625 }, { "epoch": 0.6048529608911314, "grad_norm": 1.9183082580566406, "learning_rate": 2.079794740218089e-06, "loss": 0.7962, "step": 49630 }, { "epoch": 0.6049138971152791, "grad_norm": 2.398913621902466, "learning_rate": 2.079474021808852e-06, "loss": 0.8408, "step": 49635 }, { "epoch": 0.6049748333394269, "grad_norm": 2.2226288318634033, "learning_rate": 2.0791533033996154e-06, "loss": 0.8347, "step": 49640 }, { "epoch": 0.6050357695635747, "grad_norm": 2.0979208946228027, "learning_rate": 2.078832584990379e-06, "loss": 0.8065, "step": 49645 }, { "epoch": 0.6050967057877226, "grad_norm": 1.824906587600708, "learning_rate": 2.078511866581142e-06, "loss": 0.8604, "step": 49650 }, { "epoch": 0.6051576420118704, "grad_norm": 2.2604494094848633, "learning_rate": 2.0781911481719053e-06, "loss": 0.8916, "step": 49655 }, { "epoch": 0.6052185782360182, "grad_norm": 1.8315414190292358, "learning_rate": 2.0778704297626687e-06, "loss": 0.7388, "step": 49660 }, { "epoch": 0.605279514460166, "grad_norm": 1.882660150527954, "learning_rate": 2.0775497113534317e-06, "loss": 0.8512, "step": 49665 }, { "epoch": 0.6053404506843137, "grad_norm": 1.8831974267959595, "learning_rate": 2.077228992944195e-06, "loss": 0.7888, "step": 49670 }, { "epoch": 0.6054013869084616, "grad_norm": 1.819966197013855, "learning_rate": 2.076908274534958e-06, "loss": 0.8215, "step": 49675 }, { "epoch": 0.6054623231326094, "grad_norm": 1.8731495141983032, "learning_rate": 2.0765875561257216e-06, "loss": 0.8496, "step": 49680 }, { "epoch": 0.6055232593567572, "grad_norm": 2.1011924743652344, "learning_rate": 2.076266837716485e-06, "loss": 0.8256, "step": 49685 }, { "epoch": 0.605584195580905, "grad_norm": 1.7506194114685059, "learning_rate": 2.0759461193072485e-06, "loss": 0.8097, "step": 49690 }, { "epoch": 0.6056451318050529, "grad_norm": 1.8502187728881836, "learning_rate": 2.075625400898012e-06, "loss": 0.8846, "step": 49695 }, { "epoch": 0.6057060680292007, "grad_norm": 1.7229024171829224, "learning_rate": 2.075304682488775e-06, "loss": 0.749, "step": 49700 }, { "epoch": 0.6057670042533484, "grad_norm": 1.9139565229415894, "learning_rate": 2.0749839640795384e-06, "loss": 0.865, "step": 49705 }, { "epoch": 0.6058279404774962, "grad_norm": 1.9389071464538574, "learning_rate": 2.074663245670302e-06, "loss": 0.9204, "step": 49710 }, { "epoch": 0.605888876701644, "grad_norm": 1.6672989130020142, "learning_rate": 2.074342527261065e-06, "loss": 0.9217, "step": 49715 }, { "epoch": 0.6059498129257919, "grad_norm": 1.904667615890503, "learning_rate": 2.0740218088518283e-06, "loss": 0.9444, "step": 49720 }, { "epoch": 0.6060107491499397, "grad_norm": 1.9306272268295288, "learning_rate": 2.0737010904425917e-06, "loss": 0.843, "step": 49725 }, { "epoch": 0.6060716853740875, "grad_norm": 2.1878795623779297, "learning_rate": 2.0733803720333548e-06, "loss": 0.84, "step": 49730 }, { "epoch": 0.6061326215982353, "grad_norm": 1.5532796382904053, "learning_rate": 2.073059653624118e-06, "loss": 0.8882, "step": 49735 }, { "epoch": 0.606193557822383, "grad_norm": 2.1339292526245117, "learning_rate": 2.0727389352148816e-06, "loss": 0.836, "step": 49740 }, { "epoch": 0.6062544940465309, "grad_norm": 2.0502126216888428, "learning_rate": 2.0724182168056447e-06, "loss": 0.8623, "step": 49745 }, { "epoch": 0.6063154302706787, "grad_norm": 2.346353054046631, "learning_rate": 2.072097498396408e-06, "loss": 0.8572, "step": 49750 }, { "epoch": 0.6063763664948265, "grad_norm": 1.8372398614883423, "learning_rate": 2.071776779987171e-06, "loss": 0.7954, "step": 49755 }, { "epoch": 0.6064373027189743, "grad_norm": 2.0587286949157715, "learning_rate": 2.0714560615779346e-06, "loss": 0.8437, "step": 49760 }, { "epoch": 0.6064982389431222, "grad_norm": 1.8177462816238403, "learning_rate": 2.071135343168698e-06, "loss": 0.8027, "step": 49765 }, { "epoch": 0.60655917516727, "grad_norm": 1.8391621112823486, "learning_rate": 2.0708146247594614e-06, "loss": 0.833, "step": 49770 }, { "epoch": 0.6066201113914177, "grad_norm": 2.2147059440612793, "learning_rate": 2.070493906350225e-06, "loss": 0.84, "step": 49775 }, { "epoch": 0.6066810476155655, "grad_norm": 2.062406063079834, "learning_rate": 2.070173187940988e-06, "loss": 0.8579, "step": 49780 }, { "epoch": 0.6067419838397133, "grad_norm": 1.9727281332015991, "learning_rate": 2.0698524695317513e-06, "loss": 0.8325, "step": 49785 }, { "epoch": 0.6068029200638612, "grad_norm": 2.214745044708252, "learning_rate": 2.0695317511225148e-06, "loss": 0.7844, "step": 49790 }, { "epoch": 0.606863856288009, "grad_norm": 2.7669994831085205, "learning_rate": 2.0692110327132778e-06, "loss": 0.7892, "step": 49795 }, { "epoch": 0.6069247925121568, "grad_norm": 1.9274684190750122, "learning_rate": 2.0688903143040412e-06, "loss": 0.9192, "step": 49800 }, { "epoch": 0.6069857287363046, "grad_norm": 1.9894391298294067, "learning_rate": 2.0685695958948047e-06, "loss": 0.8139, "step": 49805 }, { "epoch": 0.6070466649604523, "grad_norm": 1.8905495405197144, "learning_rate": 2.0682488774855677e-06, "loss": 0.8159, "step": 49810 }, { "epoch": 0.6071076011846002, "grad_norm": 1.9239122867584229, "learning_rate": 2.067928159076331e-06, "loss": 0.7835, "step": 49815 }, { "epoch": 0.607168537408748, "grad_norm": 1.7475123405456543, "learning_rate": 2.0676074406670946e-06, "loss": 0.852, "step": 49820 }, { "epoch": 0.6072294736328958, "grad_norm": 1.9294815063476562, "learning_rate": 2.0672867222578576e-06, "loss": 0.8228, "step": 49825 }, { "epoch": 0.6072904098570436, "grad_norm": 2.249014377593994, "learning_rate": 2.066966003848621e-06, "loss": 0.8568, "step": 49830 }, { "epoch": 0.6073513460811915, "grad_norm": 2.222764015197754, "learning_rate": 2.0666452854393845e-06, "loss": 0.8309, "step": 49835 }, { "epoch": 0.6074122823053393, "grad_norm": 1.9425551891326904, "learning_rate": 2.0663245670301475e-06, "loss": 0.806, "step": 49840 }, { "epoch": 0.607473218529487, "grad_norm": 1.9490773677825928, "learning_rate": 2.066003848620911e-06, "loss": 0.7994, "step": 49845 }, { "epoch": 0.6075341547536348, "grad_norm": 1.8148179054260254, "learning_rate": 2.0656831302116744e-06, "loss": 0.8304, "step": 49850 }, { "epoch": 0.6075950909777826, "grad_norm": 1.9482251405715942, "learning_rate": 2.065362411802438e-06, "loss": 0.8334, "step": 49855 }, { "epoch": 0.6076560272019305, "grad_norm": 2.114895820617676, "learning_rate": 2.0650416933932012e-06, "loss": 0.8389, "step": 49860 }, { "epoch": 0.6077169634260783, "grad_norm": 1.9930156469345093, "learning_rate": 2.0647209749839643e-06, "loss": 0.778, "step": 49865 }, { "epoch": 0.6077778996502261, "grad_norm": 1.5535528659820557, "learning_rate": 2.0644002565747277e-06, "loss": 0.8774, "step": 49870 }, { "epoch": 0.6078388358743739, "grad_norm": 1.9144014120101929, "learning_rate": 2.064079538165491e-06, "loss": 0.7788, "step": 49875 }, { "epoch": 0.6078997720985216, "grad_norm": 1.9437800645828247, "learning_rate": 2.063758819756254e-06, "loss": 0.8287, "step": 49880 }, { "epoch": 0.6079607083226695, "grad_norm": 1.6815896034240723, "learning_rate": 2.0634381013470176e-06, "loss": 0.8267, "step": 49885 }, { "epoch": 0.6080216445468173, "grad_norm": 1.9040685892105103, "learning_rate": 2.0631173829377806e-06, "loss": 0.8491, "step": 49890 }, { "epoch": 0.6080825807709651, "grad_norm": 2.4207522869110107, "learning_rate": 2.062796664528544e-06, "loss": 0.8632, "step": 49895 }, { "epoch": 0.6081435169951129, "grad_norm": 2.1190338134765625, "learning_rate": 2.0624759461193075e-06, "loss": 0.8555, "step": 49900 }, { "epoch": 0.6082044532192608, "grad_norm": 2.44341778755188, "learning_rate": 2.0621552277100705e-06, "loss": 0.842, "step": 49905 }, { "epoch": 0.6082653894434086, "grad_norm": 1.9379099607467651, "learning_rate": 2.061834509300834e-06, "loss": 0.8248, "step": 49910 }, { "epoch": 0.6083263256675563, "grad_norm": 1.8704533576965332, "learning_rate": 2.0615137908915974e-06, "loss": 0.8611, "step": 49915 }, { "epoch": 0.6083872618917041, "grad_norm": 1.8451042175292969, "learning_rate": 2.0611930724823604e-06, "loss": 0.8705, "step": 49920 }, { "epoch": 0.6084481981158519, "grad_norm": 1.8815513849258423, "learning_rate": 2.060872354073124e-06, "loss": 0.7946, "step": 49925 }, { "epoch": 0.6085091343399998, "grad_norm": 2.092372179031372, "learning_rate": 2.0605516356638873e-06, "loss": 0.8122, "step": 49930 }, { "epoch": 0.6085700705641476, "grad_norm": 1.87808358669281, "learning_rate": 2.0602309172546507e-06, "loss": 0.7918, "step": 49935 }, { "epoch": 0.6086310067882954, "grad_norm": 1.8227638006210327, "learning_rate": 2.059910198845414e-06, "loss": 0.871, "step": 49940 }, { "epoch": 0.6086919430124432, "grad_norm": 2.0163917541503906, "learning_rate": 2.059589480436177e-06, "loss": 0.8529, "step": 49945 }, { "epoch": 0.6087528792365909, "grad_norm": 1.8683369159698486, "learning_rate": 2.0592687620269406e-06, "loss": 0.847, "step": 49950 }, { "epoch": 0.6088138154607388, "grad_norm": 1.790745735168457, "learning_rate": 2.058948043617704e-06, "loss": 0.85, "step": 49955 }, { "epoch": 0.6088747516848866, "grad_norm": 1.6837877035140991, "learning_rate": 2.058627325208467e-06, "loss": 0.9302, "step": 49960 }, { "epoch": 0.6089356879090344, "grad_norm": 1.9972318410873413, "learning_rate": 2.0583066067992305e-06, "loss": 0.9318, "step": 49965 }, { "epoch": 0.6089966241331822, "grad_norm": 1.9746068716049194, "learning_rate": 2.0579858883899935e-06, "loss": 0.8158, "step": 49970 }, { "epoch": 0.60905756035733, "grad_norm": 2.110562801361084, "learning_rate": 2.057665169980757e-06, "loss": 0.8478, "step": 49975 }, { "epoch": 0.6091184965814779, "grad_norm": 2.0025646686553955, "learning_rate": 2.0573444515715204e-06, "loss": 0.8029, "step": 49980 }, { "epoch": 0.6091794328056256, "grad_norm": 2.069495677947998, "learning_rate": 2.0570237331622834e-06, "loss": 0.7724, "step": 49985 }, { "epoch": 0.6092403690297734, "grad_norm": 1.8153157234191895, "learning_rate": 2.056703014753047e-06, "loss": 0.8156, "step": 49990 }, { "epoch": 0.6093013052539212, "grad_norm": 2.1681103706359863, "learning_rate": 2.0563822963438103e-06, "loss": 0.7784, "step": 49995 }, { "epoch": 0.609362241478069, "grad_norm": 2.117177724838257, "learning_rate": 2.0560615779345738e-06, "loss": 0.7926, "step": 50000 }, { "epoch": 0.6094231777022169, "grad_norm": 2.143044948577881, "learning_rate": 2.0557408595253368e-06, "loss": 0.8788, "step": 50005 }, { "epoch": 0.6094841139263647, "grad_norm": 1.641628623008728, "learning_rate": 2.0554201411161002e-06, "loss": 0.8353, "step": 50010 }, { "epoch": 0.6095450501505125, "grad_norm": 2.0341668128967285, "learning_rate": 2.0550994227068637e-06, "loss": 0.8189, "step": 50015 }, { "epoch": 0.6096059863746602, "grad_norm": 1.8143558502197266, "learning_rate": 2.054778704297627e-06, "loss": 0.8938, "step": 50020 }, { "epoch": 0.6096669225988081, "grad_norm": 1.8483459949493408, "learning_rate": 2.05445798588839e-06, "loss": 0.7674, "step": 50025 }, { "epoch": 0.6097278588229559, "grad_norm": 2.398406982421875, "learning_rate": 2.0541372674791535e-06, "loss": 0.7767, "step": 50030 }, { "epoch": 0.6097887950471037, "grad_norm": 1.7913150787353516, "learning_rate": 2.053816549069917e-06, "loss": 0.8211, "step": 50035 }, { "epoch": 0.6098497312712515, "grad_norm": 1.763196349143982, "learning_rate": 2.05349583066068e-06, "loss": 0.767, "step": 50040 }, { "epoch": 0.6099106674953994, "grad_norm": 1.7176586389541626, "learning_rate": 2.0531751122514434e-06, "loss": 0.7507, "step": 50045 }, { "epoch": 0.6099716037195472, "grad_norm": 2.0280649662017822, "learning_rate": 2.0528543938422065e-06, "loss": 0.8724, "step": 50050 }, { "epoch": 0.6100325399436949, "grad_norm": 2.1416726112365723, "learning_rate": 2.05253367543297e-06, "loss": 0.7776, "step": 50055 }, { "epoch": 0.6100934761678427, "grad_norm": 1.866624116897583, "learning_rate": 2.0522129570237333e-06, "loss": 0.8171, "step": 50060 }, { "epoch": 0.6101544123919905, "grad_norm": 2.098630428314209, "learning_rate": 2.0518922386144964e-06, "loss": 0.796, "step": 50065 }, { "epoch": 0.6102153486161384, "grad_norm": 2.178762912750244, "learning_rate": 2.05157152020526e-06, "loss": 0.9054, "step": 50070 }, { "epoch": 0.6102762848402862, "grad_norm": 1.9573487043380737, "learning_rate": 2.0512508017960232e-06, "loss": 0.8244, "step": 50075 }, { "epoch": 0.610337221064434, "grad_norm": 1.6183960437774658, "learning_rate": 2.0509300833867867e-06, "loss": 0.7944, "step": 50080 }, { "epoch": 0.6103981572885818, "grad_norm": 1.8767549991607666, "learning_rate": 2.05060936497755e-06, "loss": 0.8323, "step": 50085 }, { "epoch": 0.6104590935127295, "grad_norm": 2.035471200942993, "learning_rate": 2.050288646568313e-06, "loss": 0.7464, "step": 50090 }, { "epoch": 0.6105200297368774, "grad_norm": 1.5727229118347168, "learning_rate": 2.0499679281590766e-06, "loss": 0.794, "step": 50095 }, { "epoch": 0.6105809659610252, "grad_norm": 2.0219013690948486, "learning_rate": 2.04964720974984e-06, "loss": 0.7951, "step": 50100 }, { "epoch": 0.610641902185173, "grad_norm": 1.7656259536743164, "learning_rate": 2.049326491340603e-06, "loss": 0.8497, "step": 50105 }, { "epoch": 0.6107028384093208, "grad_norm": 1.6657023429870605, "learning_rate": 2.0490057729313665e-06, "loss": 0.7997, "step": 50110 }, { "epoch": 0.6107637746334686, "grad_norm": 2.121385097503662, "learning_rate": 2.04868505452213e-06, "loss": 0.8331, "step": 50115 }, { "epoch": 0.6108247108576165, "grad_norm": 2.947507381439209, "learning_rate": 2.048364336112893e-06, "loss": 0.8761, "step": 50120 }, { "epoch": 0.6108856470817642, "grad_norm": 1.758551001548767, "learning_rate": 2.0480436177036564e-06, "loss": 0.8728, "step": 50125 }, { "epoch": 0.610946583305912, "grad_norm": 1.883799433708191, "learning_rate": 2.0477228992944194e-06, "loss": 0.894, "step": 50130 }, { "epoch": 0.6110075195300598, "grad_norm": 2.0879576206207275, "learning_rate": 2.047402180885183e-06, "loss": 0.7882, "step": 50135 }, { "epoch": 0.6110684557542077, "grad_norm": 2.565556526184082, "learning_rate": 2.0470814624759463e-06, "loss": 0.9068, "step": 50140 }, { "epoch": 0.6111293919783555, "grad_norm": 1.9117645025253296, "learning_rate": 2.0467607440667093e-06, "loss": 0.7965, "step": 50145 }, { "epoch": 0.6111903282025033, "grad_norm": 1.923585057258606, "learning_rate": 2.0464400256574727e-06, "loss": 0.7978, "step": 50150 }, { "epoch": 0.6112512644266511, "grad_norm": 1.8016875982284546, "learning_rate": 2.046119307248236e-06, "loss": 0.8309, "step": 50155 }, { "epoch": 0.6113122006507988, "grad_norm": 1.9293022155761719, "learning_rate": 2.0457985888389996e-06, "loss": 0.8052, "step": 50160 }, { "epoch": 0.6113731368749467, "grad_norm": 3.5958850383758545, "learning_rate": 2.045477870429763e-06, "loss": 0.7924, "step": 50165 }, { "epoch": 0.6114340730990945, "grad_norm": 2.259397268295288, "learning_rate": 2.0451571520205265e-06, "loss": 0.7665, "step": 50170 }, { "epoch": 0.6114950093232423, "grad_norm": 1.93910551071167, "learning_rate": 2.0448364336112895e-06, "loss": 0.7954, "step": 50175 }, { "epoch": 0.6115559455473901, "grad_norm": 2.029869794845581, "learning_rate": 2.044515715202053e-06, "loss": 0.8274, "step": 50180 }, { "epoch": 0.611616881771538, "grad_norm": 1.83638334274292, "learning_rate": 2.044194996792816e-06, "loss": 0.7119, "step": 50185 }, { "epoch": 0.6116778179956858, "grad_norm": 1.970703363418579, "learning_rate": 2.0438742783835794e-06, "loss": 0.788, "step": 50190 }, { "epoch": 0.6117387542198335, "grad_norm": 1.869486689567566, "learning_rate": 2.043553559974343e-06, "loss": 0.8241, "step": 50195 }, { "epoch": 0.6117996904439813, "grad_norm": 1.9561747312545776, "learning_rate": 2.043232841565106e-06, "loss": 0.8251, "step": 50200 }, { "epoch": 0.6118606266681291, "grad_norm": 1.9915393590927124, "learning_rate": 2.0429121231558693e-06, "loss": 0.7539, "step": 50205 }, { "epoch": 0.611921562892277, "grad_norm": 1.801902413368225, "learning_rate": 2.0425914047466327e-06, "loss": 0.8134, "step": 50210 }, { "epoch": 0.6119824991164248, "grad_norm": 2.0662949085235596, "learning_rate": 2.0422706863373958e-06, "loss": 0.8481, "step": 50215 }, { "epoch": 0.6120434353405726, "grad_norm": 1.862617015838623, "learning_rate": 2.041949967928159e-06, "loss": 0.8213, "step": 50220 }, { "epoch": 0.6121043715647204, "grad_norm": 1.885318636894226, "learning_rate": 2.0416292495189226e-06, "loss": 0.8949, "step": 50225 }, { "epoch": 0.6121653077888681, "grad_norm": 2.574355363845825, "learning_rate": 2.0413085311096857e-06, "loss": 0.8477, "step": 50230 }, { "epoch": 0.612226244013016, "grad_norm": 2.0937068462371826, "learning_rate": 2.040987812700449e-06, "loss": 0.7896, "step": 50235 }, { "epoch": 0.6122871802371638, "grad_norm": 2.0812065601348877, "learning_rate": 2.0406670942912125e-06, "loss": 0.7771, "step": 50240 }, { "epoch": 0.6123481164613116, "grad_norm": 2.540296792984009, "learning_rate": 2.040346375881976e-06, "loss": 0.8511, "step": 50245 }, { "epoch": 0.6124090526854594, "grad_norm": 2.1086902618408203, "learning_rate": 2.0400256574727394e-06, "loss": 0.7703, "step": 50250 }, { "epoch": 0.6124699889096072, "grad_norm": 1.7996755838394165, "learning_rate": 2.0397049390635024e-06, "loss": 0.7734, "step": 50255 }, { "epoch": 0.6125309251337551, "grad_norm": 1.7127889394760132, "learning_rate": 2.039384220654266e-06, "loss": 0.826, "step": 50260 }, { "epoch": 0.6125918613579028, "grad_norm": 1.8241848945617676, "learning_rate": 2.039063502245029e-06, "loss": 0.8272, "step": 50265 }, { "epoch": 0.6126527975820506, "grad_norm": 1.6173462867736816, "learning_rate": 2.0387427838357923e-06, "loss": 0.764, "step": 50270 }, { "epoch": 0.6127137338061984, "grad_norm": 1.847986102104187, "learning_rate": 2.0384220654265558e-06, "loss": 0.8458, "step": 50275 }, { "epoch": 0.6127746700303462, "grad_norm": 2.087785005569458, "learning_rate": 2.0381013470173188e-06, "loss": 0.8233, "step": 50280 }, { "epoch": 0.6128356062544941, "grad_norm": 1.7562968730926514, "learning_rate": 2.0377806286080822e-06, "loss": 0.9269, "step": 50285 }, { "epoch": 0.6128965424786419, "grad_norm": 2.176997423171997, "learning_rate": 2.0374599101988457e-06, "loss": 0.8792, "step": 50290 }, { "epoch": 0.6129574787027897, "grad_norm": 2.245363235473633, "learning_rate": 2.0371391917896087e-06, "loss": 0.8302, "step": 50295 }, { "epoch": 0.6130184149269374, "grad_norm": 1.8794021606445312, "learning_rate": 2.036818473380372e-06, "loss": 0.7604, "step": 50300 }, { "epoch": 0.6130793511510853, "grad_norm": 1.935964584350586, "learning_rate": 2.0364977549711356e-06, "loss": 0.8635, "step": 50305 }, { "epoch": 0.6131402873752331, "grad_norm": 1.84105384349823, "learning_rate": 2.036177036561899e-06, "loss": 0.8139, "step": 50310 }, { "epoch": 0.6132012235993809, "grad_norm": 2.3546628952026367, "learning_rate": 2.035856318152662e-06, "loss": 0.8061, "step": 50315 }, { "epoch": 0.6132621598235287, "grad_norm": 1.8001716136932373, "learning_rate": 2.0355355997434255e-06, "loss": 0.9101, "step": 50320 }, { "epoch": 0.6133230960476765, "grad_norm": 2.245600700378418, "learning_rate": 2.035214881334189e-06, "loss": 0.8757, "step": 50325 }, { "epoch": 0.6133840322718243, "grad_norm": 1.961050271987915, "learning_rate": 2.0348941629249523e-06, "loss": 0.8423, "step": 50330 }, { "epoch": 0.6134449684959721, "grad_norm": 2.047785997390747, "learning_rate": 2.0345734445157154e-06, "loss": 0.8445, "step": 50335 }, { "epoch": 0.6135059047201199, "grad_norm": 1.8710556030273438, "learning_rate": 2.034252726106479e-06, "loss": 0.868, "step": 50340 }, { "epoch": 0.6135668409442677, "grad_norm": 1.8306834697723389, "learning_rate": 2.033932007697242e-06, "loss": 0.8077, "step": 50345 }, { "epoch": 0.6136277771684155, "grad_norm": 2.348632574081421, "learning_rate": 2.0336112892880053e-06, "loss": 0.733, "step": 50350 }, { "epoch": 0.6136887133925634, "grad_norm": 1.637225866317749, "learning_rate": 2.0332905708787687e-06, "loss": 0.7706, "step": 50355 }, { "epoch": 0.6137496496167112, "grad_norm": 2.6579506397247314, "learning_rate": 2.0329698524695317e-06, "loss": 0.8651, "step": 50360 }, { "epoch": 0.6138105858408589, "grad_norm": 1.7720004320144653, "learning_rate": 2.032649134060295e-06, "loss": 0.7995, "step": 50365 }, { "epoch": 0.6138715220650067, "grad_norm": 1.7918827533721924, "learning_rate": 2.0323284156510586e-06, "loss": 0.8045, "step": 50370 }, { "epoch": 0.6139324582891545, "grad_norm": 1.7978343963623047, "learning_rate": 2.0320076972418216e-06, "loss": 0.8771, "step": 50375 }, { "epoch": 0.6139933945133024, "grad_norm": 1.7897371053695679, "learning_rate": 2.031686978832585e-06, "loss": 0.7712, "step": 50380 }, { "epoch": 0.6140543307374502, "grad_norm": 1.8636045455932617, "learning_rate": 2.0313662604233485e-06, "loss": 0.8257, "step": 50385 }, { "epoch": 0.614115266961598, "grad_norm": 2.2907893657684326, "learning_rate": 2.031045542014112e-06, "loss": 0.8956, "step": 50390 }, { "epoch": 0.6141762031857458, "grad_norm": 1.8090375661849976, "learning_rate": 2.030724823604875e-06, "loss": 0.8797, "step": 50395 }, { "epoch": 0.6142371394098936, "grad_norm": 2.047539472579956, "learning_rate": 2.0304041051956384e-06, "loss": 0.8593, "step": 50400 }, { "epoch": 0.6142980756340414, "grad_norm": 2.0564708709716797, "learning_rate": 2.030083386786402e-06, "loss": 0.8772, "step": 50405 }, { "epoch": 0.6143590118581892, "grad_norm": 1.883857011795044, "learning_rate": 2.0297626683771653e-06, "loss": 0.856, "step": 50410 }, { "epoch": 0.614419948082337, "grad_norm": 2.465348958969116, "learning_rate": 2.0294419499679283e-06, "loss": 0.7944, "step": 50415 }, { "epoch": 0.6144808843064848, "grad_norm": 1.9550639390945435, "learning_rate": 2.0291212315586917e-06, "loss": 0.8275, "step": 50420 }, { "epoch": 0.6145418205306327, "grad_norm": 1.936245083808899, "learning_rate": 2.0288005131494547e-06, "loss": 0.8148, "step": 50425 }, { "epoch": 0.6146027567547805, "grad_norm": 2.0266079902648926, "learning_rate": 2.028479794740218e-06, "loss": 0.8694, "step": 50430 }, { "epoch": 0.6146636929789282, "grad_norm": 1.7038410902023315, "learning_rate": 2.0281590763309816e-06, "loss": 0.8679, "step": 50435 }, { "epoch": 0.614724629203076, "grad_norm": 2.121741533279419, "learning_rate": 2.0278383579217446e-06, "loss": 0.8814, "step": 50440 }, { "epoch": 0.6147855654272238, "grad_norm": 2.4677982330322266, "learning_rate": 2.027517639512508e-06, "loss": 0.8285, "step": 50445 }, { "epoch": 0.6148465016513717, "grad_norm": 2.3502438068389893, "learning_rate": 2.0271969211032715e-06, "loss": 0.8655, "step": 50450 }, { "epoch": 0.6149074378755195, "grad_norm": 1.9568082094192505, "learning_rate": 2.0268762026940345e-06, "loss": 0.8602, "step": 50455 }, { "epoch": 0.6149683740996673, "grad_norm": 1.9036803245544434, "learning_rate": 2.026555484284798e-06, "loss": 0.8146, "step": 50460 }, { "epoch": 0.6150293103238151, "grad_norm": 1.8987891674041748, "learning_rate": 2.0262347658755614e-06, "loss": 0.7897, "step": 50465 }, { "epoch": 0.6150902465479628, "grad_norm": 2.067125082015991, "learning_rate": 2.025914047466325e-06, "loss": 0.8189, "step": 50470 }, { "epoch": 0.6151511827721107, "grad_norm": 1.8194183111190796, "learning_rate": 2.0255933290570883e-06, "loss": 0.8319, "step": 50475 }, { "epoch": 0.6152121189962585, "grad_norm": 2.2104883193969727, "learning_rate": 2.0252726106478513e-06, "loss": 0.9032, "step": 50480 }, { "epoch": 0.6152730552204063, "grad_norm": 1.7256277799606323, "learning_rate": 2.0249518922386148e-06, "loss": 0.8668, "step": 50485 }, { "epoch": 0.6153339914445541, "grad_norm": 2.370614767074585, "learning_rate": 2.024631173829378e-06, "loss": 0.8523, "step": 50490 }, { "epoch": 0.615394927668702, "grad_norm": 2.0052740573883057, "learning_rate": 2.0243104554201412e-06, "loss": 0.8734, "step": 50495 }, { "epoch": 0.6154558638928498, "grad_norm": 1.8303500413894653, "learning_rate": 2.0239897370109047e-06, "loss": 0.7628, "step": 50500 }, { "epoch": 0.6155168001169975, "grad_norm": 1.531672477722168, "learning_rate": 2.023669018601668e-06, "loss": 0.7617, "step": 50505 }, { "epoch": 0.6155777363411453, "grad_norm": 2.298978567123413, "learning_rate": 2.023348300192431e-06, "loss": 0.8675, "step": 50510 }, { "epoch": 0.6156386725652931, "grad_norm": 1.8592380285263062, "learning_rate": 2.0230275817831946e-06, "loss": 0.8516, "step": 50515 }, { "epoch": 0.615699608789441, "grad_norm": 2.0106008052825928, "learning_rate": 2.0227068633739576e-06, "loss": 0.831, "step": 50520 }, { "epoch": 0.6157605450135888, "grad_norm": 1.714240550994873, "learning_rate": 2.022386144964721e-06, "loss": 0.8138, "step": 50525 }, { "epoch": 0.6158214812377366, "grad_norm": 1.8571598529815674, "learning_rate": 2.0220654265554845e-06, "loss": 0.7997, "step": 50530 }, { "epoch": 0.6158824174618844, "grad_norm": 1.7452503442764282, "learning_rate": 2.021744708146248e-06, "loss": 0.7928, "step": 50535 }, { "epoch": 0.6159433536860321, "grad_norm": 1.9904682636260986, "learning_rate": 2.021423989737011e-06, "loss": 0.867, "step": 50540 }, { "epoch": 0.61600428991018, "grad_norm": 1.8484668731689453, "learning_rate": 2.0211032713277743e-06, "loss": 0.8348, "step": 50545 }, { "epoch": 0.6160652261343278, "grad_norm": 2.3306384086608887, "learning_rate": 2.0207825529185378e-06, "loss": 0.8415, "step": 50550 }, { "epoch": 0.6161261623584756, "grad_norm": 1.7945619821548462, "learning_rate": 2.0204618345093012e-06, "loss": 0.7562, "step": 50555 }, { "epoch": 0.6161870985826234, "grad_norm": 1.9687933921813965, "learning_rate": 2.0201411161000642e-06, "loss": 0.8971, "step": 50560 }, { "epoch": 0.6162480348067713, "grad_norm": 1.9809684753417969, "learning_rate": 2.0198203976908277e-06, "loss": 0.7884, "step": 50565 }, { "epoch": 0.6163089710309191, "grad_norm": 2.141702890396118, "learning_rate": 2.019499679281591e-06, "loss": 0.8464, "step": 50570 }, { "epoch": 0.6163699072550668, "grad_norm": 2.026533603668213, "learning_rate": 2.019178960872354e-06, "loss": 0.8561, "step": 50575 }, { "epoch": 0.6164308434792146, "grad_norm": 2.2412469387054443, "learning_rate": 2.0188582424631176e-06, "loss": 0.8473, "step": 50580 }, { "epoch": 0.6164917797033624, "grad_norm": 1.8800256252288818, "learning_rate": 2.018537524053881e-06, "loss": 0.8405, "step": 50585 }, { "epoch": 0.6165527159275103, "grad_norm": 1.7496039867401123, "learning_rate": 2.018216805644644e-06, "loss": 0.7851, "step": 50590 }, { "epoch": 0.6166136521516581, "grad_norm": 1.9137214422225952, "learning_rate": 2.0178960872354075e-06, "loss": 0.7966, "step": 50595 }, { "epoch": 0.6166745883758059, "grad_norm": 1.823432207107544, "learning_rate": 2.0175753688261705e-06, "loss": 0.8987, "step": 50600 }, { "epoch": 0.6167355245999537, "grad_norm": 1.9429261684417725, "learning_rate": 2.017254650416934e-06, "loss": 0.8021, "step": 50605 }, { "epoch": 0.6167964608241014, "grad_norm": 2.147235870361328, "learning_rate": 2.0169339320076974e-06, "loss": 0.8268, "step": 50610 }, { "epoch": 0.6168573970482493, "grad_norm": 2.238276481628418, "learning_rate": 2.016613213598461e-06, "loss": 0.9049, "step": 50615 }, { "epoch": 0.6169183332723971, "grad_norm": 1.8263249397277832, "learning_rate": 2.016292495189224e-06, "loss": 0.7988, "step": 50620 }, { "epoch": 0.6169792694965449, "grad_norm": 1.8153973817825317, "learning_rate": 2.0159717767799873e-06, "loss": 0.7937, "step": 50625 }, { "epoch": 0.6170402057206927, "grad_norm": 1.74777352809906, "learning_rate": 2.0156510583707507e-06, "loss": 0.8517, "step": 50630 }, { "epoch": 0.6171011419448406, "grad_norm": 1.8708909749984741, "learning_rate": 2.015330339961514e-06, "loss": 0.8404, "step": 50635 }, { "epoch": 0.6171620781689884, "grad_norm": 2.187528371810913, "learning_rate": 2.015009621552277e-06, "loss": 0.8058, "step": 50640 }, { "epoch": 0.6172230143931361, "grad_norm": 1.841400146484375, "learning_rate": 2.0146889031430406e-06, "loss": 0.885, "step": 50645 }, { "epoch": 0.6172839506172839, "grad_norm": 1.6963982582092285, "learning_rate": 2.014368184733804e-06, "loss": 0.8239, "step": 50650 }, { "epoch": 0.6173448868414317, "grad_norm": 1.7327089309692383, "learning_rate": 2.014047466324567e-06, "loss": 0.8413, "step": 50655 }, { "epoch": 0.6174058230655796, "grad_norm": 1.9889421463012695, "learning_rate": 2.0137267479153305e-06, "loss": 0.8387, "step": 50660 }, { "epoch": 0.6174667592897274, "grad_norm": 1.7826231718063354, "learning_rate": 2.013406029506094e-06, "loss": 0.7868, "step": 50665 }, { "epoch": 0.6175276955138752, "grad_norm": 1.8771321773529053, "learning_rate": 2.013085311096857e-06, "loss": 0.8517, "step": 50670 }, { "epoch": 0.617588631738023, "grad_norm": 2.1398215293884277, "learning_rate": 2.0127645926876204e-06, "loss": 0.9226, "step": 50675 }, { "epoch": 0.6176495679621707, "grad_norm": 1.9440622329711914, "learning_rate": 2.0124438742783834e-06, "loss": 0.8358, "step": 50680 }, { "epoch": 0.6177105041863186, "grad_norm": 1.6856553554534912, "learning_rate": 2.012123155869147e-06, "loss": 0.9017, "step": 50685 }, { "epoch": 0.6177714404104664, "grad_norm": 1.75260329246521, "learning_rate": 2.0118024374599103e-06, "loss": 0.8321, "step": 50690 }, { "epoch": 0.6178323766346142, "grad_norm": 2.005396604537964, "learning_rate": 2.0114817190506737e-06, "loss": 0.9, "step": 50695 }, { "epoch": 0.617893312858762, "grad_norm": 2.4212772846221924, "learning_rate": 2.011161000641437e-06, "loss": 0.7817, "step": 50700 }, { "epoch": 0.6179542490829099, "grad_norm": 2.2713053226470947, "learning_rate": 2.0108402822322e-06, "loss": 0.8278, "step": 50705 }, { "epoch": 0.6180151853070577, "grad_norm": 2.161281108856201, "learning_rate": 2.0105195638229636e-06, "loss": 0.8043, "step": 50710 }, { "epoch": 0.6180761215312054, "grad_norm": 2.246851682662964, "learning_rate": 2.010198845413727e-06, "loss": 0.8189, "step": 50715 }, { "epoch": 0.6181370577553532, "grad_norm": 1.7983394861221313, "learning_rate": 2.00987812700449e-06, "loss": 0.8652, "step": 50720 }, { "epoch": 0.618197993979501, "grad_norm": 2.2053844928741455, "learning_rate": 2.0095574085952535e-06, "loss": 0.8385, "step": 50725 }, { "epoch": 0.6182589302036489, "grad_norm": 2.385607957839966, "learning_rate": 2.009236690186017e-06, "loss": 0.8359, "step": 50730 }, { "epoch": 0.6183198664277967, "grad_norm": 2.2393953800201416, "learning_rate": 2.00891597177678e-06, "loss": 0.7851, "step": 50735 }, { "epoch": 0.6183808026519445, "grad_norm": 2.1215553283691406, "learning_rate": 2.0085952533675434e-06, "loss": 0.8106, "step": 50740 }, { "epoch": 0.6184417388760923, "grad_norm": 1.9315088987350464, "learning_rate": 2.008274534958307e-06, "loss": 0.7804, "step": 50745 }, { "epoch": 0.61850267510024, "grad_norm": 1.6743334531784058, "learning_rate": 2.00795381654907e-06, "loss": 0.7969, "step": 50750 }, { "epoch": 0.6185636113243879, "grad_norm": 2.941206693649292, "learning_rate": 2.0076330981398333e-06, "loss": 0.8149, "step": 50755 }, { "epoch": 0.6186245475485357, "grad_norm": 2.0326058864593506, "learning_rate": 2.0073123797305968e-06, "loss": 0.8121, "step": 50760 }, { "epoch": 0.6186854837726835, "grad_norm": 1.7356318235397339, "learning_rate": 2.00699166132136e-06, "loss": 0.7806, "step": 50765 }, { "epoch": 0.6187464199968313, "grad_norm": 2.2980008125305176, "learning_rate": 2.0066709429121232e-06, "loss": 0.8367, "step": 50770 }, { "epoch": 0.6188073562209792, "grad_norm": 2.012634754180908, "learning_rate": 2.0063502245028867e-06, "loss": 0.8992, "step": 50775 }, { "epoch": 0.618868292445127, "grad_norm": 1.9546339511871338, "learning_rate": 2.00602950609365e-06, "loss": 0.7656, "step": 50780 }, { "epoch": 0.6189292286692747, "grad_norm": 2.2709503173828125, "learning_rate": 2.0057087876844136e-06, "loss": 0.8357, "step": 50785 }, { "epoch": 0.6189901648934225, "grad_norm": 1.8264821767807007, "learning_rate": 2.0053880692751766e-06, "loss": 0.8435, "step": 50790 }, { "epoch": 0.6190511011175703, "grad_norm": 2.0622470378875732, "learning_rate": 2.00506735086594e-06, "loss": 0.7961, "step": 50795 }, { "epoch": 0.6191120373417182, "grad_norm": 1.7744354009628296, "learning_rate": 2.0047466324567034e-06, "loss": 0.8205, "step": 50800 }, { "epoch": 0.619172973565866, "grad_norm": 2.0331125259399414, "learning_rate": 2.0044259140474665e-06, "loss": 0.7575, "step": 50805 }, { "epoch": 0.6192339097900138, "grad_norm": 1.8285077810287476, "learning_rate": 2.00410519563823e-06, "loss": 0.8525, "step": 50810 }, { "epoch": 0.6192948460141616, "grad_norm": 2.7037525177001953, "learning_rate": 2.003784477228993e-06, "loss": 0.8606, "step": 50815 }, { "epoch": 0.6193557822383093, "grad_norm": 1.9998477697372437, "learning_rate": 2.0034637588197564e-06, "loss": 0.8539, "step": 50820 }, { "epoch": 0.6194167184624572, "grad_norm": 2.2479472160339355, "learning_rate": 2.00314304041052e-06, "loss": 0.8506, "step": 50825 }, { "epoch": 0.619477654686605, "grad_norm": 1.9125471115112305, "learning_rate": 2.002822322001283e-06, "loss": 0.852, "step": 50830 }, { "epoch": 0.6195385909107528, "grad_norm": 1.9825698137283325, "learning_rate": 2.0025016035920463e-06, "loss": 0.7867, "step": 50835 }, { "epoch": 0.6195995271349006, "grad_norm": 1.8365697860717773, "learning_rate": 2.0021808851828097e-06, "loss": 0.8499, "step": 50840 }, { "epoch": 0.6196604633590485, "grad_norm": 1.861578106880188, "learning_rate": 2.0018601667735727e-06, "loss": 0.7882, "step": 50845 }, { "epoch": 0.6197213995831963, "grad_norm": 1.7724937200546265, "learning_rate": 2.001539448364336e-06, "loss": 0.8224, "step": 50850 }, { "epoch": 0.619782335807344, "grad_norm": 1.9207149744033813, "learning_rate": 2.0012187299550996e-06, "loss": 0.722, "step": 50855 }, { "epoch": 0.6198432720314918, "grad_norm": 2.0099785327911377, "learning_rate": 2.000898011545863e-06, "loss": 0.8741, "step": 50860 }, { "epoch": 0.6199042082556396, "grad_norm": 1.938179612159729, "learning_rate": 2.0005772931366265e-06, "loss": 0.8058, "step": 50865 }, { "epoch": 0.6199651444797875, "grad_norm": 1.9106754064559937, "learning_rate": 2.0002565747273895e-06, "loss": 0.8201, "step": 50870 }, { "epoch": 0.6200260807039353, "grad_norm": 1.8326733112335205, "learning_rate": 1.999935856318153e-06, "loss": 0.8093, "step": 50875 }, { "epoch": 0.6200870169280831, "grad_norm": 2.0575811862945557, "learning_rate": 1.9996151379089164e-06, "loss": 0.8012, "step": 50880 }, { "epoch": 0.6201479531522309, "grad_norm": 1.7884505987167358, "learning_rate": 1.9992944194996794e-06, "loss": 0.7982, "step": 50885 }, { "epoch": 0.6202088893763786, "grad_norm": 2.117253303527832, "learning_rate": 1.998973701090443e-06, "loss": 0.8447, "step": 50890 }, { "epoch": 0.6202698256005265, "grad_norm": 1.8005552291870117, "learning_rate": 1.998652982681206e-06, "loss": 0.8721, "step": 50895 }, { "epoch": 0.6203307618246743, "grad_norm": 2.299730062484741, "learning_rate": 1.9983322642719693e-06, "loss": 0.8057, "step": 50900 }, { "epoch": 0.6203916980488221, "grad_norm": 2.0807337760925293, "learning_rate": 1.9980115458627327e-06, "loss": 0.8511, "step": 50905 }, { "epoch": 0.6204526342729699, "grad_norm": 2.0995140075683594, "learning_rate": 1.9976908274534957e-06, "loss": 0.8602, "step": 50910 }, { "epoch": 0.6205135704971178, "grad_norm": 1.9068934917449951, "learning_rate": 1.997370109044259e-06, "loss": 0.8343, "step": 50915 }, { "epoch": 0.6205745067212656, "grad_norm": 1.6359624862670898, "learning_rate": 1.9970493906350226e-06, "loss": 0.8365, "step": 50920 }, { "epoch": 0.6206354429454133, "grad_norm": 1.854665756225586, "learning_rate": 1.996728672225786e-06, "loss": 0.7467, "step": 50925 }, { "epoch": 0.6206963791695611, "grad_norm": 1.9470980167388916, "learning_rate": 1.996407953816549e-06, "loss": 0.8599, "step": 50930 }, { "epoch": 0.6207573153937089, "grad_norm": 1.8791033029556274, "learning_rate": 1.9960872354073125e-06, "loss": 0.8069, "step": 50935 }, { "epoch": 0.6208182516178568, "grad_norm": 2.115366220474243, "learning_rate": 1.995766516998076e-06, "loss": 0.8115, "step": 50940 }, { "epoch": 0.6208791878420046, "grad_norm": 2.468484401702881, "learning_rate": 1.9954457985888394e-06, "loss": 0.8277, "step": 50945 }, { "epoch": 0.6209401240661524, "grad_norm": 1.486653447151184, "learning_rate": 1.9951250801796024e-06, "loss": 0.8004, "step": 50950 }, { "epoch": 0.6210010602903002, "grad_norm": 2.120816469192505, "learning_rate": 1.994804361770366e-06, "loss": 0.8292, "step": 50955 }, { "epoch": 0.6210619965144479, "grad_norm": 2.2050788402557373, "learning_rate": 1.9944836433611293e-06, "loss": 0.8621, "step": 50960 }, { "epoch": 0.6211229327385958, "grad_norm": 1.8770794868469238, "learning_rate": 1.9941629249518923e-06, "loss": 0.8541, "step": 50965 }, { "epoch": 0.6211838689627436, "grad_norm": 2.1856842041015625, "learning_rate": 1.9938422065426558e-06, "loss": 0.8101, "step": 50970 }, { "epoch": 0.6212448051868914, "grad_norm": 1.7639633417129517, "learning_rate": 1.9935214881334188e-06, "loss": 0.792, "step": 50975 }, { "epoch": 0.6213057414110392, "grad_norm": 1.8246773481369019, "learning_rate": 1.9932007697241822e-06, "loss": 0.8128, "step": 50980 }, { "epoch": 0.621366677635187, "grad_norm": 2.0752933025360107, "learning_rate": 1.9928800513149457e-06, "loss": 0.8291, "step": 50985 }, { "epoch": 0.6214276138593349, "grad_norm": 1.7598967552185059, "learning_rate": 1.9925593329057087e-06, "loss": 0.8829, "step": 50990 }, { "epoch": 0.6214885500834826, "grad_norm": 1.7472370862960815, "learning_rate": 1.992238614496472e-06, "loss": 0.8036, "step": 50995 }, { "epoch": 0.6215494863076304, "grad_norm": 1.998862624168396, "learning_rate": 1.9919178960872356e-06, "loss": 0.9064, "step": 51000 }, { "epoch": 0.6216104225317782, "grad_norm": 1.860283613204956, "learning_rate": 1.991597177677999e-06, "loss": 0.8406, "step": 51005 }, { "epoch": 0.621671358755926, "grad_norm": 2.2351126670837402, "learning_rate": 1.9912764592687624e-06, "loss": 0.8424, "step": 51010 }, { "epoch": 0.6217322949800739, "grad_norm": 1.79080331325531, "learning_rate": 1.9909557408595255e-06, "loss": 0.8258, "step": 51015 }, { "epoch": 0.6217932312042217, "grad_norm": 1.41278076171875, "learning_rate": 1.990635022450289e-06, "loss": 0.846, "step": 51020 }, { "epoch": 0.6218541674283695, "grad_norm": 1.9600050449371338, "learning_rate": 1.9903143040410523e-06, "loss": 0.8384, "step": 51025 }, { "epoch": 0.6219151036525172, "grad_norm": 1.9890750646591187, "learning_rate": 1.9899935856318154e-06, "loss": 0.8186, "step": 51030 }, { "epoch": 0.621976039876665, "grad_norm": 1.9920533895492554, "learning_rate": 1.989672867222579e-06, "loss": 0.883, "step": 51035 }, { "epoch": 0.6220369761008129, "grad_norm": 2.2572109699249268, "learning_rate": 1.9893521488133422e-06, "loss": 0.8521, "step": 51040 }, { "epoch": 0.6220979123249607, "grad_norm": 1.8539507389068604, "learning_rate": 1.9890314304041052e-06, "loss": 0.8507, "step": 51045 }, { "epoch": 0.6221588485491085, "grad_norm": 1.8901512622833252, "learning_rate": 1.9887107119948687e-06, "loss": 0.7354, "step": 51050 }, { "epoch": 0.6222197847732563, "grad_norm": 2.3321053981781006, "learning_rate": 1.9883899935856317e-06, "loss": 0.8738, "step": 51055 }, { "epoch": 0.6222807209974042, "grad_norm": 1.9771490097045898, "learning_rate": 1.988069275176395e-06, "loss": 0.8455, "step": 51060 }, { "epoch": 0.6223416572215519, "grad_norm": 2.244605541229248, "learning_rate": 1.9877485567671586e-06, "loss": 0.8445, "step": 51065 }, { "epoch": 0.6224025934456997, "grad_norm": 1.7431639432907104, "learning_rate": 1.9874278383579216e-06, "loss": 0.7786, "step": 51070 }, { "epoch": 0.6224635296698475, "grad_norm": 2.072036027908325, "learning_rate": 1.987107119948685e-06, "loss": 0.8373, "step": 51075 }, { "epoch": 0.6225244658939953, "grad_norm": 1.7186920642852783, "learning_rate": 1.9867864015394485e-06, "loss": 0.8195, "step": 51080 }, { "epoch": 0.6225854021181432, "grad_norm": 2.3483145236968994, "learning_rate": 1.986465683130212e-06, "loss": 0.8427, "step": 51085 }, { "epoch": 0.622646338342291, "grad_norm": 2.2222537994384766, "learning_rate": 1.9861449647209754e-06, "loss": 0.8439, "step": 51090 }, { "epoch": 0.6227072745664388, "grad_norm": 2.548513650894165, "learning_rate": 1.9858242463117384e-06, "loss": 0.7856, "step": 51095 }, { "epoch": 0.6227682107905865, "grad_norm": 1.7688965797424316, "learning_rate": 1.985503527902502e-06, "loss": 0.8342, "step": 51100 }, { "epoch": 0.6228291470147344, "grad_norm": 2.198469400405884, "learning_rate": 1.9851828094932653e-06, "loss": 0.8615, "step": 51105 }, { "epoch": 0.6228900832388822, "grad_norm": 1.7574316263198853, "learning_rate": 1.9848620910840283e-06, "loss": 0.842, "step": 51110 }, { "epoch": 0.62295101946303, "grad_norm": 2.2856040000915527, "learning_rate": 1.9845413726747917e-06, "loss": 0.797, "step": 51115 }, { "epoch": 0.6230119556871778, "grad_norm": 1.920756459236145, "learning_rate": 1.984220654265555e-06, "loss": 0.8362, "step": 51120 }, { "epoch": 0.6230728919113256, "grad_norm": 2.6716766357421875, "learning_rate": 1.983899935856318e-06, "loss": 0.9479, "step": 51125 }, { "epoch": 0.6231338281354735, "grad_norm": 1.8429325819015503, "learning_rate": 1.9835792174470816e-06, "loss": 0.8425, "step": 51130 }, { "epoch": 0.6231947643596212, "grad_norm": 2.3658535480499268, "learning_rate": 1.983258499037845e-06, "loss": 0.8347, "step": 51135 }, { "epoch": 0.623255700583769, "grad_norm": 2.6323390007019043, "learning_rate": 1.982937780628608e-06, "loss": 0.8229, "step": 51140 }, { "epoch": 0.6233166368079168, "grad_norm": 2.060213088989258, "learning_rate": 1.9826170622193715e-06, "loss": 0.8031, "step": 51145 }, { "epoch": 0.6233775730320646, "grad_norm": 1.9100370407104492, "learning_rate": 1.982296343810135e-06, "loss": 0.7961, "step": 51150 }, { "epoch": 0.6234385092562125, "grad_norm": 2.2634639739990234, "learning_rate": 1.981975625400898e-06, "loss": 0.8115, "step": 51155 }, { "epoch": 0.6234994454803603, "grad_norm": 1.6874241828918457, "learning_rate": 1.9816549069916614e-06, "loss": 0.825, "step": 51160 }, { "epoch": 0.6235603817045081, "grad_norm": 2.850353240966797, "learning_rate": 1.981334188582425e-06, "loss": 0.8232, "step": 51165 }, { "epoch": 0.6236213179286558, "grad_norm": 2.0565428733825684, "learning_rate": 1.9810134701731883e-06, "loss": 0.8653, "step": 51170 }, { "epoch": 0.6236822541528037, "grad_norm": 1.9527826309204102, "learning_rate": 1.9806927517639517e-06, "loss": 0.8692, "step": 51175 }, { "epoch": 0.6237431903769515, "grad_norm": 1.6712111234664917, "learning_rate": 1.9803720333547147e-06, "loss": 0.8019, "step": 51180 }, { "epoch": 0.6238041266010993, "grad_norm": 1.7505428791046143, "learning_rate": 1.980051314945478e-06, "loss": 0.7722, "step": 51185 }, { "epoch": 0.6238650628252471, "grad_norm": 2.040759801864624, "learning_rate": 1.979730596536241e-06, "loss": 0.8465, "step": 51190 }, { "epoch": 0.6239259990493949, "grad_norm": 1.8661400079727173, "learning_rate": 1.9794098781270046e-06, "loss": 0.8655, "step": 51195 }, { "epoch": 0.6239869352735428, "grad_norm": 1.899789571762085, "learning_rate": 1.979089159717768e-06, "loss": 0.8028, "step": 51200 }, { "epoch": 0.6240478714976905, "grad_norm": 2.200867176055908, "learning_rate": 1.978768441308531e-06, "loss": 0.8472, "step": 51205 }, { "epoch": 0.6241088077218383, "grad_norm": 1.9932864904403687, "learning_rate": 1.9784477228992945e-06, "loss": 0.8496, "step": 51210 }, { "epoch": 0.6241697439459861, "grad_norm": 1.924368143081665, "learning_rate": 1.978127004490058e-06, "loss": 0.829, "step": 51215 }, { "epoch": 0.624230680170134, "grad_norm": 2.187704563140869, "learning_rate": 1.977806286080821e-06, "loss": 0.8877, "step": 51220 }, { "epoch": 0.6242916163942818, "grad_norm": 2.3294973373413086, "learning_rate": 1.9774855676715844e-06, "loss": 0.8138, "step": 51225 }, { "epoch": 0.6243525526184296, "grad_norm": 1.877184510231018, "learning_rate": 1.977164849262348e-06, "loss": 0.7809, "step": 51230 }, { "epoch": 0.6244134888425774, "grad_norm": 2.4483535289764404, "learning_rate": 1.9768441308531113e-06, "loss": 0.7834, "step": 51235 }, { "epoch": 0.6244744250667251, "grad_norm": 2.142406702041626, "learning_rate": 1.9765234124438743e-06, "loss": 0.7692, "step": 51240 }, { "epoch": 0.624535361290873, "grad_norm": 1.9393163919448853, "learning_rate": 1.9762026940346378e-06, "loss": 0.8445, "step": 51245 }, { "epoch": 0.6245962975150208, "grad_norm": 2.2288429737091064, "learning_rate": 1.9758819756254012e-06, "loss": 0.794, "step": 51250 }, { "epoch": 0.6246572337391686, "grad_norm": 1.918976068496704, "learning_rate": 1.9755612572161647e-06, "loss": 0.8186, "step": 51255 }, { "epoch": 0.6247181699633164, "grad_norm": 1.7705940008163452, "learning_rate": 1.9752405388069277e-06, "loss": 0.8053, "step": 51260 }, { "epoch": 0.6247791061874642, "grad_norm": 2.0365686416625977, "learning_rate": 1.974919820397691e-06, "loss": 0.8187, "step": 51265 }, { "epoch": 0.6248400424116121, "grad_norm": 1.868468165397644, "learning_rate": 1.974599101988454e-06, "loss": 0.808, "step": 51270 }, { "epoch": 0.6249009786357598, "grad_norm": 2.6496949195861816, "learning_rate": 1.9742783835792176e-06, "loss": 0.8443, "step": 51275 }, { "epoch": 0.6249619148599076, "grad_norm": 2.0807933807373047, "learning_rate": 1.973957665169981e-06, "loss": 0.8648, "step": 51280 }, { "epoch": 0.6250228510840554, "grad_norm": 2.063350200653076, "learning_rate": 1.973636946760744e-06, "loss": 0.8211, "step": 51285 }, { "epoch": 0.6250837873082032, "grad_norm": 1.832335352897644, "learning_rate": 1.9733162283515075e-06, "loss": 0.8887, "step": 51290 }, { "epoch": 0.6251447235323511, "grad_norm": 1.6259499788284302, "learning_rate": 1.972995509942271e-06, "loss": 0.8035, "step": 51295 }, { "epoch": 0.6252056597564989, "grad_norm": 2.6713523864746094, "learning_rate": 1.972674791533034e-06, "loss": 0.8095, "step": 51300 }, { "epoch": 0.6252665959806466, "grad_norm": 2.235734224319458, "learning_rate": 1.9723540731237974e-06, "loss": 0.8592, "step": 51305 }, { "epoch": 0.6253275322047944, "grad_norm": 1.9074554443359375, "learning_rate": 1.972033354714561e-06, "loss": 0.8133, "step": 51310 }, { "epoch": 0.6253884684289422, "grad_norm": 1.7642250061035156, "learning_rate": 1.9717126363053242e-06, "loss": 0.782, "step": 51315 }, { "epoch": 0.6254494046530901, "grad_norm": 2.0445590019226074, "learning_rate": 1.9713919178960873e-06, "loss": 0.8484, "step": 51320 }, { "epoch": 0.6255103408772379, "grad_norm": 2.2089974880218506, "learning_rate": 1.9710711994868507e-06, "loss": 0.9062, "step": 51325 }, { "epoch": 0.6255712771013857, "grad_norm": 2.0125365257263184, "learning_rate": 1.970750481077614e-06, "loss": 0.7397, "step": 51330 }, { "epoch": 0.6256322133255335, "grad_norm": 1.8794082403182983, "learning_rate": 1.9704297626683776e-06, "loss": 0.8207, "step": 51335 }, { "epoch": 0.6256931495496812, "grad_norm": 1.8533222675323486, "learning_rate": 1.9701090442591406e-06, "loss": 0.7193, "step": 51340 }, { "epoch": 0.6257540857738291, "grad_norm": 1.8955286741256714, "learning_rate": 1.969788325849904e-06, "loss": 0.8471, "step": 51345 }, { "epoch": 0.6258150219979769, "grad_norm": 1.6541059017181396, "learning_rate": 1.969467607440667e-06, "loss": 0.859, "step": 51350 }, { "epoch": 0.6258759582221247, "grad_norm": 2.258646249771118, "learning_rate": 1.9691468890314305e-06, "loss": 0.7623, "step": 51355 }, { "epoch": 0.6259368944462725, "grad_norm": 1.898430585861206, "learning_rate": 1.968826170622194e-06, "loss": 0.8409, "step": 51360 }, { "epoch": 0.6259978306704204, "grad_norm": 2.268625020980835, "learning_rate": 1.968505452212957e-06, "loss": 0.7822, "step": 51365 }, { "epoch": 0.6260587668945682, "grad_norm": 1.6350680589675903, "learning_rate": 1.9681847338037204e-06, "loss": 0.8142, "step": 51370 }, { "epoch": 0.6261197031187159, "grad_norm": 1.8667877912521362, "learning_rate": 1.967864015394484e-06, "loss": 0.8423, "step": 51375 }, { "epoch": 0.6261806393428637, "grad_norm": 2.1594181060791016, "learning_rate": 1.967543296985247e-06, "loss": 0.8246, "step": 51380 }, { "epoch": 0.6262415755670115, "grad_norm": 1.9479906558990479, "learning_rate": 1.9672225785760103e-06, "loss": 0.8286, "step": 51385 }, { "epoch": 0.6263025117911594, "grad_norm": 1.8273522853851318, "learning_rate": 1.9669018601667737e-06, "loss": 0.8238, "step": 51390 }, { "epoch": 0.6263634480153072, "grad_norm": 1.9161262512207031, "learning_rate": 1.966581141757537e-06, "loss": 0.8249, "step": 51395 }, { "epoch": 0.626424384239455, "grad_norm": 2.011528968811035, "learning_rate": 1.9662604233483006e-06, "loss": 0.8222, "step": 51400 }, { "epoch": 0.6264853204636028, "grad_norm": 1.925941824913025, "learning_rate": 1.9659397049390636e-06, "loss": 0.7963, "step": 51405 }, { "epoch": 0.6265462566877505, "grad_norm": 2.1274328231811523, "learning_rate": 1.965618986529827e-06, "loss": 0.8567, "step": 51410 }, { "epoch": 0.6266071929118984, "grad_norm": 2.2022454738616943, "learning_rate": 1.9652982681205905e-06, "loss": 0.7796, "step": 51415 }, { "epoch": 0.6266681291360462, "grad_norm": 1.9425522089004517, "learning_rate": 1.9649775497113535e-06, "loss": 0.8005, "step": 51420 }, { "epoch": 0.626729065360194, "grad_norm": 1.8051022291183472, "learning_rate": 1.964656831302117e-06, "loss": 0.8675, "step": 51425 }, { "epoch": 0.6267900015843418, "grad_norm": 1.8739224672317505, "learning_rate": 1.9643361128928804e-06, "loss": 0.8894, "step": 51430 }, { "epoch": 0.6268509378084897, "grad_norm": 2.4584641456604004, "learning_rate": 1.9640153944836434e-06, "loss": 0.828, "step": 51435 }, { "epoch": 0.6269118740326375, "grad_norm": 1.6420023441314697, "learning_rate": 1.963694676074407e-06, "loss": 0.7561, "step": 51440 }, { "epoch": 0.6269728102567852, "grad_norm": 1.7413684129714966, "learning_rate": 1.96337395766517e-06, "loss": 0.8338, "step": 51445 }, { "epoch": 0.627033746480933, "grad_norm": 1.8502739667892456, "learning_rate": 1.9630532392559333e-06, "loss": 0.8257, "step": 51450 }, { "epoch": 0.6270946827050808, "grad_norm": 1.9592515230178833, "learning_rate": 1.9627325208466968e-06, "loss": 0.8334, "step": 51455 }, { "epoch": 0.6271556189292287, "grad_norm": 1.936850666999817, "learning_rate": 1.9624118024374598e-06, "loss": 0.7962, "step": 51460 }, { "epoch": 0.6272165551533765, "grad_norm": 1.8870481252670288, "learning_rate": 1.9620910840282232e-06, "loss": 0.8265, "step": 51465 }, { "epoch": 0.6272774913775243, "grad_norm": 2.0245769023895264, "learning_rate": 1.9617703656189867e-06, "loss": 0.8655, "step": 51470 }, { "epoch": 0.6273384276016721, "grad_norm": 2.0084481239318848, "learning_rate": 1.96144964720975e-06, "loss": 0.8789, "step": 51475 }, { "epoch": 0.6273993638258198, "grad_norm": 2.022221088409424, "learning_rate": 1.9611289288005135e-06, "loss": 0.8691, "step": 51480 }, { "epoch": 0.6274603000499677, "grad_norm": 1.84248948097229, "learning_rate": 1.9608082103912766e-06, "loss": 0.796, "step": 51485 }, { "epoch": 0.6275212362741155, "grad_norm": 2.0622119903564453, "learning_rate": 1.96048749198204e-06, "loss": 0.8457, "step": 51490 }, { "epoch": 0.6275821724982633, "grad_norm": 2.1101672649383545, "learning_rate": 1.9601667735728034e-06, "loss": 0.8007, "step": 51495 }, { "epoch": 0.6276431087224111, "grad_norm": 1.8939887285232544, "learning_rate": 1.9598460551635665e-06, "loss": 0.8227, "step": 51500 }, { "epoch": 0.627704044946559, "grad_norm": 1.9976756572723389, "learning_rate": 1.95952533675433e-06, "loss": 0.8356, "step": 51505 }, { "epoch": 0.6277649811707068, "grad_norm": 1.8618085384368896, "learning_rate": 1.9592046183450933e-06, "loss": 0.8504, "step": 51510 }, { "epoch": 0.6278259173948545, "grad_norm": 2.0355937480926514, "learning_rate": 1.9588838999358564e-06, "loss": 0.7746, "step": 51515 }, { "epoch": 0.6278868536190023, "grad_norm": 2.578237295150757, "learning_rate": 1.95856318152662e-06, "loss": 0.8139, "step": 51520 }, { "epoch": 0.6279477898431501, "grad_norm": 2.863818645477295, "learning_rate": 1.958242463117383e-06, "loss": 0.8691, "step": 51525 }, { "epoch": 0.628008726067298, "grad_norm": 2.996999502182007, "learning_rate": 1.9579217447081463e-06, "loss": 0.8184, "step": 51530 }, { "epoch": 0.6280696622914458, "grad_norm": 2.050877332687378, "learning_rate": 1.9576010262989097e-06, "loss": 0.8318, "step": 51535 }, { "epoch": 0.6281305985155936, "grad_norm": 2.0480833053588867, "learning_rate": 1.957280307889673e-06, "loss": 0.8331, "step": 51540 }, { "epoch": 0.6281915347397414, "grad_norm": 1.6917060613632202, "learning_rate": 1.956959589480436e-06, "loss": 0.8768, "step": 51545 }, { "epoch": 0.6282524709638891, "grad_norm": 1.7978016138076782, "learning_rate": 1.9566388710711996e-06, "loss": 0.7579, "step": 51550 }, { "epoch": 0.628313407188037, "grad_norm": 1.8995317220687866, "learning_rate": 1.956318152661963e-06, "loss": 0.8739, "step": 51555 }, { "epoch": 0.6283743434121848, "grad_norm": 1.8983250856399536, "learning_rate": 1.9559974342527265e-06, "loss": 0.8713, "step": 51560 }, { "epoch": 0.6284352796363326, "grad_norm": 1.8720444440841675, "learning_rate": 1.9556767158434895e-06, "loss": 0.8826, "step": 51565 }, { "epoch": 0.6284962158604804, "grad_norm": 2.1929562091827393, "learning_rate": 1.955355997434253e-06, "loss": 0.8367, "step": 51570 }, { "epoch": 0.6285571520846283, "grad_norm": 1.6938374042510986, "learning_rate": 1.9550352790250164e-06, "loss": 0.7911, "step": 51575 }, { "epoch": 0.6286180883087761, "grad_norm": 1.8963309526443481, "learning_rate": 1.9547145606157794e-06, "loss": 0.8305, "step": 51580 }, { "epoch": 0.6286790245329238, "grad_norm": 1.9909603595733643, "learning_rate": 1.954393842206543e-06, "loss": 0.8416, "step": 51585 }, { "epoch": 0.6287399607570716, "grad_norm": 1.8541589975357056, "learning_rate": 1.9540731237973063e-06, "loss": 0.7432, "step": 51590 }, { "epoch": 0.6288008969812194, "grad_norm": 2.129608392715454, "learning_rate": 1.9537524053880693e-06, "loss": 0.7198, "step": 51595 }, { "epoch": 0.6288618332053673, "grad_norm": 2.0524723529815674, "learning_rate": 1.9534316869788327e-06, "loss": 0.9, "step": 51600 }, { "epoch": 0.6289227694295151, "grad_norm": 1.8387141227722168, "learning_rate": 1.9531109685695957e-06, "loss": 0.8344, "step": 51605 }, { "epoch": 0.6289837056536629, "grad_norm": 1.9146469831466675, "learning_rate": 1.952790250160359e-06, "loss": 0.7802, "step": 51610 }, { "epoch": 0.6290446418778107, "grad_norm": 1.9138708114624023, "learning_rate": 1.9524695317511226e-06, "loss": 0.8592, "step": 51615 }, { "epoch": 0.6291055781019584, "grad_norm": 2.297290802001953, "learning_rate": 1.952148813341886e-06, "loss": 0.8196, "step": 51620 }, { "epoch": 0.6291665143261063, "grad_norm": 2.308434247970581, "learning_rate": 1.9518280949326495e-06, "loss": 0.8793, "step": 51625 }, { "epoch": 0.6292274505502541, "grad_norm": 1.8889267444610596, "learning_rate": 1.9515073765234125e-06, "loss": 0.8723, "step": 51630 }, { "epoch": 0.6292883867744019, "grad_norm": 2.3535430431365967, "learning_rate": 1.951186658114176e-06, "loss": 0.879, "step": 51635 }, { "epoch": 0.6293493229985497, "grad_norm": 2.0190186500549316, "learning_rate": 1.9508659397049394e-06, "loss": 0.838, "step": 51640 }, { "epoch": 0.6294102592226976, "grad_norm": 1.7609751224517822, "learning_rate": 1.9505452212957024e-06, "loss": 0.8272, "step": 51645 }, { "epoch": 0.6294711954468454, "grad_norm": 1.9926468133926392, "learning_rate": 1.950224502886466e-06, "loss": 0.8187, "step": 51650 }, { "epoch": 0.6295321316709931, "grad_norm": 2.1260874271392822, "learning_rate": 1.9499037844772293e-06, "loss": 0.8525, "step": 51655 }, { "epoch": 0.6295930678951409, "grad_norm": 2.0200083255767822, "learning_rate": 1.9495830660679923e-06, "loss": 0.8061, "step": 51660 }, { "epoch": 0.6296540041192887, "grad_norm": 1.7498540878295898, "learning_rate": 1.9492623476587558e-06, "loss": 0.8289, "step": 51665 }, { "epoch": 0.6297149403434366, "grad_norm": 2.0433285236358643, "learning_rate": 1.948941629249519e-06, "loss": 0.8129, "step": 51670 }, { "epoch": 0.6297758765675844, "grad_norm": 2.309513568878174, "learning_rate": 1.948620910840282e-06, "loss": 0.8242, "step": 51675 }, { "epoch": 0.6298368127917322, "grad_norm": 1.9655194282531738, "learning_rate": 1.9483001924310456e-06, "loss": 0.7558, "step": 51680 }, { "epoch": 0.62989774901588, "grad_norm": 1.8079464435577393, "learning_rate": 1.947979474021809e-06, "loss": 0.7521, "step": 51685 }, { "epoch": 0.6299586852400277, "grad_norm": 2.133011817932129, "learning_rate": 1.947658755612572e-06, "loss": 0.7556, "step": 51690 }, { "epoch": 0.6300196214641756, "grad_norm": 1.8973414897918701, "learning_rate": 1.9473380372033355e-06, "loss": 0.7675, "step": 51695 }, { "epoch": 0.6300805576883234, "grad_norm": 2.0202605724334717, "learning_rate": 1.947017318794099e-06, "loss": 0.8202, "step": 51700 }, { "epoch": 0.6301414939124712, "grad_norm": 1.5229151248931885, "learning_rate": 1.9466966003848624e-06, "loss": 0.8432, "step": 51705 }, { "epoch": 0.630202430136619, "grad_norm": 2.061361312866211, "learning_rate": 1.946375881975626e-06, "loss": 0.916, "step": 51710 }, { "epoch": 0.6302633663607669, "grad_norm": 2.2003555297851562, "learning_rate": 1.946055163566389e-06, "loss": 0.8285, "step": 51715 }, { "epoch": 0.6303243025849147, "grad_norm": 1.9073641300201416, "learning_rate": 1.9457344451571523e-06, "loss": 0.8261, "step": 51720 }, { "epoch": 0.6303852388090624, "grad_norm": 1.98457670211792, "learning_rate": 1.9454137267479158e-06, "loss": 0.8418, "step": 51725 }, { "epoch": 0.6304461750332102, "grad_norm": 1.790048360824585, "learning_rate": 1.9450930083386788e-06, "loss": 0.8403, "step": 51730 }, { "epoch": 0.630507111257358, "grad_norm": 2.087294340133667, "learning_rate": 1.9447722899294422e-06, "loss": 0.8166, "step": 51735 }, { "epoch": 0.6305680474815059, "grad_norm": 2.001086950302124, "learning_rate": 1.9444515715202052e-06, "loss": 0.8693, "step": 51740 }, { "epoch": 0.6306289837056537, "grad_norm": 1.8120793104171753, "learning_rate": 1.9441308531109687e-06, "loss": 0.8371, "step": 51745 }, { "epoch": 0.6306899199298015, "grad_norm": 1.9354346990585327, "learning_rate": 1.943810134701732e-06, "loss": 0.8997, "step": 51750 }, { "epoch": 0.6307508561539493, "grad_norm": 1.9829899072647095, "learning_rate": 1.943489416292495e-06, "loss": 0.8121, "step": 51755 }, { "epoch": 0.630811792378097, "grad_norm": 2.241626262664795, "learning_rate": 1.9431686978832586e-06, "loss": 0.7834, "step": 51760 }, { "epoch": 0.6308727286022449, "grad_norm": 2.002779722213745, "learning_rate": 1.942847979474022e-06, "loss": 0.814, "step": 51765 }, { "epoch": 0.6309336648263927, "grad_norm": 2.216034412384033, "learning_rate": 1.942527261064785e-06, "loss": 0.7851, "step": 51770 }, { "epoch": 0.6309946010505405, "grad_norm": 1.9557416439056396, "learning_rate": 1.9422065426555485e-06, "loss": 0.7662, "step": 51775 }, { "epoch": 0.6310555372746883, "grad_norm": 2.047616958618164, "learning_rate": 1.941885824246312e-06, "loss": 0.8656, "step": 51780 }, { "epoch": 0.6311164734988362, "grad_norm": 2.306126594543457, "learning_rate": 1.9415651058370754e-06, "loss": 0.8229, "step": 51785 }, { "epoch": 0.631177409722984, "grad_norm": 1.694880723953247, "learning_rate": 1.941244387427839e-06, "loss": 0.8407, "step": 51790 }, { "epoch": 0.6312383459471317, "grad_norm": 1.840059757232666, "learning_rate": 1.940923669018602e-06, "loss": 0.7526, "step": 51795 }, { "epoch": 0.6312992821712795, "grad_norm": 2.1336824893951416, "learning_rate": 1.9406029506093653e-06, "loss": 0.8492, "step": 51800 }, { "epoch": 0.6313602183954273, "grad_norm": 1.7657335996627808, "learning_rate": 1.9402822322001287e-06, "loss": 0.8095, "step": 51805 }, { "epoch": 0.6314211546195752, "grad_norm": 1.9530794620513916, "learning_rate": 1.9399615137908917e-06, "loss": 0.7993, "step": 51810 }, { "epoch": 0.631482090843723, "grad_norm": 1.8461604118347168, "learning_rate": 1.939640795381655e-06, "loss": 0.7964, "step": 51815 }, { "epoch": 0.6315430270678708, "grad_norm": 2.09628963470459, "learning_rate": 1.939320076972418e-06, "loss": 0.8459, "step": 51820 }, { "epoch": 0.6316039632920186, "grad_norm": 1.8125083446502686, "learning_rate": 1.9389993585631816e-06, "loss": 0.822, "step": 51825 }, { "epoch": 0.6316648995161663, "grad_norm": 2.104304313659668, "learning_rate": 1.938678640153945e-06, "loss": 0.8399, "step": 51830 }, { "epoch": 0.6317258357403142, "grad_norm": 2.438786506652832, "learning_rate": 1.938357921744708e-06, "loss": 0.874, "step": 51835 }, { "epoch": 0.631786771964462, "grad_norm": 1.9374587535858154, "learning_rate": 1.9380372033354715e-06, "loss": 0.8793, "step": 51840 }, { "epoch": 0.6318477081886098, "grad_norm": 2.1439313888549805, "learning_rate": 1.937716484926235e-06, "loss": 0.7913, "step": 51845 }, { "epoch": 0.6319086444127576, "grad_norm": 2.215282440185547, "learning_rate": 1.9373957665169984e-06, "loss": 0.8352, "step": 51850 }, { "epoch": 0.6319695806369054, "grad_norm": 2.3395004272460938, "learning_rate": 1.9370750481077614e-06, "loss": 0.8186, "step": 51855 }, { "epoch": 0.6320305168610533, "grad_norm": 2.017982244491577, "learning_rate": 1.936754329698525e-06, "loss": 0.7976, "step": 51860 }, { "epoch": 0.632091453085201, "grad_norm": 1.997165322303772, "learning_rate": 1.9364336112892883e-06, "loss": 0.8171, "step": 51865 }, { "epoch": 0.6321523893093488, "grad_norm": 1.8700830936431885, "learning_rate": 1.9361128928800517e-06, "loss": 0.8504, "step": 51870 }, { "epoch": 0.6322133255334966, "grad_norm": 2.0487606525421143, "learning_rate": 1.9357921744708147e-06, "loss": 0.8791, "step": 51875 }, { "epoch": 0.6322742617576445, "grad_norm": 1.9152058362960815, "learning_rate": 1.935471456061578e-06, "loss": 0.8804, "step": 51880 }, { "epoch": 0.6323351979817923, "grad_norm": 1.7639670372009277, "learning_rate": 1.9351507376523416e-06, "loss": 0.864, "step": 51885 }, { "epoch": 0.6323961342059401, "grad_norm": 2.1979076862335205, "learning_rate": 1.9348300192431046e-06, "loss": 0.8318, "step": 51890 }, { "epoch": 0.6324570704300879, "grad_norm": 1.6690928936004639, "learning_rate": 1.934509300833868e-06, "loss": 0.9329, "step": 51895 }, { "epoch": 0.6325180066542356, "grad_norm": 1.7383050918579102, "learning_rate": 1.934188582424631e-06, "loss": 0.8178, "step": 51900 }, { "epoch": 0.6325789428783835, "grad_norm": 2.1083130836486816, "learning_rate": 1.9338678640153945e-06, "loss": 0.8592, "step": 51905 }, { "epoch": 0.6326398791025313, "grad_norm": 1.746711254119873, "learning_rate": 1.933547145606158e-06, "loss": 0.8594, "step": 51910 }, { "epoch": 0.6327008153266791, "grad_norm": 2.261699676513672, "learning_rate": 1.933226427196921e-06, "loss": 0.8167, "step": 51915 }, { "epoch": 0.6327617515508269, "grad_norm": 1.9662405252456665, "learning_rate": 1.9329057087876844e-06, "loss": 0.856, "step": 51920 }, { "epoch": 0.6328226877749747, "grad_norm": 2.117143392562866, "learning_rate": 1.932584990378448e-06, "loss": 0.7969, "step": 51925 }, { "epoch": 0.6328836239991226, "grad_norm": 2.1722195148468018, "learning_rate": 1.9322642719692113e-06, "loss": 0.8123, "step": 51930 }, { "epoch": 0.6329445602232703, "grad_norm": 2.1520962715148926, "learning_rate": 1.9319435535599747e-06, "loss": 0.9343, "step": 51935 }, { "epoch": 0.6330054964474181, "grad_norm": 1.8266221284866333, "learning_rate": 1.9316228351507378e-06, "loss": 0.7987, "step": 51940 }, { "epoch": 0.6330664326715659, "grad_norm": 2.0603930950164795, "learning_rate": 1.931302116741501e-06, "loss": 0.845, "step": 51945 }, { "epoch": 0.6331273688957137, "grad_norm": 1.7718932628631592, "learning_rate": 1.9309813983322646e-06, "loss": 0.8643, "step": 51950 }, { "epoch": 0.6331883051198616, "grad_norm": 1.7662497758865356, "learning_rate": 1.9306606799230277e-06, "loss": 0.9102, "step": 51955 }, { "epoch": 0.6332492413440094, "grad_norm": 2.106703042984009, "learning_rate": 1.930339961513791e-06, "loss": 0.7842, "step": 51960 }, { "epoch": 0.6333101775681572, "grad_norm": 1.8813844919204712, "learning_rate": 1.9300192431045545e-06, "loss": 0.8226, "step": 51965 }, { "epoch": 0.6333711137923049, "grad_norm": 2.42429780960083, "learning_rate": 1.9296985246953176e-06, "loss": 0.8195, "step": 51970 }, { "epoch": 0.6334320500164528, "grad_norm": 2.005237340927124, "learning_rate": 1.929377806286081e-06, "loss": 0.8268, "step": 51975 }, { "epoch": 0.6334929862406006, "grad_norm": 1.9152573347091675, "learning_rate": 1.9290570878768444e-06, "loss": 0.7938, "step": 51980 }, { "epoch": 0.6335539224647484, "grad_norm": 2.1737687587738037, "learning_rate": 1.9287363694676075e-06, "loss": 0.8767, "step": 51985 }, { "epoch": 0.6336148586888962, "grad_norm": 1.8353990316390991, "learning_rate": 1.928415651058371e-06, "loss": 0.7153, "step": 51990 }, { "epoch": 0.633675794913044, "grad_norm": 2.2210566997528076, "learning_rate": 1.928094932649134e-06, "loss": 0.8293, "step": 51995 }, { "epoch": 0.6337367311371919, "grad_norm": 1.827378273010254, "learning_rate": 1.9277742142398974e-06, "loss": 0.7523, "step": 52000 }, { "epoch": 0.6337976673613396, "grad_norm": 1.7666698694229126, "learning_rate": 1.927453495830661e-06, "loss": 0.8308, "step": 52005 }, { "epoch": 0.6338586035854874, "grad_norm": 2.214829206466675, "learning_rate": 1.9271327774214242e-06, "loss": 0.7867, "step": 52010 }, { "epoch": 0.6339195398096352, "grad_norm": 1.657283067703247, "learning_rate": 1.9268120590121877e-06, "loss": 0.8449, "step": 52015 }, { "epoch": 0.633980476033783, "grad_norm": 1.7335635423660278, "learning_rate": 1.9264913406029507e-06, "loss": 0.811, "step": 52020 }, { "epoch": 0.6340414122579309, "grad_norm": 2.323596954345703, "learning_rate": 1.926170622193714e-06, "loss": 0.867, "step": 52025 }, { "epoch": 0.6341023484820787, "grad_norm": 2.071989059448242, "learning_rate": 1.9258499037844776e-06, "loss": 0.8551, "step": 52030 }, { "epoch": 0.6341632847062265, "grad_norm": 1.9397205114364624, "learning_rate": 1.9255291853752406e-06, "loss": 0.8639, "step": 52035 }, { "epoch": 0.6342242209303742, "grad_norm": 1.9387376308441162, "learning_rate": 1.925208466966004e-06, "loss": 0.794, "step": 52040 }, { "epoch": 0.634285157154522, "grad_norm": 1.878436803817749, "learning_rate": 1.9248877485567675e-06, "loss": 0.8189, "step": 52045 }, { "epoch": 0.6343460933786699, "grad_norm": 2.0927603244781494, "learning_rate": 1.9245670301475305e-06, "loss": 0.846, "step": 52050 }, { "epoch": 0.6344070296028177, "grad_norm": 1.8027164936065674, "learning_rate": 1.924246311738294e-06, "loss": 0.7521, "step": 52055 }, { "epoch": 0.6344679658269655, "grad_norm": 1.9369730949401855, "learning_rate": 1.9239255933290574e-06, "loss": 0.8258, "step": 52060 }, { "epoch": 0.6345289020511133, "grad_norm": 1.9714698791503906, "learning_rate": 1.9236048749198204e-06, "loss": 0.8325, "step": 52065 }, { "epoch": 0.6345898382752612, "grad_norm": 1.9271824359893799, "learning_rate": 1.923284156510584e-06, "loss": 0.8261, "step": 52070 }, { "epoch": 0.6346507744994089, "grad_norm": 1.9901624917984009, "learning_rate": 1.9229634381013473e-06, "loss": 0.8629, "step": 52075 }, { "epoch": 0.6347117107235567, "grad_norm": 2.1780645847320557, "learning_rate": 1.9226427196921103e-06, "loss": 0.8181, "step": 52080 }, { "epoch": 0.6347726469477045, "grad_norm": 2.3404359817504883, "learning_rate": 1.9223220012828737e-06, "loss": 0.8043, "step": 52085 }, { "epoch": 0.6348335831718523, "grad_norm": 1.8789950609207153, "learning_rate": 1.922001282873637e-06, "loss": 0.7801, "step": 52090 }, { "epoch": 0.6348945193960002, "grad_norm": 2.319986343383789, "learning_rate": 1.9216805644644006e-06, "loss": 0.8101, "step": 52095 }, { "epoch": 0.634955455620148, "grad_norm": 2.070739507675171, "learning_rate": 1.921359846055164e-06, "loss": 0.8494, "step": 52100 }, { "epoch": 0.6350163918442958, "grad_norm": 2.483879566192627, "learning_rate": 1.921039127645927e-06, "loss": 0.7915, "step": 52105 }, { "epoch": 0.6350773280684435, "grad_norm": 1.9781842231750488, "learning_rate": 1.9207184092366905e-06, "loss": 0.7586, "step": 52110 }, { "epoch": 0.6351382642925913, "grad_norm": 1.9128165245056152, "learning_rate": 1.9203976908274535e-06, "loss": 0.7982, "step": 52115 }, { "epoch": 0.6351992005167392, "grad_norm": 1.997468113899231, "learning_rate": 1.920076972418217e-06, "loss": 0.8097, "step": 52120 }, { "epoch": 0.635260136740887, "grad_norm": 1.8311493396759033, "learning_rate": 1.9197562540089804e-06, "loss": 0.8808, "step": 52125 }, { "epoch": 0.6353210729650348, "grad_norm": 1.8138303756713867, "learning_rate": 1.9194355355997434e-06, "loss": 0.8212, "step": 52130 }, { "epoch": 0.6353820091891826, "grad_norm": 1.8741525411605835, "learning_rate": 1.919114817190507e-06, "loss": 0.8343, "step": 52135 }, { "epoch": 0.6354429454133305, "grad_norm": 2.2446389198303223, "learning_rate": 1.9187940987812703e-06, "loss": 0.8568, "step": 52140 }, { "epoch": 0.6355038816374782, "grad_norm": 2.1755926609039307, "learning_rate": 1.9184733803720333e-06, "loss": 0.8533, "step": 52145 }, { "epoch": 0.635564817861626, "grad_norm": 2.098994731903076, "learning_rate": 1.9181526619627968e-06, "loss": 0.9578, "step": 52150 }, { "epoch": 0.6356257540857738, "grad_norm": 1.8656266927719116, "learning_rate": 1.91783194355356e-06, "loss": 0.7766, "step": 52155 }, { "epoch": 0.6356866903099216, "grad_norm": 1.8292555809020996, "learning_rate": 1.917511225144323e-06, "loss": 0.7513, "step": 52160 }, { "epoch": 0.6357476265340695, "grad_norm": 1.8199098110198975, "learning_rate": 1.9171905067350867e-06, "loss": 0.8451, "step": 52165 }, { "epoch": 0.6358085627582173, "grad_norm": 2.1201395988464355, "learning_rate": 1.91686978832585e-06, "loss": 0.8423, "step": 52170 }, { "epoch": 0.6358694989823651, "grad_norm": 1.8744988441467285, "learning_rate": 1.9165490699166135e-06, "loss": 0.7954, "step": 52175 }, { "epoch": 0.6359304352065128, "grad_norm": 1.7214792966842651, "learning_rate": 1.916228351507377e-06, "loss": 0.8514, "step": 52180 }, { "epoch": 0.6359913714306606, "grad_norm": 2.160186529159546, "learning_rate": 1.91590763309814e-06, "loss": 0.8368, "step": 52185 }, { "epoch": 0.6360523076548085, "grad_norm": 1.9933964014053345, "learning_rate": 1.9155869146889034e-06, "loss": 0.8195, "step": 52190 }, { "epoch": 0.6361132438789563, "grad_norm": 2.194572925567627, "learning_rate": 1.9152661962796664e-06, "loss": 0.8467, "step": 52195 }, { "epoch": 0.6361741801031041, "grad_norm": 1.8991187810897827, "learning_rate": 1.91494547787043e-06, "loss": 0.8111, "step": 52200 }, { "epoch": 0.6362351163272519, "grad_norm": 1.896706461906433, "learning_rate": 1.9146247594611933e-06, "loss": 0.8038, "step": 52205 }, { "epoch": 0.6362960525513998, "grad_norm": 1.9864914417266846, "learning_rate": 1.9143040410519563e-06, "loss": 0.7503, "step": 52210 }, { "epoch": 0.6363569887755475, "grad_norm": 5.014170169830322, "learning_rate": 1.9139833226427198e-06, "loss": 0.8293, "step": 52215 }, { "epoch": 0.6364179249996953, "grad_norm": 2.2150163650512695, "learning_rate": 1.9136626042334832e-06, "loss": 0.8829, "step": 52220 }, { "epoch": 0.6364788612238431, "grad_norm": 1.7049508094787598, "learning_rate": 1.9133418858242462e-06, "loss": 0.849, "step": 52225 }, { "epoch": 0.6365397974479909, "grad_norm": 1.7831790447235107, "learning_rate": 1.9130211674150097e-06, "loss": 0.8482, "step": 52230 }, { "epoch": 0.6366007336721388, "grad_norm": 1.9360533952713013, "learning_rate": 1.912700449005773e-06, "loss": 0.8241, "step": 52235 }, { "epoch": 0.6366616698962866, "grad_norm": 2.2221877574920654, "learning_rate": 1.9123797305965366e-06, "loss": 0.8087, "step": 52240 }, { "epoch": 0.6367226061204343, "grad_norm": 1.9788451194763184, "learning_rate": 1.9120590121872996e-06, "loss": 0.7756, "step": 52245 }, { "epoch": 0.6367835423445821, "grad_norm": 2.0715384483337402, "learning_rate": 1.911738293778063e-06, "loss": 0.7811, "step": 52250 }, { "epoch": 0.6368444785687299, "grad_norm": 2.1825003623962402, "learning_rate": 1.9114175753688265e-06, "loss": 0.8235, "step": 52255 }, { "epoch": 0.6369054147928778, "grad_norm": 1.6814148426055908, "learning_rate": 1.91109685695959e-06, "loss": 0.7606, "step": 52260 }, { "epoch": 0.6369663510170256, "grad_norm": 1.9756797552108765, "learning_rate": 1.910776138550353e-06, "loss": 0.7905, "step": 52265 }, { "epoch": 0.6370272872411734, "grad_norm": 1.928358554840088, "learning_rate": 1.9104554201411164e-06, "loss": 0.8164, "step": 52270 }, { "epoch": 0.6370882234653212, "grad_norm": 2.0487539768218994, "learning_rate": 1.91013470173188e-06, "loss": 0.8496, "step": 52275 }, { "epoch": 0.637149159689469, "grad_norm": 2.3586552143096924, "learning_rate": 1.909813983322643e-06, "loss": 0.8098, "step": 52280 }, { "epoch": 0.6372100959136168, "grad_norm": 2.033146619796753, "learning_rate": 1.9094932649134063e-06, "loss": 0.8705, "step": 52285 }, { "epoch": 0.6372710321377646, "grad_norm": 1.8126591444015503, "learning_rate": 1.9091725465041693e-06, "loss": 0.8263, "step": 52290 }, { "epoch": 0.6373319683619124, "grad_norm": 1.952782154083252, "learning_rate": 1.9088518280949327e-06, "loss": 0.8338, "step": 52295 }, { "epoch": 0.6373929045860602, "grad_norm": 2.0500991344451904, "learning_rate": 1.908531109685696e-06, "loss": 0.7779, "step": 52300 }, { "epoch": 0.6374538408102081, "grad_norm": 2.1666674613952637, "learning_rate": 1.908210391276459e-06, "loss": 0.8014, "step": 52305 }, { "epoch": 0.6375147770343559, "grad_norm": 1.7090458869934082, "learning_rate": 1.9078896728672226e-06, "loss": 0.8156, "step": 52310 }, { "epoch": 0.6375757132585036, "grad_norm": 1.884925127029419, "learning_rate": 1.907568954457986e-06, "loss": 0.8235, "step": 52315 }, { "epoch": 0.6376366494826514, "grad_norm": 2.1267123222351074, "learning_rate": 1.9072482360487493e-06, "loss": 0.839, "step": 52320 }, { "epoch": 0.6376975857067992, "grad_norm": 2.73878812789917, "learning_rate": 1.9069275176395127e-06, "loss": 0.9052, "step": 52325 }, { "epoch": 0.6377585219309471, "grad_norm": 1.7559876441955566, "learning_rate": 1.906606799230276e-06, "loss": 0.8172, "step": 52330 }, { "epoch": 0.6378194581550949, "grad_norm": 2.2030506134033203, "learning_rate": 1.9062860808210392e-06, "loss": 0.8227, "step": 52335 }, { "epoch": 0.6378803943792427, "grad_norm": 1.7925037145614624, "learning_rate": 1.9059653624118026e-06, "loss": 0.8106, "step": 52340 }, { "epoch": 0.6379413306033905, "grad_norm": 2.0576117038726807, "learning_rate": 1.9056446440025658e-06, "loss": 0.8562, "step": 52345 }, { "epoch": 0.6380022668275382, "grad_norm": 2.720254898071289, "learning_rate": 1.9053239255933293e-06, "loss": 0.8251, "step": 52350 }, { "epoch": 0.6380632030516861, "grad_norm": 2.0049445629119873, "learning_rate": 1.9050032071840927e-06, "loss": 0.8277, "step": 52355 }, { "epoch": 0.6381241392758339, "grad_norm": 2.5376360416412354, "learning_rate": 1.9046824887748557e-06, "loss": 0.8384, "step": 52360 }, { "epoch": 0.6381850754999817, "grad_norm": 1.787879228591919, "learning_rate": 1.9043617703656192e-06, "loss": 0.7807, "step": 52365 }, { "epoch": 0.6382460117241295, "grad_norm": 2.10796856880188, "learning_rate": 1.9040410519563824e-06, "loss": 0.8054, "step": 52370 }, { "epoch": 0.6383069479482774, "grad_norm": 2.260807752609253, "learning_rate": 1.9037203335471458e-06, "loss": 0.8465, "step": 52375 }, { "epoch": 0.6383678841724252, "grad_norm": 2.041943311691284, "learning_rate": 1.903399615137909e-06, "loss": 0.8098, "step": 52380 }, { "epoch": 0.6384288203965729, "grad_norm": 1.7629033327102661, "learning_rate": 1.9030788967286723e-06, "loss": 0.8467, "step": 52385 }, { "epoch": 0.6384897566207207, "grad_norm": 1.9606609344482422, "learning_rate": 1.9027581783194357e-06, "loss": 0.7877, "step": 52390 }, { "epoch": 0.6385506928448685, "grad_norm": 1.503196120262146, "learning_rate": 1.9024374599101992e-06, "loss": 0.8459, "step": 52395 }, { "epoch": 0.6386116290690164, "grad_norm": 1.5867897272109985, "learning_rate": 1.9021167415009622e-06, "loss": 0.8024, "step": 52400 }, { "epoch": 0.6386725652931642, "grad_norm": 2.0918843746185303, "learning_rate": 1.9017960230917256e-06, "loss": 0.8892, "step": 52405 }, { "epoch": 0.638733501517312, "grad_norm": 2.5070652961730957, "learning_rate": 1.9014753046824889e-06, "loss": 0.8227, "step": 52410 }, { "epoch": 0.6387944377414598, "grad_norm": 2.0268895626068115, "learning_rate": 1.9011545862732523e-06, "loss": 0.8279, "step": 52415 }, { "epoch": 0.6388553739656075, "grad_norm": 1.906339406967163, "learning_rate": 1.9008338678640155e-06, "loss": 0.8762, "step": 52420 }, { "epoch": 0.6389163101897554, "grad_norm": 2.120291233062744, "learning_rate": 1.9005131494547788e-06, "loss": 0.8175, "step": 52425 }, { "epoch": 0.6389772464139032, "grad_norm": 2.0699124336242676, "learning_rate": 1.9001924310455422e-06, "loss": 0.8163, "step": 52430 }, { "epoch": 0.639038182638051, "grad_norm": 2.0787177085876465, "learning_rate": 1.8998717126363057e-06, "loss": 0.868, "step": 52435 }, { "epoch": 0.6390991188621988, "grad_norm": 1.7305344343185425, "learning_rate": 1.8995509942270687e-06, "loss": 0.8589, "step": 52440 }, { "epoch": 0.6391600550863467, "grad_norm": 1.9899131059646606, "learning_rate": 1.8992302758178321e-06, "loss": 0.8645, "step": 52445 }, { "epoch": 0.6392209913104945, "grad_norm": 1.8944110870361328, "learning_rate": 1.8989095574085953e-06, "loss": 0.8813, "step": 52450 }, { "epoch": 0.6392819275346422, "grad_norm": 1.9952404499053955, "learning_rate": 1.8985888389993588e-06, "loss": 0.7842, "step": 52455 }, { "epoch": 0.63934286375879, "grad_norm": 2.0400326251983643, "learning_rate": 1.898268120590122e-06, "loss": 0.8123, "step": 52460 }, { "epoch": 0.6394037999829378, "grad_norm": 2.366605520248413, "learning_rate": 1.8979474021808852e-06, "loss": 0.8328, "step": 52465 }, { "epoch": 0.6394647362070857, "grad_norm": 2.2040841579437256, "learning_rate": 1.8976266837716487e-06, "loss": 0.8305, "step": 52470 }, { "epoch": 0.6395256724312335, "grad_norm": 2.069554328918457, "learning_rate": 1.8973059653624121e-06, "loss": 0.8903, "step": 52475 }, { "epoch": 0.6395866086553813, "grad_norm": 1.6789391040802002, "learning_rate": 1.8969852469531751e-06, "loss": 0.7596, "step": 52480 }, { "epoch": 0.6396475448795291, "grad_norm": 1.841315746307373, "learning_rate": 1.8966645285439386e-06, "loss": 0.8502, "step": 52485 }, { "epoch": 0.6397084811036768, "grad_norm": 1.8943641185760498, "learning_rate": 1.8963438101347018e-06, "loss": 0.8624, "step": 52490 }, { "epoch": 0.6397694173278247, "grad_norm": 1.7834010124206543, "learning_rate": 1.8960230917254652e-06, "loss": 0.8032, "step": 52495 }, { "epoch": 0.6398303535519725, "grad_norm": 2.517906665802002, "learning_rate": 1.8957023733162287e-06, "loss": 0.7607, "step": 52500 }, { "epoch": 0.6398912897761203, "grad_norm": 2.2453935146331787, "learning_rate": 1.8953816549069917e-06, "loss": 0.8857, "step": 52505 }, { "epoch": 0.6399522260002681, "grad_norm": 1.7940969467163086, "learning_rate": 1.8950609364977551e-06, "loss": 0.8819, "step": 52510 }, { "epoch": 0.640013162224416, "grad_norm": 2.362783670425415, "learning_rate": 1.8947402180885186e-06, "loss": 0.8194, "step": 52515 }, { "epoch": 0.6400740984485638, "grad_norm": 1.6000308990478516, "learning_rate": 1.8944194996792816e-06, "loss": 0.8743, "step": 52520 }, { "epoch": 0.6401350346727115, "grad_norm": 2.524074077606201, "learning_rate": 1.894098781270045e-06, "loss": 0.878, "step": 52525 }, { "epoch": 0.6401959708968593, "grad_norm": 2.235865592956543, "learning_rate": 1.8937780628608083e-06, "loss": 0.8864, "step": 52530 }, { "epoch": 0.6402569071210071, "grad_norm": 1.870793342590332, "learning_rate": 1.8934573444515717e-06, "loss": 0.8555, "step": 52535 }, { "epoch": 0.640317843345155, "grad_norm": 1.992011308670044, "learning_rate": 1.8931366260423351e-06, "loss": 0.7766, "step": 52540 }, { "epoch": 0.6403787795693028, "grad_norm": 2.136262893676758, "learning_rate": 1.8928159076330982e-06, "loss": 0.8341, "step": 52545 }, { "epoch": 0.6404397157934506, "grad_norm": 2.04502272605896, "learning_rate": 1.8924951892238616e-06, "loss": 0.8251, "step": 52550 }, { "epoch": 0.6405006520175984, "grad_norm": 2.156486988067627, "learning_rate": 1.892174470814625e-06, "loss": 0.846, "step": 52555 }, { "epoch": 0.6405615882417461, "grad_norm": 2.0093584060668945, "learning_rate": 1.891853752405388e-06, "loss": 0.8283, "step": 52560 }, { "epoch": 0.640622524465894, "grad_norm": 2.4040791988372803, "learning_rate": 1.8915330339961515e-06, "loss": 0.8545, "step": 52565 }, { "epoch": 0.6406834606900418, "grad_norm": 1.916054606437683, "learning_rate": 1.891212315586915e-06, "loss": 0.7935, "step": 52570 }, { "epoch": 0.6407443969141896, "grad_norm": 2.0062923431396484, "learning_rate": 1.8908915971776782e-06, "loss": 0.8242, "step": 52575 }, { "epoch": 0.6408053331383374, "grad_norm": 1.8208309412002563, "learning_rate": 1.8905708787684416e-06, "loss": 0.779, "step": 52580 }, { "epoch": 0.6408662693624853, "grad_norm": 2.5171937942504883, "learning_rate": 1.8902501603592046e-06, "loss": 0.7273, "step": 52585 }, { "epoch": 0.6409272055866331, "grad_norm": 2.1614410877227783, "learning_rate": 1.889929441949968e-06, "loss": 0.8201, "step": 52590 }, { "epoch": 0.6409881418107808, "grad_norm": 2.1731934547424316, "learning_rate": 1.8896087235407315e-06, "loss": 0.837, "step": 52595 }, { "epoch": 0.6410490780349286, "grad_norm": 2.011488199234009, "learning_rate": 1.8892880051314947e-06, "loss": 0.872, "step": 52600 }, { "epoch": 0.6411100142590764, "grad_norm": 1.969690203666687, "learning_rate": 1.888967286722258e-06, "loss": 0.859, "step": 52605 }, { "epoch": 0.6411709504832243, "grad_norm": 1.9544806480407715, "learning_rate": 1.8886465683130214e-06, "loss": 0.8047, "step": 52610 }, { "epoch": 0.6412318867073721, "grad_norm": 2.0022034645080566, "learning_rate": 1.8883258499037846e-06, "loss": 0.7756, "step": 52615 }, { "epoch": 0.6412928229315199, "grad_norm": 2.141885757446289, "learning_rate": 1.888005131494548e-06, "loss": 0.9315, "step": 52620 }, { "epoch": 0.6413537591556677, "grad_norm": 1.8908932209014893, "learning_rate": 1.887684413085311e-06, "loss": 0.8659, "step": 52625 }, { "epoch": 0.6414146953798154, "grad_norm": 1.8514400720596313, "learning_rate": 1.8873636946760745e-06, "loss": 0.8809, "step": 52630 }, { "epoch": 0.6414756316039633, "grad_norm": 2.004460096359253, "learning_rate": 1.887042976266838e-06, "loss": 0.8574, "step": 52635 }, { "epoch": 0.6415365678281111, "grad_norm": 1.8426589965820312, "learning_rate": 1.8867222578576012e-06, "loss": 0.8014, "step": 52640 }, { "epoch": 0.6415975040522589, "grad_norm": 1.893288493156433, "learning_rate": 1.8864015394483644e-06, "loss": 0.853, "step": 52645 }, { "epoch": 0.6416584402764067, "grad_norm": 1.85979425907135, "learning_rate": 1.8860808210391279e-06, "loss": 0.8452, "step": 52650 }, { "epoch": 0.6417193765005546, "grad_norm": 1.9909018278121948, "learning_rate": 1.885760102629891e-06, "loss": 0.8483, "step": 52655 }, { "epoch": 0.6417803127247024, "grad_norm": 1.9389146566390991, "learning_rate": 1.8854393842206545e-06, "loss": 0.8304, "step": 52660 }, { "epoch": 0.6418412489488501, "grad_norm": 1.8918418884277344, "learning_rate": 1.8851186658114176e-06, "loss": 0.802, "step": 52665 }, { "epoch": 0.6419021851729979, "grad_norm": 2.20857310295105, "learning_rate": 1.884797947402181e-06, "loss": 0.8358, "step": 52670 }, { "epoch": 0.6419631213971457, "grad_norm": 2.815467357635498, "learning_rate": 1.8844772289929444e-06, "loss": 0.8687, "step": 52675 }, { "epoch": 0.6420240576212936, "grad_norm": 1.804317831993103, "learning_rate": 1.8841565105837077e-06, "loss": 0.8494, "step": 52680 }, { "epoch": 0.6420849938454414, "grad_norm": 2.327798366546631, "learning_rate": 1.8838357921744709e-06, "loss": 0.7931, "step": 52685 }, { "epoch": 0.6421459300695892, "grad_norm": 2.156684637069702, "learning_rate": 1.8835150737652343e-06, "loss": 0.8019, "step": 52690 }, { "epoch": 0.642206866293737, "grad_norm": 2.084996223449707, "learning_rate": 1.8831943553559976e-06, "loss": 0.8403, "step": 52695 }, { "epoch": 0.6422678025178847, "grad_norm": 1.7172536849975586, "learning_rate": 1.882873636946761e-06, "loss": 0.7381, "step": 52700 }, { "epoch": 0.6423287387420326, "grad_norm": 1.9682527780532837, "learning_rate": 1.882552918537524e-06, "loss": 0.8231, "step": 52705 }, { "epoch": 0.6423896749661804, "grad_norm": 1.6292673349380493, "learning_rate": 1.8822322001282875e-06, "loss": 0.8171, "step": 52710 }, { "epoch": 0.6424506111903282, "grad_norm": 2.1473841667175293, "learning_rate": 1.881911481719051e-06, "loss": 0.8238, "step": 52715 }, { "epoch": 0.642511547414476, "grad_norm": 2.460143566131592, "learning_rate": 1.8815907633098141e-06, "loss": 0.8362, "step": 52720 }, { "epoch": 0.6425724836386238, "grad_norm": 1.9875808954238892, "learning_rate": 1.8812700449005776e-06, "loss": 0.7852, "step": 52725 }, { "epoch": 0.6426334198627717, "grad_norm": 2.113126039505005, "learning_rate": 1.8809493264913408e-06, "loss": 0.812, "step": 52730 }, { "epoch": 0.6426943560869194, "grad_norm": 1.9041247367858887, "learning_rate": 1.880628608082104e-06, "loss": 0.7949, "step": 52735 }, { "epoch": 0.6427552923110672, "grad_norm": 1.5752403736114502, "learning_rate": 1.8803078896728675e-06, "loss": 0.8051, "step": 52740 }, { "epoch": 0.642816228535215, "grad_norm": 2.202648878097534, "learning_rate": 1.8799871712636305e-06, "loss": 0.8045, "step": 52745 }, { "epoch": 0.6428771647593629, "grad_norm": 2.1433138847351074, "learning_rate": 1.879666452854394e-06, "loss": 0.8801, "step": 52750 }, { "epoch": 0.6429381009835107, "grad_norm": 2.501039981842041, "learning_rate": 1.8793457344451574e-06, "loss": 0.8248, "step": 52755 }, { "epoch": 0.6429990372076585, "grad_norm": 1.7252888679504395, "learning_rate": 1.8790250160359206e-06, "loss": 0.8353, "step": 52760 }, { "epoch": 0.6430599734318063, "grad_norm": 1.8981982469558716, "learning_rate": 1.878704297626684e-06, "loss": 0.8016, "step": 52765 }, { "epoch": 0.643120909655954, "grad_norm": 2.100191116333008, "learning_rate": 1.8783835792174473e-06, "loss": 0.9245, "step": 52770 }, { "epoch": 0.6431818458801019, "grad_norm": 2.0069422721862793, "learning_rate": 1.8780628608082105e-06, "loss": 0.8391, "step": 52775 }, { "epoch": 0.6432427821042497, "grad_norm": 1.7312625646591187, "learning_rate": 1.877742142398974e-06, "loss": 0.8245, "step": 52780 }, { "epoch": 0.6433037183283975, "grad_norm": 2.0147080421447754, "learning_rate": 1.877421423989737e-06, "loss": 0.8275, "step": 52785 }, { "epoch": 0.6433646545525453, "grad_norm": 2.043269395828247, "learning_rate": 1.8771007055805004e-06, "loss": 0.8298, "step": 52790 }, { "epoch": 0.6434255907766931, "grad_norm": 2.0600619316101074, "learning_rate": 1.8767799871712638e-06, "loss": 0.7819, "step": 52795 }, { "epoch": 0.643486527000841, "grad_norm": 2.1636345386505127, "learning_rate": 1.876459268762027e-06, "loss": 0.8166, "step": 52800 }, { "epoch": 0.6435474632249887, "grad_norm": 1.865221619606018, "learning_rate": 1.8761385503527905e-06, "loss": 0.8109, "step": 52805 }, { "epoch": 0.6436083994491365, "grad_norm": 1.7713290452957153, "learning_rate": 1.8758178319435537e-06, "loss": 0.8917, "step": 52810 }, { "epoch": 0.6436693356732843, "grad_norm": 1.859731912612915, "learning_rate": 1.875497113534317e-06, "loss": 0.8282, "step": 52815 }, { "epoch": 0.6437302718974321, "grad_norm": 2.074420213699341, "learning_rate": 1.8751763951250804e-06, "loss": 0.8025, "step": 52820 }, { "epoch": 0.64379120812158, "grad_norm": 2.1347789764404297, "learning_rate": 1.8748556767158434e-06, "loss": 0.8703, "step": 52825 }, { "epoch": 0.6438521443457278, "grad_norm": 1.9059958457946777, "learning_rate": 1.8745349583066068e-06, "loss": 0.8587, "step": 52830 }, { "epoch": 0.6439130805698756, "grad_norm": 1.9575560092926025, "learning_rate": 1.8742142398973703e-06, "loss": 0.747, "step": 52835 }, { "epoch": 0.6439740167940233, "grad_norm": 2.008040189743042, "learning_rate": 1.8738935214881335e-06, "loss": 0.8239, "step": 52840 }, { "epoch": 0.6440349530181712, "grad_norm": 2.5779123306274414, "learning_rate": 1.873572803078897e-06, "loss": 0.7887, "step": 52845 }, { "epoch": 0.644095889242319, "grad_norm": 1.795041561126709, "learning_rate": 1.8732520846696604e-06, "loss": 0.8849, "step": 52850 }, { "epoch": 0.6441568254664668, "grad_norm": 2.696791410446167, "learning_rate": 1.8729313662604234e-06, "loss": 0.8146, "step": 52855 }, { "epoch": 0.6442177616906146, "grad_norm": 1.9276891946792603, "learning_rate": 1.8726106478511869e-06, "loss": 0.7478, "step": 52860 }, { "epoch": 0.6442786979147624, "grad_norm": 2.336289644241333, "learning_rate": 1.87228992944195e-06, "loss": 0.8277, "step": 52865 }, { "epoch": 0.6443396341389103, "grad_norm": 1.9166233539581299, "learning_rate": 1.8719692110327133e-06, "loss": 0.8508, "step": 52870 }, { "epoch": 0.644400570363058, "grad_norm": 2.0030717849731445, "learning_rate": 1.8716484926234767e-06, "loss": 0.8333, "step": 52875 }, { "epoch": 0.6444615065872058, "grad_norm": 1.8989136219024658, "learning_rate": 1.87132777421424e-06, "loss": 0.7626, "step": 52880 }, { "epoch": 0.6445224428113536, "grad_norm": 2.225910186767578, "learning_rate": 1.8710070558050034e-06, "loss": 0.9079, "step": 52885 }, { "epoch": 0.6445833790355014, "grad_norm": 2.0254812240600586, "learning_rate": 1.8706863373957669e-06, "loss": 0.7983, "step": 52890 }, { "epoch": 0.6446443152596493, "grad_norm": 2.0273263454437256, "learning_rate": 1.8703656189865299e-06, "loss": 0.9112, "step": 52895 }, { "epoch": 0.6447052514837971, "grad_norm": 1.8915126323699951, "learning_rate": 1.8700449005772933e-06, "loss": 0.8298, "step": 52900 }, { "epoch": 0.6447661877079449, "grad_norm": 2.019473075866699, "learning_rate": 1.8697241821680568e-06, "loss": 0.7481, "step": 52905 }, { "epoch": 0.6448271239320926, "grad_norm": 2.129176378250122, "learning_rate": 1.8694034637588198e-06, "loss": 0.861, "step": 52910 }, { "epoch": 0.6448880601562405, "grad_norm": 2.1850578784942627, "learning_rate": 1.8690827453495832e-06, "loss": 0.7971, "step": 52915 }, { "epoch": 0.6449489963803883, "grad_norm": 1.7557629346847534, "learning_rate": 1.8687620269403464e-06, "loss": 0.7744, "step": 52920 }, { "epoch": 0.6450099326045361, "grad_norm": 2.094698429107666, "learning_rate": 1.8684413085311099e-06, "loss": 0.8523, "step": 52925 }, { "epoch": 0.6450708688286839, "grad_norm": 2.1388988494873047, "learning_rate": 1.8681205901218733e-06, "loss": 0.8735, "step": 52930 }, { "epoch": 0.6451318050528317, "grad_norm": 2.0649406909942627, "learning_rate": 1.8677998717126363e-06, "loss": 0.8979, "step": 52935 }, { "epoch": 0.6451927412769796, "grad_norm": 2.1802732944488525, "learning_rate": 1.8674791533033998e-06, "loss": 0.8419, "step": 52940 }, { "epoch": 0.6452536775011273, "grad_norm": 1.749326467514038, "learning_rate": 1.8671584348941632e-06, "loss": 0.821, "step": 52945 }, { "epoch": 0.6453146137252751, "grad_norm": 2.4258744716644287, "learning_rate": 1.8668377164849264e-06, "loss": 0.8015, "step": 52950 }, { "epoch": 0.6453755499494229, "grad_norm": 3.5632472038269043, "learning_rate": 1.8665169980756897e-06, "loss": 0.7841, "step": 52955 }, { "epoch": 0.6454364861735707, "grad_norm": 2.1643002033233643, "learning_rate": 1.866196279666453e-06, "loss": 0.7952, "step": 52960 }, { "epoch": 0.6454974223977186, "grad_norm": 1.942314624786377, "learning_rate": 1.8658755612572163e-06, "loss": 0.8453, "step": 52965 }, { "epoch": 0.6455583586218664, "grad_norm": 2.186384439468384, "learning_rate": 1.8655548428479798e-06, "loss": 0.8271, "step": 52970 }, { "epoch": 0.6456192948460142, "grad_norm": 1.7644836902618408, "learning_rate": 1.8652341244387428e-06, "loss": 0.7624, "step": 52975 }, { "epoch": 0.6456802310701619, "grad_norm": 1.828583002090454, "learning_rate": 1.8649134060295062e-06, "loss": 0.8104, "step": 52980 }, { "epoch": 0.6457411672943097, "grad_norm": 1.5983855724334717, "learning_rate": 1.8645926876202697e-06, "loss": 0.8267, "step": 52985 }, { "epoch": 0.6458021035184576, "grad_norm": 1.805112361907959, "learning_rate": 1.864271969211033e-06, "loss": 0.794, "step": 52990 }, { "epoch": 0.6458630397426054, "grad_norm": 2.1577377319335938, "learning_rate": 1.8639512508017961e-06, "loss": 0.8129, "step": 52995 }, { "epoch": 0.6459239759667532, "grad_norm": 2.0569708347320557, "learning_rate": 1.8636305323925594e-06, "loss": 0.8415, "step": 53000 }, { "epoch": 0.645984912190901, "grad_norm": 2.076307535171509, "learning_rate": 1.8633098139833228e-06, "loss": 0.8261, "step": 53005 }, { "epoch": 0.6460458484150489, "grad_norm": 1.8296656608581543, "learning_rate": 1.8629890955740862e-06, "loss": 0.7842, "step": 53010 }, { "epoch": 0.6461067846391966, "grad_norm": 4.846462726593018, "learning_rate": 1.8626683771648493e-06, "loss": 0.8637, "step": 53015 }, { "epoch": 0.6461677208633444, "grad_norm": 2.294811487197876, "learning_rate": 1.8623476587556127e-06, "loss": 0.8859, "step": 53020 }, { "epoch": 0.6462286570874922, "grad_norm": 2.0190248489379883, "learning_rate": 1.8620269403463761e-06, "loss": 0.7466, "step": 53025 }, { "epoch": 0.64628959331164, "grad_norm": 1.8534202575683594, "learning_rate": 1.8617062219371394e-06, "loss": 0.8272, "step": 53030 }, { "epoch": 0.6463505295357879, "grad_norm": 2.1504204273223877, "learning_rate": 1.8613855035279026e-06, "loss": 0.8166, "step": 53035 }, { "epoch": 0.6464114657599357, "grad_norm": 1.7049078941345215, "learning_rate": 1.8610647851186658e-06, "loss": 0.8862, "step": 53040 }, { "epoch": 0.6464724019840835, "grad_norm": 1.8069276809692383, "learning_rate": 1.8607440667094293e-06, "loss": 0.8335, "step": 53045 }, { "epoch": 0.6465333382082312, "grad_norm": 3.0239529609680176, "learning_rate": 1.8604233483001927e-06, "loss": 0.7949, "step": 53050 }, { "epoch": 0.646594274432379, "grad_norm": 2.133178472518921, "learning_rate": 1.8601026298909557e-06, "loss": 0.8287, "step": 53055 }, { "epoch": 0.6466552106565269, "grad_norm": 1.74905526638031, "learning_rate": 1.8597819114817192e-06, "loss": 0.8423, "step": 53060 }, { "epoch": 0.6467161468806747, "grad_norm": 1.9213252067565918, "learning_rate": 1.8594611930724826e-06, "loss": 0.8891, "step": 53065 }, { "epoch": 0.6467770831048225, "grad_norm": 2.141662359237671, "learning_rate": 1.8591404746632458e-06, "loss": 0.8959, "step": 53070 }, { "epoch": 0.6468380193289703, "grad_norm": 2.015460729598999, "learning_rate": 1.8588197562540093e-06, "loss": 0.8282, "step": 53075 }, { "epoch": 0.6468989555531182, "grad_norm": 2.049126625061035, "learning_rate": 1.8584990378447723e-06, "loss": 0.8063, "step": 53080 }, { "epoch": 0.6469598917772659, "grad_norm": 1.8711448907852173, "learning_rate": 1.8581783194355357e-06, "loss": 0.8714, "step": 53085 }, { "epoch": 0.6470208280014137, "grad_norm": 1.9526009559631348, "learning_rate": 1.8578576010262992e-06, "loss": 0.8578, "step": 53090 }, { "epoch": 0.6470817642255615, "grad_norm": 2.1049840450286865, "learning_rate": 1.8575368826170622e-06, "loss": 0.8064, "step": 53095 }, { "epoch": 0.6471427004497093, "grad_norm": 2.088850736618042, "learning_rate": 1.8572161642078256e-06, "loss": 0.8562, "step": 53100 }, { "epoch": 0.6472036366738572, "grad_norm": 2.228307008743286, "learning_rate": 1.856895445798589e-06, "loss": 0.7833, "step": 53105 }, { "epoch": 0.647264572898005, "grad_norm": 2.690054178237915, "learning_rate": 1.8565747273893523e-06, "loss": 0.8669, "step": 53110 }, { "epoch": 0.6473255091221528, "grad_norm": 2.1489248275756836, "learning_rate": 1.8562540089801157e-06, "loss": 0.8548, "step": 53115 }, { "epoch": 0.6473864453463005, "grad_norm": 2.428541660308838, "learning_rate": 1.8559332905708788e-06, "loss": 0.8358, "step": 53120 }, { "epoch": 0.6474473815704483, "grad_norm": 1.94120454788208, "learning_rate": 1.8556125721616422e-06, "loss": 0.7997, "step": 53125 }, { "epoch": 0.6475083177945962, "grad_norm": 1.7949597835540771, "learning_rate": 1.8552918537524056e-06, "loss": 0.7711, "step": 53130 }, { "epoch": 0.647569254018744, "grad_norm": 1.9073185920715332, "learning_rate": 1.8549711353431687e-06, "loss": 0.7947, "step": 53135 }, { "epoch": 0.6476301902428918, "grad_norm": 1.8377265930175781, "learning_rate": 1.854650416933932e-06, "loss": 0.8699, "step": 53140 }, { "epoch": 0.6476911264670396, "grad_norm": 1.9262689352035522, "learning_rate": 1.8543296985246955e-06, "loss": 0.8068, "step": 53145 }, { "epoch": 0.6477520626911875, "grad_norm": 1.8053553104400635, "learning_rate": 1.8540089801154588e-06, "loss": 0.8222, "step": 53150 }, { "epoch": 0.6478129989153352, "grad_norm": 1.7812713384628296, "learning_rate": 1.8536882617062222e-06, "loss": 0.733, "step": 53155 }, { "epoch": 0.647873935139483, "grad_norm": 1.8943839073181152, "learning_rate": 1.8533675432969852e-06, "loss": 0.8439, "step": 53160 }, { "epoch": 0.6479348713636308, "grad_norm": 2.622847080230713, "learning_rate": 1.8530468248877487e-06, "loss": 0.8195, "step": 53165 }, { "epoch": 0.6479958075877786, "grad_norm": 2.088651657104492, "learning_rate": 1.852726106478512e-06, "loss": 0.7716, "step": 53170 }, { "epoch": 0.6480567438119265, "grad_norm": 2.015979528427124, "learning_rate": 1.8524053880692751e-06, "loss": 0.8234, "step": 53175 }, { "epoch": 0.6481176800360743, "grad_norm": 1.8433140516281128, "learning_rate": 1.8520846696600386e-06, "loss": 0.8418, "step": 53180 }, { "epoch": 0.6481786162602221, "grad_norm": 1.9390645027160645, "learning_rate": 1.851763951250802e-06, "loss": 0.8447, "step": 53185 }, { "epoch": 0.6482395524843698, "grad_norm": 1.8044958114624023, "learning_rate": 1.8514432328415652e-06, "loss": 0.8326, "step": 53190 }, { "epoch": 0.6483004887085176, "grad_norm": 2.2560930252075195, "learning_rate": 1.8511225144323287e-06, "loss": 0.7777, "step": 53195 }, { "epoch": 0.6483614249326655, "grad_norm": 2.729024887084961, "learning_rate": 1.8508017960230921e-06, "loss": 0.8121, "step": 53200 }, { "epoch": 0.6484223611568133, "grad_norm": 1.9335432052612305, "learning_rate": 1.8504810776138551e-06, "loss": 0.837, "step": 53205 }, { "epoch": 0.6484832973809611, "grad_norm": 1.9714008569717407, "learning_rate": 1.8501603592046186e-06, "loss": 0.8234, "step": 53210 }, { "epoch": 0.6485442336051089, "grad_norm": 1.9009675979614258, "learning_rate": 1.8498396407953818e-06, "loss": 0.8448, "step": 53215 }, { "epoch": 0.6486051698292566, "grad_norm": 1.6862261295318604, "learning_rate": 1.849518922386145e-06, "loss": 0.8106, "step": 53220 }, { "epoch": 0.6486661060534045, "grad_norm": 1.5977877378463745, "learning_rate": 1.8491982039769085e-06, "loss": 0.8673, "step": 53225 }, { "epoch": 0.6487270422775523, "grad_norm": 1.7909168004989624, "learning_rate": 1.8488774855676717e-06, "loss": 0.7989, "step": 53230 }, { "epoch": 0.6487879785017001, "grad_norm": 1.853385090827942, "learning_rate": 1.8485567671584351e-06, "loss": 0.7761, "step": 53235 }, { "epoch": 0.6488489147258479, "grad_norm": 1.940476655960083, "learning_rate": 1.8482360487491986e-06, "loss": 0.8053, "step": 53240 }, { "epoch": 0.6489098509499958, "grad_norm": 1.7398779392242432, "learning_rate": 1.8479153303399616e-06, "loss": 0.8129, "step": 53245 }, { "epoch": 0.6489707871741436, "grad_norm": 1.7026656866073608, "learning_rate": 1.847594611930725e-06, "loss": 0.8143, "step": 53250 }, { "epoch": 0.6490317233982913, "grad_norm": 1.77427077293396, "learning_rate": 1.8472738935214883e-06, "loss": 0.8542, "step": 53255 }, { "epoch": 0.6490926596224391, "grad_norm": 1.913757085800171, "learning_rate": 1.8469531751122515e-06, "loss": 0.7976, "step": 53260 }, { "epoch": 0.6491535958465869, "grad_norm": 2.0421910285949707, "learning_rate": 1.846632456703015e-06, "loss": 0.7848, "step": 53265 }, { "epoch": 0.6492145320707348, "grad_norm": 1.8713959455490112, "learning_rate": 1.8463117382937782e-06, "loss": 0.8483, "step": 53270 }, { "epoch": 0.6492754682948826, "grad_norm": 1.8229094743728638, "learning_rate": 1.8459910198845416e-06, "loss": 0.8907, "step": 53275 }, { "epoch": 0.6493364045190304, "grad_norm": 2.279050588607788, "learning_rate": 1.845670301475305e-06, "loss": 0.831, "step": 53280 }, { "epoch": 0.6493973407431782, "grad_norm": 1.6140834093093872, "learning_rate": 1.845349583066068e-06, "loss": 0.8167, "step": 53285 }, { "epoch": 0.6494582769673259, "grad_norm": 1.9149774312973022, "learning_rate": 1.8450288646568315e-06, "loss": 0.8166, "step": 53290 }, { "epoch": 0.6495192131914738, "grad_norm": 2.3021485805511475, "learning_rate": 1.8447081462475947e-06, "loss": 0.8347, "step": 53295 }, { "epoch": 0.6495801494156216, "grad_norm": 1.820689082145691, "learning_rate": 1.8443874278383582e-06, "loss": 0.883, "step": 53300 }, { "epoch": 0.6496410856397694, "grad_norm": 2.0893094539642334, "learning_rate": 1.8440667094291214e-06, "loss": 0.8193, "step": 53305 }, { "epoch": 0.6497020218639172, "grad_norm": 2.0388944149017334, "learning_rate": 1.8437459910198846e-06, "loss": 0.7863, "step": 53310 }, { "epoch": 0.6497629580880651, "grad_norm": 1.4830001592636108, "learning_rate": 1.843425272610648e-06, "loss": 0.8372, "step": 53315 }, { "epoch": 0.6498238943122129, "grad_norm": 1.7537858486175537, "learning_rate": 1.8431045542014115e-06, "loss": 0.8151, "step": 53320 }, { "epoch": 0.6498848305363606, "grad_norm": 1.8286681175231934, "learning_rate": 1.8427838357921745e-06, "loss": 0.7885, "step": 53325 }, { "epoch": 0.6499457667605084, "grad_norm": 2.2254862785339355, "learning_rate": 1.842463117382938e-06, "loss": 0.8752, "step": 53330 }, { "epoch": 0.6500067029846562, "grad_norm": 1.7209222316741943, "learning_rate": 1.8421423989737012e-06, "loss": 0.7876, "step": 53335 }, { "epoch": 0.6500676392088041, "grad_norm": 1.7344146966934204, "learning_rate": 1.8418216805644646e-06, "loss": 0.8684, "step": 53340 }, { "epoch": 0.6501285754329519, "grad_norm": 1.868672251701355, "learning_rate": 1.8415009621552279e-06, "loss": 0.8384, "step": 53345 }, { "epoch": 0.6501895116570997, "grad_norm": 1.971711277961731, "learning_rate": 1.841180243745991e-06, "loss": 0.8597, "step": 53350 }, { "epoch": 0.6502504478812475, "grad_norm": 2.737701416015625, "learning_rate": 1.8408595253367545e-06, "loss": 0.8779, "step": 53355 }, { "epoch": 0.6503113841053952, "grad_norm": 2.254196882247925, "learning_rate": 1.840538806927518e-06, "loss": 0.7998, "step": 53360 }, { "epoch": 0.6503723203295431, "grad_norm": 2.0914459228515625, "learning_rate": 1.840218088518281e-06, "loss": 0.7992, "step": 53365 }, { "epoch": 0.6504332565536909, "grad_norm": 2.1005401611328125, "learning_rate": 1.8398973701090444e-06, "loss": 0.8399, "step": 53370 }, { "epoch": 0.6504941927778387, "grad_norm": 1.8132033348083496, "learning_rate": 1.8395766516998076e-06, "loss": 0.8352, "step": 53375 }, { "epoch": 0.6505551290019865, "grad_norm": 1.9519013166427612, "learning_rate": 1.839255933290571e-06, "loss": 0.8585, "step": 53380 }, { "epoch": 0.6506160652261344, "grad_norm": 1.9189201593399048, "learning_rate": 1.8389352148813343e-06, "loss": 0.7734, "step": 53385 }, { "epoch": 0.6506770014502822, "grad_norm": 2.5356929302215576, "learning_rate": 1.8386144964720975e-06, "loss": 0.7793, "step": 53390 }, { "epoch": 0.6507379376744299, "grad_norm": 1.8319982290267944, "learning_rate": 1.838293778062861e-06, "loss": 0.8018, "step": 53395 }, { "epoch": 0.6507988738985777, "grad_norm": 1.9458982944488525, "learning_rate": 1.8379730596536244e-06, "loss": 0.7384, "step": 53400 }, { "epoch": 0.6508598101227255, "grad_norm": 1.8987154960632324, "learning_rate": 1.8376523412443874e-06, "loss": 0.7953, "step": 53405 }, { "epoch": 0.6509207463468734, "grad_norm": 1.7164362668991089, "learning_rate": 1.8373316228351509e-06, "loss": 0.8269, "step": 53410 }, { "epoch": 0.6509816825710212, "grad_norm": 2.2261946201324463, "learning_rate": 1.8370109044259141e-06, "loss": 0.8464, "step": 53415 }, { "epoch": 0.651042618795169, "grad_norm": 1.9435659646987915, "learning_rate": 1.8366901860166776e-06, "loss": 0.7877, "step": 53420 }, { "epoch": 0.6511035550193168, "grad_norm": 2.1176364421844482, "learning_rate": 1.836369467607441e-06, "loss": 0.953, "step": 53425 }, { "epoch": 0.6511644912434645, "grad_norm": 2.059798240661621, "learning_rate": 1.836048749198204e-06, "loss": 0.8195, "step": 53430 }, { "epoch": 0.6512254274676124, "grad_norm": 1.9449396133422852, "learning_rate": 1.8357280307889675e-06, "loss": 0.8473, "step": 53435 }, { "epoch": 0.6512863636917602, "grad_norm": 1.8014416694641113, "learning_rate": 1.8354073123797309e-06, "loss": 0.8302, "step": 53440 }, { "epoch": 0.651347299915908, "grad_norm": 1.9695998430252075, "learning_rate": 1.835086593970494e-06, "loss": 0.8475, "step": 53445 }, { "epoch": 0.6514082361400558, "grad_norm": 2.3023505210876465, "learning_rate": 1.8347658755612573e-06, "loss": 0.8783, "step": 53450 }, { "epoch": 0.6514691723642037, "grad_norm": 2.0764150619506836, "learning_rate": 1.8344451571520206e-06, "loss": 0.814, "step": 53455 }, { "epoch": 0.6515301085883515, "grad_norm": 1.584414005279541, "learning_rate": 1.834124438742784e-06, "loss": 0.7723, "step": 53460 }, { "epoch": 0.6515910448124992, "grad_norm": 1.9629490375518799, "learning_rate": 1.8338037203335475e-06, "loss": 0.7898, "step": 53465 }, { "epoch": 0.651651981036647, "grad_norm": 1.8422373533248901, "learning_rate": 1.8334830019243105e-06, "loss": 0.7921, "step": 53470 }, { "epoch": 0.6517129172607948, "grad_norm": 1.8667486906051636, "learning_rate": 1.833162283515074e-06, "loss": 0.8063, "step": 53475 }, { "epoch": 0.6517738534849427, "grad_norm": 3.247920274734497, "learning_rate": 1.8328415651058374e-06, "loss": 0.8227, "step": 53480 }, { "epoch": 0.6518347897090905, "grad_norm": 1.8737417459487915, "learning_rate": 1.8325208466966004e-06, "loss": 0.8592, "step": 53485 }, { "epoch": 0.6518957259332383, "grad_norm": 1.9709532260894775, "learning_rate": 1.8322001282873638e-06, "loss": 0.8521, "step": 53490 }, { "epoch": 0.6519566621573861, "grad_norm": 2.5513675212860107, "learning_rate": 1.8318794098781273e-06, "loss": 0.883, "step": 53495 }, { "epoch": 0.6520175983815338, "grad_norm": 1.7341585159301758, "learning_rate": 1.8315586914688905e-06, "loss": 0.7824, "step": 53500 }, { "epoch": 0.6520785346056817, "grad_norm": 2.126310110092163, "learning_rate": 1.831237973059654e-06, "loss": 0.8626, "step": 53505 }, { "epoch": 0.6521394708298295, "grad_norm": 2.0260605812072754, "learning_rate": 1.830917254650417e-06, "loss": 0.7893, "step": 53510 }, { "epoch": 0.6522004070539773, "grad_norm": 1.8493678569793701, "learning_rate": 1.8305965362411804e-06, "loss": 0.7985, "step": 53515 }, { "epoch": 0.6522613432781251, "grad_norm": 2.0547471046447754, "learning_rate": 1.8302758178319438e-06, "loss": 0.8947, "step": 53520 }, { "epoch": 0.652322279502273, "grad_norm": 2.01682448387146, "learning_rate": 1.8299550994227068e-06, "loss": 0.7722, "step": 53525 }, { "epoch": 0.6523832157264208, "grad_norm": 2.1504368782043457, "learning_rate": 1.8296343810134703e-06, "loss": 0.8683, "step": 53530 }, { "epoch": 0.6524441519505685, "grad_norm": 1.9592655897140503, "learning_rate": 1.8293136626042337e-06, "loss": 0.8458, "step": 53535 }, { "epoch": 0.6525050881747163, "grad_norm": 1.895216941833496, "learning_rate": 1.828992944194997e-06, "loss": 0.8422, "step": 53540 }, { "epoch": 0.6525660243988641, "grad_norm": 1.8192386627197266, "learning_rate": 1.8286722257857604e-06, "loss": 0.8246, "step": 53545 }, { "epoch": 0.652626960623012, "grad_norm": 1.9655832052230835, "learning_rate": 1.8283515073765234e-06, "loss": 0.7835, "step": 53550 }, { "epoch": 0.6526878968471598, "grad_norm": 1.481095552444458, "learning_rate": 1.8280307889672868e-06, "loss": 0.7943, "step": 53555 }, { "epoch": 0.6527488330713076, "grad_norm": 2.432199716567993, "learning_rate": 1.8277100705580503e-06, "loss": 0.7981, "step": 53560 }, { "epoch": 0.6528097692954554, "grad_norm": 1.7377076148986816, "learning_rate": 1.8273893521488135e-06, "loss": 0.8028, "step": 53565 }, { "epoch": 0.6528707055196031, "grad_norm": 1.9120005369186401, "learning_rate": 1.8270686337395767e-06, "loss": 0.8083, "step": 53570 }, { "epoch": 0.652931641743751, "grad_norm": 1.8993377685546875, "learning_rate": 1.8267479153303402e-06, "loss": 0.8249, "step": 53575 }, { "epoch": 0.6529925779678988, "grad_norm": 2.1557486057281494, "learning_rate": 1.8264271969211034e-06, "loss": 0.7986, "step": 53580 }, { "epoch": 0.6530535141920466, "grad_norm": 1.9477475881576538, "learning_rate": 1.8261064785118668e-06, "loss": 0.807, "step": 53585 }, { "epoch": 0.6531144504161944, "grad_norm": 2.0579638481140137, "learning_rate": 1.8257857601026299e-06, "loss": 0.8191, "step": 53590 }, { "epoch": 0.6531753866403422, "grad_norm": 1.9737701416015625, "learning_rate": 1.8254650416933933e-06, "loss": 0.8123, "step": 53595 }, { "epoch": 0.6532363228644901, "grad_norm": 2.268670082092285, "learning_rate": 1.8251443232841567e-06, "loss": 0.8526, "step": 53600 }, { "epoch": 0.6532972590886378, "grad_norm": 2.1354808807373047, "learning_rate": 1.82482360487492e-06, "loss": 0.8084, "step": 53605 }, { "epoch": 0.6533581953127856, "grad_norm": 1.8793796300888062, "learning_rate": 1.8245028864656832e-06, "loss": 0.7964, "step": 53610 }, { "epoch": 0.6534191315369334, "grad_norm": 1.7892316579818726, "learning_rate": 1.8241821680564466e-06, "loss": 0.8657, "step": 53615 }, { "epoch": 0.6534800677610813, "grad_norm": 1.9177799224853516, "learning_rate": 1.8238614496472099e-06, "loss": 0.8613, "step": 53620 }, { "epoch": 0.6535410039852291, "grad_norm": 2.547773599624634, "learning_rate": 1.8235407312379733e-06, "loss": 0.7765, "step": 53625 }, { "epoch": 0.6536019402093769, "grad_norm": 1.807457685470581, "learning_rate": 1.8232200128287363e-06, "loss": 0.8776, "step": 53630 }, { "epoch": 0.6536628764335247, "grad_norm": 1.760168433189392, "learning_rate": 1.8228992944194998e-06, "loss": 0.8744, "step": 53635 }, { "epoch": 0.6537238126576724, "grad_norm": 1.705744743347168, "learning_rate": 1.8225785760102632e-06, "loss": 0.758, "step": 53640 }, { "epoch": 0.6537847488818203, "grad_norm": 2.0932810306549072, "learning_rate": 1.8222578576010264e-06, "loss": 0.8748, "step": 53645 }, { "epoch": 0.6538456851059681, "grad_norm": 2.6806278228759766, "learning_rate": 1.8219371391917899e-06, "loss": 0.8638, "step": 53650 }, { "epoch": 0.6539066213301159, "grad_norm": 1.9103281497955322, "learning_rate": 1.8216164207825531e-06, "loss": 0.8828, "step": 53655 }, { "epoch": 0.6539675575542637, "grad_norm": 1.9934494495391846, "learning_rate": 1.8212957023733163e-06, "loss": 0.8151, "step": 53660 }, { "epoch": 0.6540284937784115, "grad_norm": 1.9218419790267944, "learning_rate": 1.8209749839640798e-06, "loss": 0.853, "step": 53665 }, { "epoch": 0.6540894300025594, "grad_norm": 1.9483941793441772, "learning_rate": 1.8206542655548428e-06, "loss": 0.8415, "step": 53670 }, { "epoch": 0.6541503662267071, "grad_norm": 2.0370256900787354, "learning_rate": 1.8203335471456062e-06, "loss": 0.9201, "step": 53675 }, { "epoch": 0.6542113024508549, "grad_norm": 2.6345200538635254, "learning_rate": 1.8200128287363697e-06, "loss": 0.8586, "step": 53680 }, { "epoch": 0.6542722386750027, "grad_norm": 1.7743953466415405, "learning_rate": 1.819692110327133e-06, "loss": 0.7864, "step": 53685 }, { "epoch": 0.6543331748991505, "grad_norm": 1.8201240301132202, "learning_rate": 1.8193713919178963e-06, "loss": 0.8078, "step": 53690 }, { "epoch": 0.6543941111232984, "grad_norm": 2.3113648891448975, "learning_rate": 1.8190506735086596e-06, "loss": 0.8433, "step": 53695 }, { "epoch": 0.6544550473474462, "grad_norm": 1.8912920951843262, "learning_rate": 1.8187299550994228e-06, "loss": 0.8233, "step": 53700 }, { "epoch": 0.654515983571594, "grad_norm": 1.823617935180664, "learning_rate": 1.8184092366901862e-06, "loss": 0.8571, "step": 53705 }, { "epoch": 0.6545769197957417, "grad_norm": 1.9946720600128174, "learning_rate": 1.8180885182809493e-06, "loss": 0.8611, "step": 53710 }, { "epoch": 0.6546378560198896, "grad_norm": 1.8262028694152832, "learning_rate": 1.8177677998717127e-06, "loss": 0.7707, "step": 53715 }, { "epoch": 0.6546987922440374, "grad_norm": 1.9188268184661865, "learning_rate": 1.8174470814624761e-06, "loss": 0.8327, "step": 53720 }, { "epoch": 0.6547597284681852, "grad_norm": 1.9830363988876343, "learning_rate": 1.8171263630532394e-06, "loss": 0.7993, "step": 53725 }, { "epoch": 0.654820664692333, "grad_norm": 1.9005517959594727, "learning_rate": 1.8168056446440028e-06, "loss": 0.8394, "step": 53730 }, { "epoch": 0.6548816009164808, "grad_norm": 1.9370323419570923, "learning_rate": 1.816484926234766e-06, "loss": 0.7882, "step": 53735 }, { "epoch": 0.6549425371406287, "grad_norm": 2.191303253173828, "learning_rate": 1.8161642078255293e-06, "loss": 0.9179, "step": 53740 }, { "epoch": 0.6550034733647764, "grad_norm": 2.0321078300476074, "learning_rate": 1.8158434894162927e-06, "loss": 0.8262, "step": 53745 }, { "epoch": 0.6550644095889242, "grad_norm": 1.950058102607727, "learning_rate": 1.8155227710070557e-06, "loss": 0.7799, "step": 53750 }, { "epoch": 0.655125345813072, "grad_norm": 1.9797307252883911, "learning_rate": 1.8152020525978192e-06, "loss": 0.8382, "step": 53755 }, { "epoch": 0.6551862820372198, "grad_norm": 1.7318346500396729, "learning_rate": 1.8148813341885826e-06, "loss": 0.811, "step": 53760 }, { "epoch": 0.6552472182613677, "grad_norm": 2.2174735069274902, "learning_rate": 1.8145606157793458e-06, "loss": 0.8707, "step": 53765 }, { "epoch": 0.6553081544855155, "grad_norm": 1.8336926698684692, "learning_rate": 1.8142398973701093e-06, "loss": 0.8023, "step": 53770 }, { "epoch": 0.6553690907096633, "grad_norm": 1.7632099390029907, "learning_rate": 1.8139191789608727e-06, "loss": 0.803, "step": 53775 }, { "epoch": 0.655430026933811, "grad_norm": 2.0899343490600586, "learning_rate": 1.8135984605516357e-06, "loss": 0.7772, "step": 53780 }, { "epoch": 0.6554909631579589, "grad_norm": 1.6752917766571045, "learning_rate": 1.8132777421423992e-06, "loss": 0.7717, "step": 53785 }, { "epoch": 0.6555518993821067, "grad_norm": 2.075201988220215, "learning_rate": 1.8129570237331626e-06, "loss": 0.8726, "step": 53790 }, { "epoch": 0.6556128356062545, "grad_norm": 1.6171540021896362, "learning_rate": 1.8126363053239256e-06, "loss": 0.8488, "step": 53795 }, { "epoch": 0.6556737718304023, "grad_norm": 2.1015207767486572, "learning_rate": 1.812315586914689e-06, "loss": 0.835, "step": 53800 }, { "epoch": 0.6557347080545501, "grad_norm": 2.2579920291900635, "learning_rate": 1.8119948685054523e-06, "loss": 0.7399, "step": 53805 }, { "epoch": 0.655795644278698, "grad_norm": 2.105266809463501, "learning_rate": 1.8116741500962157e-06, "loss": 0.8686, "step": 53810 }, { "epoch": 0.6558565805028457, "grad_norm": 1.7353260517120361, "learning_rate": 1.8113534316869792e-06, "loss": 0.8215, "step": 53815 }, { "epoch": 0.6559175167269935, "grad_norm": 1.9867476224899292, "learning_rate": 1.8110327132777422e-06, "loss": 0.7793, "step": 53820 }, { "epoch": 0.6559784529511413, "grad_norm": 2.209263563156128, "learning_rate": 1.8107119948685056e-06, "loss": 0.8136, "step": 53825 }, { "epoch": 0.6560393891752891, "grad_norm": 2.5877432823181152, "learning_rate": 1.810391276459269e-06, "loss": 0.8237, "step": 53830 }, { "epoch": 0.656100325399437, "grad_norm": 2.184889793395996, "learning_rate": 1.810070558050032e-06, "loss": 0.8435, "step": 53835 }, { "epoch": 0.6561612616235848, "grad_norm": 1.9318231344223022, "learning_rate": 1.8097498396407955e-06, "loss": 0.8499, "step": 53840 }, { "epoch": 0.6562221978477326, "grad_norm": 1.5960068702697754, "learning_rate": 1.8094291212315588e-06, "loss": 0.8354, "step": 53845 }, { "epoch": 0.6562831340718803, "grad_norm": 1.9330259561538696, "learning_rate": 1.8091084028223222e-06, "loss": 0.8189, "step": 53850 }, { "epoch": 0.6563440702960281, "grad_norm": 1.970545768737793, "learning_rate": 1.8087876844130856e-06, "loss": 0.782, "step": 53855 }, { "epoch": 0.656405006520176, "grad_norm": 1.8531150817871094, "learning_rate": 1.8084669660038487e-06, "loss": 0.791, "step": 53860 }, { "epoch": 0.6564659427443238, "grad_norm": 1.834193468093872, "learning_rate": 1.808146247594612e-06, "loss": 0.777, "step": 53865 }, { "epoch": 0.6565268789684716, "grad_norm": 2.0882372856140137, "learning_rate": 1.8078255291853755e-06, "loss": 0.7954, "step": 53870 }, { "epoch": 0.6565878151926194, "grad_norm": 1.9778411388397217, "learning_rate": 1.8075048107761385e-06, "loss": 0.8472, "step": 53875 }, { "epoch": 0.6566487514167673, "grad_norm": 1.665915846824646, "learning_rate": 1.807184092366902e-06, "loss": 0.851, "step": 53880 }, { "epoch": 0.656709687640915, "grad_norm": 1.8375742435455322, "learning_rate": 1.8068633739576652e-06, "loss": 0.8137, "step": 53885 }, { "epoch": 0.6567706238650628, "grad_norm": 1.693690538406372, "learning_rate": 1.8065426555484287e-06, "loss": 0.8011, "step": 53890 }, { "epoch": 0.6568315600892106, "grad_norm": 2.000447988510132, "learning_rate": 1.806221937139192e-06, "loss": 0.8474, "step": 53895 }, { "epoch": 0.6568924963133584, "grad_norm": 2.018897533416748, "learning_rate": 1.8059012187299551e-06, "loss": 0.8767, "step": 53900 }, { "epoch": 0.6569534325375063, "grad_norm": 2.516245126724243, "learning_rate": 1.8055805003207186e-06, "loss": 0.8233, "step": 53905 }, { "epoch": 0.6570143687616541, "grad_norm": 2.161980390548706, "learning_rate": 1.805259781911482e-06, "loss": 0.7439, "step": 53910 }, { "epoch": 0.6570753049858019, "grad_norm": 1.8209452629089355, "learning_rate": 1.8049390635022452e-06, "loss": 0.866, "step": 53915 }, { "epoch": 0.6571362412099496, "grad_norm": 1.7788395881652832, "learning_rate": 1.8046183450930085e-06, "loss": 0.7084, "step": 53920 }, { "epoch": 0.6571971774340974, "grad_norm": 2.247004747390747, "learning_rate": 1.8042976266837717e-06, "loss": 0.8405, "step": 53925 }, { "epoch": 0.6572581136582453, "grad_norm": 1.9471760988235474, "learning_rate": 1.8039769082745351e-06, "loss": 0.7825, "step": 53930 }, { "epoch": 0.6573190498823931, "grad_norm": 1.7694742679595947, "learning_rate": 1.8036561898652986e-06, "loss": 0.8611, "step": 53935 }, { "epoch": 0.6573799861065409, "grad_norm": 1.9315379858016968, "learning_rate": 1.8033354714560616e-06, "loss": 0.8808, "step": 53940 }, { "epoch": 0.6574409223306887, "grad_norm": 2.0116281509399414, "learning_rate": 1.803014753046825e-06, "loss": 0.7961, "step": 53945 }, { "epoch": 0.6575018585548366, "grad_norm": 2.469672918319702, "learning_rate": 1.8026940346375885e-06, "loss": 0.8109, "step": 53950 }, { "epoch": 0.6575627947789843, "grad_norm": 2.4223601818084717, "learning_rate": 1.8023733162283517e-06, "loss": 0.8585, "step": 53955 }, { "epoch": 0.6576237310031321, "grad_norm": 2.132847309112549, "learning_rate": 1.802052597819115e-06, "loss": 0.7896, "step": 53960 }, { "epoch": 0.6576846672272799, "grad_norm": 2.056412696838379, "learning_rate": 1.8017318794098781e-06, "loss": 0.8287, "step": 53965 }, { "epoch": 0.6577456034514277, "grad_norm": 1.900436282157898, "learning_rate": 1.8014111610006416e-06, "loss": 0.8795, "step": 53970 }, { "epoch": 0.6578065396755756, "grad_norm": 1.7369253635406494, "learning_rate": 1.801090442591405e-06, "loss": 0.7841, "step": 53975 }, { "epoch": 0.6578674758997234, "grad_norm": 1.8498587608337402, "learning_rate": 1.800769724182168e-06, "loss": 0.7782, "step": 53980 }, { "epoch": 0.6579284121238712, "grad_norm": 1.8070740699768066, "learning_rate": 1.8004490057729315e-06, "loss": 0.8654, "step": 53985 }, { "epoch": 0.6579893483480189, "grad_norm": 1.9227579832077026, "learning_rate": 1.800128287363695e-06, "loss": 0.8677, "step": 53990 }, { "epoch": 0.6580502845721667, "grad_norm": 1.9946824312210083, "learning_rate": 1.7998075689544582e-06, "loss": 0.7701, "step": 53995 }, { "epoch": 0.6581112207963146, "grad_norm": 1.6511679887771606, "learning_rate": 1.7994868505452214e-06, "loss": 0.7166, "step": 54000 }, { "epoch": 0.6581721570204624, "grad_norm": 2.188736915588379, "learning_rate": 1.7991661321359846e-06, "loss": 0.8715, "step": 54005 }, { "epoch": 0.6582330932446102, "grad_norm": 1.8087137937545776, "learning_rate": 1.798845413726748e-06, "loss": 0.7915, "step": 54010 }, { "epoch": 0.658294029468758, "grad_norm": 1.904061198234558, "learning_rate": 1.7985246953175115e-06, "loss": 0.6954, "step": 54015 }, { "epoch": 0.6583549656929059, "grad_norm": 2.03486704826355, "learning_rate": 1.7982039769082745e-06, "loss": 0.8322, "step": 54020 }, { "epoch": 0.6584159019170536, "grad_norm": 2.1165733337402344, "learning_rate": 1.797883258499038e-06, "loss": 0.8022, "step": 54025 }, { "epoch": 0.6584768381412014, "grad_norm": 2.1994385719299316, "learning_rate": 1.7975625400898014e-06, "loss": 0.8217, "step": 54030 }, { "epoch": 0.6585377743653492, "grad_norm": 1.8166327476501465, "learning_rate": 1.7972418216805646e-06, "loss": 0.7896, "step": 54035 }, { "epoch": 0.658598710589497, "grad_norm": 1.5922023057937622, "learning_rate": 1.796921103271328e-06, "loss": 0.8477, "step": 54040 }, { "epoch": 0.6586596468136449, "grad_norm": 1.7317430973052979, "learning_rate": 1.796600384862091e-06, "loss": 0.8096, "step": 54045 }, { "epoch": 0.6587205830377927, "grad_norm": 1.9280730485916138, "learning_rate": 1.7962796664528545e-06, "loss": 0.8038, "step": 54050 }, { "epoch": 0.6587815192619405, "grad_norm": 1.845449686050415, "learning_rate": 1.795958948043618e-06, "loss": 0.7969, "step": 54055 }, { "epoch": 0.6588424554860882, "grad_norm": 1.9161275625228882, "learning_rate": 1.795638229634381e-06, "loss": 0.827, "step": 54060 }, { "epoch": 0.658903391710236, "grad_norm": 1.925459623336792, "learning_rate": 1.7953175112251444e-06, "loss": 0.8002, "step": 54065 }, { "epoch": 0.6589643279343839, "grad_norm": 1.7621437311172485, "learning_rate": 1.7949967928159079e-06, "loss": 0.8615, "step": 54070 }, { "epoch": 0.6590252641585317, "grad_norm": 1.7816355228424072, "learning_rate": 1.794676074406671e-06, "loss": 0.8229, "step": 54075 }, { "epoch": 0.6590862003826795, "grad_norm": 2.1005778312683105, "learning_rate": 1.7943553559974345e-06, "loss": 0.9032, "step": 54080 }, { "epoch": 0.6591471366068273, "grad_norm": 2.287302017211914, "learning_rate": 1.7940346375881977e-06, "loss": 0.8185, "step": 54085 }, { "epoch": 0.6592080728309752, "grad_norm": 1.9437222480773926, "learning_rate": 1.793713919178961e-06, "loss": 0.7586, "step": 54090 }, { "epoch": 0.6592690090551229, "grad_norm": 2.120893955230713, "learning_rate": 1.7933932007697244e-06, "loss": 0.8598, "step": 54095 }, { "epoch": 0.6593299452792707, "grad_norm": 2.106412172317505, "learning_rate": 1.7930724823604874e-06, "loss": 0.8633, "step": 54100 }, { "epoch": 0.6593908815034185, "grad_norm": 2.1227569580078125, "learning_rate": 1.7927517639512509e-06, "loss": 0.8647, "step": 54105 }, { "epoch": 0.6594518177275663, "grad_norm": 1.8953304290771484, "learning_rate": 1.7924310455420143e-06, "loss": 0.8577, "step": 54110 }, { "epoch": 0.6595127539517142, "grad_norm": 1.8176714181900024, "learning_rate": 1.7921103271327775e-06, "loss": 0.8299, "step": 54115 }, { "epoch": 0.659573690175862, "grad_norm": 2.246243476867676, "learning_rate": 1.791789608723541e-06, "loss": 0.8161, "step": 54120 }, { "epoch": 0.6596346264000098, "grad_norm": 2.118403911590576, "learning_rate": 1.7914688903143044e-06, "loss": 0.8504, "step": 54125 }, { "epoch": 0.6596955626241575, "grad_norm": 1.9037988185882568, "learning_rate": 1.7911481719050674e-06, "loss": 0.8342, "step": 54130 }, { "epoch": 0.6597564988483053, "grad_norm": 2.0073957443237305, "learning_rate": 1.7908274534958309e-06, "loss": 0.7979, "step": 54135 }, { "epoch": 0.6598174350724532, "grad_norm": 2.004180908203125, "learning_rate": 1.7905067350865941e-06, "loss": 0.8094, "step": 54140 }, { "epoch": 0.659878371296601, "grad_norm": 2.018406629562378, "learning_rate": 1.7901860166773573e-06, "loss": 0.8048, "step": 54145 }, { "epoch": 0.6599393075207488, "grad_norm": 1.6421133279800415, "learning_rate": 1.7898652982681208e-06, "loss": 0.8301, "step": 54150 }, { "epoch": 0.6600002437448966, "grad_norm": 1.962181806564331, "learning_rate": 1.789544579858884e-06, "loss": 0.7922, "step": 54155 }, { "epoch": 0.6600611799690443, "grad_norm": 2.0661940574645996, "learning_rate": 1.7892238614496474e-06, "loss": 0.8046, "step": 54160 }, { "epoch": 0.6601221161931922, "grad_norm": 2.0111072063446045, "learning_rate": 1.7889031430404109e-06, "loss": 0.7786, "step": 54165 }, { "epoch": 0.66018305241734, "grad_norm": 1.9542728662490845, "learning_rate": 1.788582424631174e-06, "loss": 0.8379, "step": 54170 }, { "epoch": 0.6602439886414878, "grad_norm": 2.2404110431671143, "learning_rate": 1.7882617062219373e-06, "loss": 0.8859, "step": 54175 }, { "epoch": 0.6603049248656356, "grad_norm": 2.0065560340881348, "learning_rate": 1.7879409878127006e-06, "loss": 0.7545, "step": 54180 }, { "epoch": 0.6603658610897835, "grad_norm": 1.8405132293701172, "learning_rate": 1.7876202694034638e-06, "loss": 0.8309, "step": 54185 }, { "epoch": 0.6604267973139313, "grad_norm": 2.430382251739502, "learning_rate": 1.7872995509942272e-06, "loss": 0.8755, "step": 54190 }, { "epoch": 0.660487733538079, "grad_norm": 1.7915775775909424, "learning_rate": 1.7869788325849905e-06, "loss": 0.8365, "step": 54195 }, { "epoch": 0.6605486697622268, "grad_norm": 1.625410556793213, "learning_rate": 1.786658114175754e-06, "loss": 0.8431, "step": 54200 }, { "epoch": 0.6606096059863746, "grad_norm": 1.9037492275238037, "learning_rate": 1.7863373957665174e-06, "loss": 0.8, "step": 54205 }, { "epoch": 0.6606705422105225, "grad_norm": 1.9529671669006348, "learning_rate": 1.7860166773572804e-06, "loss": 0.8121, "step": 54210 }, { "epoch": 0.6607314784346703, "grad_norm": 2.187302827835083, "learning_rate": 1.7856959589480438e-06, "loss": 0.7982, "step": 54215 }, { "epoch": 0.6607924146588181, "grad_norm": 2.313748598098755, "learning_rate": 1.785375240538807e-06, "loss": 0.8157, "step": 54220 }, { "epoch": 0.6608533508829659, "grad_norm": 1.9125100374221802, "learning_rate": 1.7850545221295703e-06, "loss": 0.8429, "step": 54225 }, { "epoch": 0.6609142871071136, "grad_norm": 2.127534866333008, "learning_rate": 1.7847338037203337e-06, "loss": 0.8139, "step": 54230 }, { "epoch": 0.6609752233312615, "grad_norm": 1.9625083208084106, "learning_rate": 1.784413085311097e-06, "loss": 0.8156, "step": 54235 }, { "epoch": 0.6610361595554093, "grad_norm": 2.3474819660186768, "learning_rate": 1.7840923669018604e-06, "loss": 0.7947, "step": 54240 }, { "epoch": 0.6610970957795571, "grad_norm": 1.8627997636795044, "learning_rate": 1.7837716484926238e-06, "loss": 0.8491, "step": 54245 }, { "epoch": 0.6611580320037049, "grad_norm": 2.695939779281616, "learning_rate": 1.7834509300833868e-06, "loss": 0.8185, "step": 54250 }, { "epoch": 0.6612189682278528, "grad_norm": 1.8556755781173706, "learning_rate": 1.7831302116741503e-06, "loss": 0.8117, "step": 54255 }, { "epoch": 0.6612799044520006, "grad_norm": 1.8449523448944092, "learning_rate": 1.7828094932649135e-06, "loss": 0.7745, "step": 54260 }, { "epoch": 0.6613408406761483, "grad_norm": 1.815829873085022, "learning_rate": 1.782488774855677e-06, "loss": 0.7768, "step": 54265 }, { "epoch": 0.6614017769002961, "grad_norm": 1.8611547946929932, "learning_rate": 1.7821680564464402e-06, "loss": 0.8044, "step": 54270 }, { "epoch": 0.6614627131244439, "grad_norm": 1.7902376651763916, "learning_rate": 1.7818473380372034e-06, "loss": 0.8375, "step": 54275 }, { "epoch": 0.6615236493485918, "grad_norm": 2.1093454360961914, "learning_rate": 1.7815266196279668e-06, "loss": 0.8337, "step": 54280 }, { "epoch": 0.6615845855727396, "grad_norm": 2.249819755554199, "learning_rate": 1.7812059012187303e-06, "loss": 0.8086, "step": 54285 }, { "epoch": 0.6616455217968874, "grad_norm": 2.021294355392456, "learning_rate": 1.7808851828094933e-06, "loss": 0.7882, "step": 54290 }, { "epoch": 0.6617064580210352, "grad_norm": 2.1228530406951904, "learning_rate": 1.7805644644002567e-06, "loss": 0.8654, "step": 54295 }, { "epoch": 0.6617673942451829, "grad_norm": 2.40244197845459, "learning_rate": 1.78024374599102e-06, "loss": 0.8915, "step": 54300 }, { "epoch": 0.6618283304693308, "grad_norm": 1.9800763130187988, "learning_rate": 1.7799230275817834e-06, "loss": 0.8099, "step": 54305 }, { "epoch": 0.6618892666934786, "grad_norm": 2.2059710025787354, "learning_rate": 1.7796023091725466e-06, "loss": 0.7987, "step": 54310 }, { "epoch": 0.6619502029176264, "grad_norm": 2.122957229614258, "learning_rate": 1.7792815907633099e-06, "loss": 0.7983, "step": 54315 }, { "epoch": 0.6620111391417742, "grad_norm": 1.9409412145614624, "learning_rate": 1.7789608723540733e-06, "loss": 0.782, "step": 54320 }, { "epoch": 0.662072075365922, "grad_norm": 1.7813142538070679, "learning_rate": 1.7786401539448367e-06, "loss": 0.8348, "step": 54325 }, { "epoch": 0.6621330115900699, "grad_norm": 1.8876124620437622, "learning_rate": 1.7783194355355998e-06, "loss": 0.8674, "step": 54330 }, { "epoch": 0.6621939478142176, "grad_norm": 2.136606216430664, "learning_rate": 1.7779987171263632e-06, "loss": 0.8025, "step": 54335 }, { "epoch": 0.6622548840383654, "grad_norm": 2.4546425342559814, "learning_rate": 1.7776779987171264e-06, "loss": 0.8164, "step": 54340 }, { "epoch": 0.6623158202625132, "grad_norm": 1.9357846975326538, "learning_rate": 1.7773572803078899e-06, "loss": 0.7668, "step": 54345 }, { "epoch": 0.6623767564866611, "grad_norm": 1.9825092554092407, "learning_rate": 1.777036561898653e-06, "loss": 0.7834, "step": 54350 }, { "epoch": 0.6624376927108089, "grad_norm": 2.354673385620117, "learning_rate": 1.7767158434894163e-06, "loss": 0.8315, "step": 54355 }, { "epoch": 0.6624986289349567, "grad_norm": 2.030702590942383, "learning_rate": 1.7763951250801798e-06, "loss": 0.8303, "step": 54360 }, { "epoch": 0.6625595651591045, "grad_norm": 2.2162065505981445, "learning_rate": 1.7760744066709432e-06, "loss": 0.8498, "step": 54365 }, { "epoch": 0.6626205013832522, "grad_norm": 2.030829906463623, "learning_rate": 1.7757536882617062e-06, "loss": 0.8664, "step": 54370 }, { "epoch": 0.6626814376074001, "grad_norm": 1.809739112854004, "learning_rate": 1.7754329698524697e-06, "loss": 0.8658, "step": 54375 }, { "epoch": 0.6627423738315479, "grad_norm": 1.7082432508468628, "learning_rate": 1.775112251443233e-06, "loss": 0.8048, "step": 54380 }, { "epoch": 0.6628033100556957, "grad_norm": 1.749245047569275, "learning_rate": 1.7747915330339963e-06, "loss": 0.8347, "step": 54385 }, { "epoch": 0.6628642462798435, "grad_norm": 1.9482450485229492, "learning_rate": 1.7744708146247598e-06, "loss": 0.7488, "step": 54390 }, { "epoch": 0.6629251825039914, "grad_norm": 1.6349519491195679, "learning_rate": 1.7741500962155228e-06, "loss": 0.794, "step": 54395 }, { "epoch": 0.6629861187281392, "grad_norm": 2.068993330001831, "learning_rate": 1.7738293778062862e-06, "loss": 0.8151, "step": 54400 }, { "epoch": 0.6630470549522869, "grad_norm": 1.8312522172927856, "learning_rate": 1.7735086593970497e-06, "loss": 0.8534, "step": 54405 }, { "epoch": 0.6631079911764347, "grad_norm": 1.891034722328186, "learning_rate": 1.7731879409878127e-06, "loss": 0.8124, "step": 54410 }, { "epoch": 0.6631689274005825, "grad_norm": 2.1491806507110596, "learning_rate": 1.7728672225785761e-06, "loss": 0.7963, "step": 54415 }, { "epoch": 0.6632298636247304, "grad_norm": 1.8911278247833252, "learning_rate": 1.7725465041693396e-06, "loss": 0.8598, "step": 54420 }, { "epoch": 0.6632907998488782, "grad_norm": 2.2534940242767334, "learning_rate": 1.7722257857601028e-06, "loss": 0.8987, "step": 54425 }, { "epoch": 0.663351736073026, "grad_norm": 1.9643690586090088, "learning_rate": 1.7719050673508662e-06, "loss": 0.7668, "step": 54430 }, { "epoch": 0.6634126722971738, "grad_norm": 2.1058855056762695, "learning_rate": 1.7715843489416293e-06, "loss": 0.8583, "step": 54435 }, { "epoch": 0.6634736085213215, "grad_norm": 1.975524663925171, "learning_rate": 1.7712636305323927e-06, "loss": 0.8097, "step": 54440 }, { "epoch": 0.6635345447454694, "grad_norm": 1.864254117012024, "learning_rate": 1.7709429121231561e-06, "loss": 0.8302, "step": 54445 }, { "epoch": 0.6635954809696172, "grad_norm": 2.088080883026123, "learning_rate": 1.7706221937139191e-06, "loss": 0.7696, "step": 54450 }, { "epoch": 0.663656417193765, "grad_norm": 1.9029227495193481, "learning_rate": 1.7703014753046826e-06, "loss": 0.8374, "step": 54455 }, { "epoch": 0.6637173534179128, "grad_norm": 2.0387983322143555, "learning_rate": 1.769980756895446e-06, "loss": 0.8382, "step": 54460 }, { "epoch": 0.6637782896420606, "grad_norm": 1.7515937089920044, "learning_rate": 1.7696600384862093e-06, "loss": 0.7767, "step": 54465 }, { "epoch": 0.6638392258662085, "grad_norm": 1.94940185546875, "learning_rate": 1.7693393200769727e-06, "loss": 0.8685, "step": 54470 }, { "epoch": 0.6639001620903562, "grad_norm": 2.091237783432007, "learning_rate": 1.7690186016677357e-06, "loss": 0.8505, "step": 54475 }, { "epoch": 0.663961098314504, "grad_norm": 1.7289215326309204, "learning_rate": 1.7686978832584992e-06, "loss": 0.7879, "step": 54480 }, { "epoch": 0.6640220345386518, "grad_norm": 2.0248048305511475, "learning_rate": 1.7683771648492626e-06, "loss": 0.8471, "step": 54485 }, { "epoch": 0.6640829707627997, "grad_norm": 1.7994887828826904, "learning_rate": 1.7680564464400258e-06, "loss": 0.7798, "step": 54490 }, { "epoch": 0.6641439069869475, "grad_norm": 1.7121950387954712, "learning_rate": 1.767735728030789e-06, "loss": 0.7938, "step": 54495 }, { "epoch": 0.6642048432110953, "grad_norm": 1.8905553817749023, "learning_rate": 1.7674150096215525e-06, "loss": 0.7891, "step": 54500 }, { "epoch": 0.6642657794352431, "grad_norm": 2.196606159210205, "learning_rate": 1.7670942912123157e-06, "loss": 0.8778, "step": 54505 }, { "epoch": 0.6643267156593908, "grad_norm": 1.999508023262024, "learning_rate": 1.7667735728030792e-06, "loss": 0.8164, "step": 54510 }, { "epoch": 0.6643876518835387, "grad_norm": 1.6777554750442505, "learning_rate": 1.7664528543938422e-06, "loss": 0.7936, "step": 54515 }, { "epoch": 0.6644485881076865, "grad_norm": 2.016986608505249, "learning_rate": 1.7661321359846056e-06, "loss": 0.7927, "step": 54520 }, { "epoch": 0.6645095243318343, "grad_norm": 1.7279514074325562, "learning_rate": 1.765811417575369e-06, "loss": 0.7853, "step": 54525 }, { "epoch": 0.6645704605559821, "grad_norm": 1.882129192352295, "learning_rate": 1.7654906991661323e-06, "loss": 0.8274, "step": 54530 }, { "epoch": 0.66463139678013, "grad_norm": 1.8352731466293335, "learning_rate": 1.7651699807568955e-06, "loss": 0.8544, "step": 54535 }, { "epoch": 0.6646923330042778, "grad_norm": 1.9260720014572144, "learning_rate": 1.764849262347659e-06, "loss": 0.8058, "step": 54540 }, { "epoch": 0.6647532692284255, "grad_norm": 2.0736870765686035, "learning_rate": 1.7645285439384222e-06, "loss": 0.8513, "step": 54545 }, { "epoch": 0.6648142054525733, "grad_norm": 1.9039639234542847, "learning_rate": 1.7642078255291856e-06, "loss": 0.7424, "step": 54550 }, { "epoch": 0.6648751416767211, "grad_norm": 1.8275227546691895, "learning_rate": 1.7638871071199486e-06, "loss": 0.7754, "step": 54555 }, { "epoch": 0.664936077900869, "grad_norm": 2.471343755722046, "learning_rate": 1.763566388710712e-06, "loss": 0.8329, "step": 54560 }, { "epoch": 0.6649970141250168, "grad_norm": 1.9314872026443481, "learning_rate": 1.7632456703014755e-06, "loss": 0.9101, "step": 54565 }, { "epoch": 0.6650579503491646, "grad_norm": 2.146580696105957, "learning_rate": 1.7629249518922388e-06, "loss": 0.8601, "step": 54570 }, { "epoch": 0.6651188865733124, "grad_norm": 1.9609944820404053, "learning_rate": 1.762604233483002e-06, "loss": 0.84, "step": 54575 }, { "epoch": 0.6651798227974601, "grad_norm": 1.9676836729049683, "learning_rate": 1.7622835150737654e-06, "loss": 0.831, "step": 54580 }, { "epoch": 0.665240759021608, "grad_norm": 1.8630638122558594, "learning_rate": 1.7619627966645286e-06, "loss": 0.7953, "step": 54585 }, { "epoch": 0.6653016952457558, "grad_norm": 1.9371379613876343, "learning_rate": 1.761642078255292e-06, "loss": 0.9041, "step": 54590 }, { "epoch": 0.6653626314699036, "grad_norm": 2.356235980987549, "learning_rate": 1.761321359846055e-06, "loss": 0.8504, "step": 54595 }, { "epoch": 0.6654235676940514, "grad_norm": 2.1548609733581543, "learning_rate": 1.7610006414368185e-06, "loss": 0.801, "step": 54600 }, { "epoch": 0.6654845039181992, "grad_norm": 1.901839017868042, "learning_rate": 1.760679923027582e-06, "loss": 0.7893, "step": 54605 }, { "epoch": 0.6655454401423471, "grad_norm": 2.1563189029693604, "learning_rate": 1.7603592046183452e-06, "loss": 0.7813, "step": 54610 }, { "epoch": 0.6656063763664948, "grad_norm": 1.8470897674560547, "learning_rate": 1.7600384862091087e-06, "loss": 0.8288, "step": 54615 }, { "epoch": 0.6656673125906426, "grad_norm": 1.855242371559143, "learning_rate": 1.7597177677998719e-06, "loss": 0.7736, "step": 54620 }, { "epoch": 0.6657282488147904, "grad_norm": 1.744537591934204, "learning_rate": 1.7593970493906351e-06, "loss": 0.7838, "step": 54625 }, { "epoch": 0.6657891850389382, "grad_norm": 1.8213837146759033, "learning_rate": 1.7590763309813986e-06, "loss": 0.8005, "step": 54630 }, { "epoch": 0.6658501212630861, "grad_norm": 2.217194080352783, "learning_rate": 1.7587556125721616e-06, "loss": 0.8353, "step": 54635 }, { "epoch": 0.6659110574872339, "grad_norm": 1.907212495803833, "learning_rate": 1.758434894162925e-06, "loss": 0.8392, "step": 54640 }, { "epoch": 0.6659719937113817, "grad_norm": 1.9557621479034424, "learning_rate": 1.7581141757536884e-06, "loss": 0.8045, "step": 54645 }, { "epoch": 0.6660329299355294, "grad_norm": 2.0730459690093994, "learning_rate": 1.7577934573444517e-06, "loss": 0.788, "step": 54650 }, { "epoch": 0.6660938661596773, "grad_norm": 1.8359094858169556, "learning_rate": 1.7574727389352151e-06, "loss": 0.8247, "step": 54655 }, { "epoch": 0.6661548023838251, "grad_norm": 1.9595513343811035, "learning_rate": 1.7571520205259783e-06, "loss": 0.8249, "step": 54660 }, { "epoch": 0.6662157386079729, "grad_norm": 1.7875010967254639, "learning_rate": 1.7568313021167416e-06, "loss": 0.7875, "step": 54665 }, { "epoch": 0.6662766748321207, "grad_norm": 2.2093024253845215, "learning_rate": 1.756510583707505e-06, "loss": 0.8281, "step": 54670 }, { "epoch": 0.6663376110562685, "grad_norm": 1.847905158996582, "learning_rate": 1.756189865298268e-06, "loss": 0.8217, "step": 54675 }, { "epoch": 0.6663985472804164, "grad_norm": 2.0090842247009277, "learning_rate": 1.7558691468890315e-06, "loss": 0.7973, "step": 54680 }, { "epoch": 0.6664594835045641, "grad_norm": 2.1657891273498535, "learning_rate": 1.755548428479795e-06, "loss": 0.7973, "step": 54685 }, { "epoch": 0.6665204197287119, "grad_norm": 1.8440499305725098, "learning_rate": 1.7552277100705581e-06, "loss": 0.8357, "step": 54690 }, { "epoch": 0.6665813559528597, "grad_norm": 1.9757312536239624, "learning_rate": 1.7549069916613216e-06, "loss": 0.7744, "step": 54695 }, { "epoch": 0.6666422921770075, "grad_norm": 1.784842848777771, "learning_rate": 1.7545862732520848e-06, "loss": 0.8821, "step": 54700 }, { "epoch": 0.6667032284011554, "grad_norm": 2.191535472869873, "learning_rate": 1.754265554842848e-06, "loss": 0.8136, "step": 54705 }, { "epoch": 0.6667641646253032, "grad_norm": 2.135411024093628, "learning_rate": 1.7539448364336115e-06, "loss": 0.7882, "step": 54710 }, { "epoch": 0.666825100849451, "grad_norm": 1.9844805002212524, "learning_rate": 1.753624118024375e-06, "loss": 0.7984, "step": 54715 }, { "epoch": 0.6668860370735987, "grad_norm": 1.9504059553146362, "learning_rate": 1.753303399615138e-06, "loss": 0.8325, "step": 54720 }, { "epoch": 0.6669469732977465, "grad_norm": 1.9344249963760376, "learning_rate": 1.7529826812059014e-06, "loss": 0.7255, "step": 54725 }, { "epoch": 0.6670079095218944, "grad_norm": 1.8936604261398315, "learning_rate": 1.7526619627966646e-06, "loss": 0.7999, "step": 54730 }, { "epoch": 0.6670688457460422, "grad_norm": 2.1895387172698975, "learning_rate": 1.752341244387428e-06, "loss": 0.7781, "step": 54735 }, { "epoch": 0.66712978197019, "grad_norm": 2.0387425422668457, "learning_rate": 1.7520205259781915e-06, "loss": 0.8567, "step": 54740 }, { "epoch": 0.6671907181943378, "grad_norm": 1.94791841506958, "learning_rate": 1.7516998075689545e-06, "loss": 0.7844, "step": 54745 }, { "epoch": 0.6672516544184857, "grad_norm": 1.8829240798950195, "learning_rate": 1.751379089159718e-06, "loss": 0.8212, "step": 54750 }, { "epoch": 0.6673125906426334, "grad_norm": 1.9704375267028809, "learning_rate": 1.7510583707504814e-06, "loss": 0.7892, "step": 54755 }, { "epoch": 0.6673735268667812, "grad_norm": 2.4284098148345947, "learning_rate": 1.7507376523412444e-06, "loss": 0.8354, "step": 54760 }, { "epoch": 0.667434463090929, "grad_norm": 1.9139015674591064, "learning_rate": 1.7504169339320078e-06, "loss": 0.8258, "step": 54765 }, { "epoch": 0.6674953993150768, "grad_norm": 1.800093650817871, "learning_rate": 1.750096215522771e-06, "loss": 0.801, "step": 54770 }, { "epoch": 0.6675563355392247, "grad_norm": 1.9555652141571045, "learning_rate": 1.7497754971135345e-06, "loss": 0.8387, "step": 54775 }, { "epoch": 0.6676172717633725, "grad_norm": 2.006765127182007, "learning_rate": 1.749454778704298e-06, "loss": 0.825, "step": 54780 }, { "epoch": 0.6676782079875203, "grad_norm": 1.7260257005691528, "learning_rate": 1.749134060295061e-06, "loss": 0.8417, "step": 54785 }, { "epoch": 0.667739144211668, "grad_norm": 1.7703797817230225, "learning_rate": 1.7488133418858244e-06, "loss": 0.864, "step": 54790 }, { "epoch": 0.6678000804358158, "grad_norm": 1.9074547290802002, "learning_rate": 1.7484926234765878e-06, "loss": 0.8158, "step": 54795 }, { "epoch": 0.6678610166599637, "grad_norm": 1.7935144901275635, "learning_rate": 1.7481719050673509e-06, "loss": 0.7699, "step": 54800 }, { "epoch": 0.6679219528841115, "grad_norm": 1.9530670642852783, "learning_rate": 1.7478511866581143e-06, "loss": 0.8164, "step": 54805 }, { "epoch": 0.6679828891082593, "grad_norm": 1.94309663772583, "learning_rate": 1.7475304682488775e-06, "loss": 0.8533, "step": 54810 }, { "epoch": 0.6680438253324071, "grad_norm": 1.8299238681793213, "learning_rate": 1.747209749839641e-06, "loss": 0.8358, "step": 54815 }, { "epoch": 0.668104761556555, "grad_norm": 1.8303428888320923, "learning_rate": 1.7468890314304044e-06, "loss": 0.7995, "step": 54820 }, { "epoch": 0.6681656977807027, "grad_norm": 2.023061513900757, "learning_rate": 1.7465683130211674e-06, "loss": 0.815, "step": 54825 }, { "epoch": 0.6682266340048505, "grad_norm": 1.849915862083435, "learning_rate": 1.7462475946119309e-06, "loss": 0.822, "step": 54830 }, { "epoch": 0.6682875702289983, "grad_norm": 1.7691161632537842, "learning_rate": 1.7459268762026943e-06, "loss": 0.7504, "step": 54835 }, { "epoch": 0.6683485064531461, "grad_norm": 2.1263835430145264, "learning_rate": 1.7456061577934575e-06, "loss": 0.8229, "step": 54840 }, { "epoch": 0.668409442677294, "grad_norm": 2.204846143722534, "learning_rate": 1.7452854393842208e-06, "loss": 0.8608, "step": 54845 }, { "epoch": 0.6684703789014418, "grad_norm": 1.7245646715164185, "learning_rate": 1.744964720974984e-06, "loss": 0.7763, "step": 54850 }, { "epoch": 0.6685313151255896, "grad_norm": 1.8667744398117065, "learning_rate": 1.7446440025657474e-06, "loss": 0.7913, "step": 54855 }, { "epoch": 0.6685922513497373, "grad_norm": 1.9965943098068237, "learning_rate": 1.7443232841565109e-06, "loss": 0.8024, "step": 54860 }, { "epoch": 0.6686531875738851, "grad_norm": 1.7586041688919067, "learning_rate": 1.7440025657472739e-06, "loss": 0.8495, "step": 54865 }, { "epoch": 0.668714123798033, "grad_norm": 1.8612254858016968, "learning_rate": 1.7436818473380373e-06, "loss": 0.7764, "step": 54870 }, { "epoch": 0.6687750600221808, "grad_norm": 1.8586534261703491, "learning_rate": 1.7433611289288008e-06, "loss": 0.8208, "step": 54875 }, { "epoch": 0.6688359962463286, "grad_norm": 2.0759217739105225, "learning_rate": 1.743040410519564e-06, "loss": 0.8879, "step": 54880 }, { "epoch": 0.6688969324704764, "grad_norm": 1.9064788818359375, "learning_rate": 1.7427196921103272e-06, "loss": 0.8101, "step": 54885 }, { "epoch": 0.6689578686946243, "grad_norm": 2.180384874343872, "learning_rate": 1.7423989737010905e-06, "loss": 0.7932, "step": 54890 }, { "epoch": 0.669018804918772, "grad_norm": 1.8885546922683716, "learning_rate": 1.742078255291854e-06, "loss": 0.8373, "step": 54895 }, { "epoch": 0.6690797411429198, "grad_norm": 1.9235583543777466, "learning_rate": 1.7417575368826173e-06, "loss": 0.8719, "step": 54900 }, { "epoch": 0.6691406773670676, "grad_norm": 1.979333758354187, "learning_rate": 1.7414368184733804e-06, "loss": 0.7952, "step": 54905 }, { "epoch": 0.6692016135912154, "grad_norm": 2.034207582473755, "learning_rate": 1.7411161000641438e-06, "loss": 0.7933, "step": 54910 }, { "epoch": 0.6692625498153633, "grad_norm": 2.4492647647857666, "learning_rate": 1.7407953816549072e-06, "loss": 0.9092, "step": 54915 }, { "epoch": 0.6693234860395111, "grad_norm": 1.8096767663955688, "learning_rate": 1.7404746632456705e-06, "loss": 0.7704, "step": 54920 }, { "epoch": 0.6693844222636589, "grad_norm": 2.6003129482269287, "learning_rate": 1.7401539448364337e-06, "loss": 0.7595, "step": 54925 }, { "epoch": 0.6694453584878066, "grad_norm": 2.079310894012451, "learning_rate": 1.739833226427197e-06, "loss": 0.8825, "step": 54930 }, { "epoch": 0.6695062947119544, "grad_norm": 1.8479998111724854, "learning_rate": 1.7395125080179604e-06, "loss": 0.794, "step": 54935 }, { "epoch": 0.6695672309361023, "grad_norm": 1.892035961151123, "learning_rate": 1.7391917896087238e-06, "loss": 0.8109, "step": 54940 }, { "epoch": 0.6696281671602501, "grad_norm": 2.6032330989837646, "learning_rate": 1.7388710711994868e-06, "loss": 0.9073, "step": 54945 }, { "epoch": 0.6696891033843979, "grad_norm": 1.8743082284927368, "learning_rate": 1.7385503527902503e-06, "loss": 0.8065, "step": 54950 }, { "epoch": 0.6697500396085457, "grad_norm": 1.8425427675247192, "learning_rate": 1.7382296343810137e-06, "loss": 0.8308, "step": 54955 }, { "epoch": 0.6698109758326936, "grad_norm": 2.14131498336792, "learning_rate": 1.737908915971777e-06, "loss": 0.8458, "step": 54960 }, { "epoch": 0.6698719120568413, "grad_norm": 2.153616189956665, "learning_rate": 1.7375881975625404e-06, "loss": 0.8272, "step": 54965 }, { "epoch": 0.6699328482809891, "grad_norm": 2.0530569553375244, "learning_rate": 1.7372674791533034e-06, "loss": 0.7602, "step": 54970 }, { "epoch": 0.6699937845051369, "grad_norm": 1.872604250907898, "learning_rate": 1.7369467607440668e-06, "loss": 0.7855, "step": 54975 }, { "epoch": 0.6700547207292847, "grad_norm": 1.736602544784546, "learning_rate": 1.7366260423348303e-06, "loss": 0.8003, "step": 54980 }, { "epoch": 0.6701156569534326, "grad_norm": 2.4317071437835693, "learning_rate": 1.7363053239255933e-06, "loss": 0.8802, "step": 54985 }, { "epoch": 0.6701765931775804, "grad_norm": 2.077317476272583, "learning_rate": 1.7359846055163567e-06, "loss": 0.8948, "step": 54990 }, { "epoch": 0.6702375294017282, "grad_norm": 1.8748778104782104, "learning_rate": 1.7356638871071202e-06, "loss": 0.8077, "step": 54995 }, { "epoch": 0.6702984656258759, "grad_norm": 1.526321530342102, "learning_rate": 1.7353431686978834e-06, "loss": 0.8068, "step": 55000 }, { "epoch": 0.6703594018500237, "grad_norm": 1.6330279111862183, "learning_rate": 1.7350224502886468e-06, "loss": 0.8022, "step": 55005 }, { "epoch": 0.6704203380741716, "grad_norm": 1.8794046640396118, "learning_rate": 1.73470173187941e-06, "loss": 0.7967, "step": 55010 }, { "epoch": 0.6704812742983194, "grad_norm": 1.9985536336898804, "learning_rate": 1.7343810134701733e-06, "loss": 0.7968, "step": 55015 }, { "epoch": 0.6705422105224672, "grad_norm": 2.175625801086426, "learning_rate": 1.7340602950609367e-06, "loss": 0.9014, "step": 55020 }, { "epoch": 0.670603146746615, "grad_norm": 2.16996169090271, "learning_rate": 1.7337395766516997e-06, "loss": 0.764, "step": 55025 }, { "epoch": 0.6706640829707629, "grad_norm": 2.214186906814575, "learning_rate": 1.7334188582424632e-06, "loss": 0.8828, "step": 55030 }, { "epoch": 0.6707250191949106, "grad_norm": 2.0913891792297363, "learning_rate": 1.7330981398332266e-06, "loss": 0.8316, "step": 55035 }, { "epoch": 0.6707859554190584, "grad_norm": 1.615389108657837, "learning_rate": 1.7327774214239899e-06, "loss": 0.8341, "step": 55040 }, { "epoch": 0.6708468916432062, "grad_norm": 1.8850634098052979, "learning_rate": 1.7324567030147533e-06, "loss": 0.8095, "step": 55045 }, { "epoch": 0.670907827867354, "grad_norm": 1.8351856470108032, "learning_rate": 1.7321359846055165e-06, "loss": 0.8483, "step": 55050 }, { "epoch": 0.6709687640915019, "grad_norm": 2.1181540489196777, "learning_rate": 1.7318152661962798e-06, "loss": 0.8249, "step": 55055 }, { "epoch": 0.6710297003156497, "grad_norm": 2.0421457290649414, "learning_rate": 1.7314945477870432e-06, "loss": 0.8346, "step": 55060 }, { "epoch": 0.6710906365397975, "grad_norm": 1.8486948013305664, "learning_rate": 1.7311738293778062e-06, "loss": 0.8627, "step": 55065 }, { "epoch": 0.6711515727639452, "grad_norm": 1.9322115182876587, "learning_rate": 1.7308531109685697e-06, "loss": 0.8412, "step": 55070 }, { "epoch": 0.671212508988093, "grad_norm": 1.7373321056365967, "learning_rate": 1.730532392559333e-06, "loss": 0.7166, "step": 55075 }, { "epoch": 0.6712734452122409, "grad_norm": 1.7093228101730347, "learning_rate": 1.7302116741500963e-06, "loss": 0.8239, "step": 55080 }, { "epoch": 0.6713343814363887, "grad_norm": 1.7888957262039185, "learning_rate": 1.7298909557408598e-06, "loss": 0.7373, "step": 55085 }, { "epoch": 0.6713953176605365, "grad_norm": 1.9917209148406982, "learning_rate": 1.7295702373316232e-06, "loss": 0.9237, "step": 55090 }, { "epoch": 0.6714562538846843, "grad_norm": 2.020641326904297, "learning_rate": 1.7292495189223862e-06, "loss": 0.8321, "step": 55095 }, { "epoch": 0.6715171901088322, "grad_norm": 1.991178274154663, "learning_rate": 1.7289288005131497e-06, "loss": 0.8061, "step": 55100 }, { "epoch": 0.6715781263329799, "grad_norm": 2.0492706298828125, "learning_rate": 1.7286080821039129e-06, "loss": 0.8329, "step": 55105 }, { "epoch": 0.6716390625571277, "grad_norm": 1.956612467765808, "learning_rate": 1.7282873636946761e-06, "loss": 0.8491, "step": 55110 }, { "epoch": 0.6716999987812755, "grad_norm": 1.9692126512527466, "learning_rate": 1.7279666452854396e-06, "loss": 0.8759, "step": 55115 }, { "epoch": 0.6717609350054233, "grad_norm": 2.139247417449951, "learning_rate": 1.7276459268762028e-06, "loss": 0.769, "step": 55120 }, { "epoch": 0.6718218712295712, "grad_norm": 2.0706892013549805, "learning_rate": 1.7273252084669662e-06, "loss": 0.8118, "step": 55125 }, { "epoch": 0.671882807453719, "grad_norm": 1.8603863716125488, "learning_rate": 1.7270044900577297e-06, "loss": 0.8103, "step": 55130 }, { "epoch": 0.6719437436778667, "grad_norm": 2.2595765590667725, "learning_rate": 1.7266837716484927e-06, "loss": 0.825, "step": 55135 }, { "epoch": 0.6720046799020145, "grad_norm": 1.912007212638855, "learning_rate": 1.7263630532392561e-06, "loss": 0.8065, "step": 55140 }, { "epoch": 0.6720656161261623, "grad_norm": 1.7697257995605469, "learning_rate": 1.7260423348300193e-06, "loss": 0.8279, "step": 55145 }, { "epoch": 0.6721265523503102, "grad_norm": 2.361825942993164, "learning_rate": 1.7257216164207826e-06, "loss": 0.8505, "step": 55150 }, { "epoch": 0.672187488574458, "grad_norm": 1.6914982795715332, "learning_rate": 1.725400898011546e-06, "loss": 0.8072, "step": 55155 }, { "epoch": 0.6722484247986058, "grad_norm": 2.4165589809417725, "learning_rate": 1.7250801796023092e-06, "loss": 0.8252, "step": 55160 }, { "epoch": 0.6723093610227536, "grad_norm": 1.9546858072280884, "learning_rate": 1.7247594611930727e-06, "loss": 0.8619, "step": 55165 }, { "epoch": 0.6723702972469013, "grad_norm": 2.0067648887634277, "learning_rate": 1.7244387427838361e-06, "loss": 0.7501, "step": 55170 }, { "epoch": 0.6724312334710492, "grad_norm": 1.933969259262085, "learning_rate": 1.7241180243745991e-06, "loss": 0.8289, "step": 55175 }, { "epoch": 0.672492169695197, "grad_norm": 2.131624221801758, "learning_rate": 1.7237973059653626e-06, "loss": 0.8849, "step": 55180 }, { "epoch": 0.6725531059193448, "grad_norm": 1.9824787378311157, "learning_rate": 1.7234765875561258e-06, "loss": 0.8279, "step": 55185 }, { "epoch": 0.6726140421434926, "grad_norm": 1.9966204166412354, "learning_rate": 1.7231558691468893e-06, "loss": 0.8097, "step": 55190 }, { "epoch": 0.6726749783676405, "grad_norm": 1.6969764232635498, "learning_rate": 1.7228351507376525e-06, "loss": 0.8781, "step": 55195 }, { "epoch": 0.6727359145917883, "grad_norm": 1.7338787317276, "learning_rate": 1.7225144323284157e-06, "loss": 0.8267, "step": 55200 }, { "epoch": 0.672796850815936, "grad_norm": 2.0869288444519043, "learning_rate": 1.7221937139191792e-06, "loss": 0.8308, "step": 55205 }, { "epoch": 0.6728577870400838, "grad_norm": 2.390526294708252, "learning_rate": 1.7218729955099426e-06, "loss": 0.8204, "step": 55210 }, { "epoch": 0.6729187232642316, "grad_norm": 1.8233051300048828, "learning_rate": 1.7215522771007056e-06, "loss": 0.7802, "step": 55215 }, { "epoch": 0.6729796594883795, "grad_norm": 1.8964442014694214, "learning_rate": 1.721231558691469e-06, "loss": 0.8157, "step": 55220 }, { "epoch": 0.6730405957125273, "grad_norm": 2.100876808166504, "learning_rate": 1.7209108402822323e-06, "loss": 0.7337, "step": 55225 }, { "epoch": 0.6731015319366751, "grad_norm": 1.8321305513381958, "learning_rate": 1.7205901218729957e-06, "loss": 0.7102, "step": 55230 }, { "epoch": 0.6731624681608229, "grad_norm": 2.4474432468414307, "learning_rate": 1.720269403463759e-06, "loss": 0.8425, "step": 55235 }, { "epoch": 0.6732234043849706, "grad_norm": 2.0328598022460938, "learning_rate": 1.7199486850545222e-06, "loss": 0.8495, "step": 55240 }, { "epoch": 0.6732843406091185, "grad_norm": 2.152764081954956, "learning_rate": 1.7196279666452856e-06, "loss": 0.8127, "step": 55245 }, { "epoch": 0.6733452768332663, "grad_norm": 2.0712392330169678, "learning_rate": 1.719307248236049e-06, "loss": 0.8522, "step": 55250 }, { "epoch": 0.6734062130574141, "grad_norm": 1.9951143264770508, "learning_rate": 1.718986529826812e-06, "loss": 0.7134, "step": 55255 }, { "epoch": 0.6734671492815619, "grad_norm": 1.743890643119812, "learning_rate": 1.7186658114175755e-06, "loss": 0.7446, "step": 55260 }, { "epoch": 0.6735280855057098, "grad_norm": 1.999703049659729, "learning_rate": 1.7183450930083387e-06, "loss": 0.8054, "step": 55265 }, { "epoch": 0.6735890217298576, "grad_norm": 1.6658599376678467, "learning_rate": 1.7180243745991022e-06, "loss": 0.8671, "step": 55270 }, { "epoch": 0.6736499579540053, "grad_norm": 2.0325331687927246, "learning_rate": 1.7177036561898654e-06, "loss": 0.7751, "step": 55275 }, { "epoch": 0.6737108941781531, "grad_norm": 1.8150936365127563, "learning_rate": 1.7173829377806286e-06, "loss": 0.7628, "step": 55280 }, { "epoch": 0.6737718304023009, "grad_norm": 2.1202735900878906, "learning_rate": 1.717062219371392e-06, "loss": 0.8082, "step": 55285 }, { "epoch": 0.6738327666264488, "grad_norm": 1.9068524837493896, "learning_rate": 1.7167415009621555e-06, "loss": 0.8265, "step": 55290 }, { "epoch": 0.6738937028505966, "grad_norm": 1.91305410861969, "learning_rate": 1.7164207825529185e-06, "loss": 0.816, "step": 55295 }, { "epoch": 0.6739546390747444, "grad_norm": 1.9572051763534546, "learning_rate": 1.716100064143682e-06, "loss": 0.8158, "step": 55300 }, { "epoch": 0.6740155752988922, "grad_norm": 1.942911982536316, "learning_rate": 1.7157793457344454e-06, "loss": 0.7935, "step": 55305 }, { "epoch": 0.6740765115230399, "grad_norm": 1.7489526271820068, "learning_rate": 1.7154586273252086e-06, "loss": 0.866, "step": 55310 }, { "epoch": 0.6741374477471878, "grad_norm": 1.7190645933151245, "learning_rate": 1.715137908915972e-06, "loss": 0.8157, "step": 55315 }, { "epoch": 0.6741983839713356, "grad_norm": 2.0677082538604736, "learning_rate": 1.714817190506735e-06, "loss": 0.8021, "step": 55320 }, { "epoch": 0.6742593201954834, "grad_norm": 1.6702762842178345, "learning_rate": 1.7144964720974985e-06, "loss": 0.7671, "step": 55325 }, { "epoch": 0.6743202564196312, "grad_norm": 1.629177212715149, "learning_rate": 1.714175753688262e-06, "loss": 0.7551, "step": 55330 }, { "epoch": 0.674381192643779, "grad_norm": 1.9917088747024536, "learning_rate": 1.713855035279025e-06, "loss": 0.8399, "step": 55335 }, { "epoch": 0.6744421288679269, "grad_norm": 1.7902334928512573, "learning_rate": 1.7135343168697884e-06, "loss": 0.8135, "step": 55340 }, { "epoch": 0.6745030650920746, "grad_norm": 1.794224739074707, "learning_rate": 1.7132135984605519e-06, "loss": 0.8854, "step": 55345 }, { "epoch": 0.6745640013162224, "grad_norm": 1.815659761428833, "learning_rate": 1.7128928800513151e-06, "loss": 0.8142, "step": 55350 }, { "epoch": 0.6746249375403702, "grad_norm": 2.08943510055542, "learning_rate": 1.7125721616420785e-06, "loss": 0.7841, "step": 55355 }, { "epoch": 0.674685873764518, "grad_norm": 1.8274015188217163, "learning_rate": 1.7122514432328416e-06, "loss": 0.8592, "step": 55360 }, { "epoch": 0.6747468099886659, "grad_norm": 1.7817389965057373, "learning_rate": 1.711930724823605e-06, "loss": 0.8103, "step": 55365 }, { "epoch": 0.6748077462128137, "grad_norm": 2.144407033920288, "learning_rate": 1.7116100064143684e-06, "loss": 0.8506, "step": 55370 }, { "epoch": 0.6748686824369615, "grad_norm": 1.9580084085464478, "learning_rate": 1.7112892880051315e-06, "loss": 0.8442, "step": 55375 }, { "epoch": 0.6749296186611092, "grad_norm": 2.18147873878479, "learning_rate": 1.710968569595895e-06, "loss": 0.8012, "step": 55380 }, { "epoch": 0.674990554885257, "grad_norm": 1.944003939628601, "learning_rate": 1.7106478511866583e-06, "loss": 0.8473, "step": 55385 }, { "epoch": 0.6750514911094049, "grad_norm": 1.9551323652267456, "learning_rate": 1.7103271327774216e-06, "loss": 0.8599, "step": 55390 }, { "epoch": 0.6751124273335527, "grad_norm": 1.95401930809021, "learning_rate": 1.710006414368185e-06, "loss": 0.855, "step": 55395 }, { "epoch": 0.6751733635577005, "grad_norm": 1.9078733921051025, "learning_rate": 1.709685695958948e-06, "loss": 0.8666, "step": 55400 }, { "epoch": 0.6752342997818483, "grad_norm": 2.247995376586914, "learning_rate": 1.7093649775497115e-06, "loss": 0.8362, "step": 55405 }, { "epoch": 0.6752952360059962, "grad_norm": 2.278909683227539, "learning_rate": 1.709044259140475e-06, "loss": 0.8629, "step": 55410 }, { "epoch": 0.6753561722301439, "grad_norm": 2.1389272212982178, "learning_rate": 1.708723540731238e-06, "loss": 0.8314, "step": 55415 }, { "epoch": 0.6754171084542917, "grad_norm": 2.0726099014282227, "learning_rate": 1.7084028223220014e-06, "loss": 0.8318, "step": 55420 }, { "epoch": 0.6754780446784395, "grad_norm": 2.06965970993042, "learning_rate": 1.7080821039127648e-06, "loss": 0.8402, "step": 55425 }, { "epoch": 0.6755389809025873, "grad_norm": 2.1622154712677, "learning_rate": 1.707761385503528e-06, "loss": 0.7933, "step": 55430 }, { "epoch": 0.6755999171267352, "grad_norm": 2.072066307067871, "learning_rate": 1.7074406670942915e-06, "loss": 0.8547, "step": 55435 }, { "epoch": 0.675660853350883, "grad_norm": 1.988769292831421, "learning_rate": 1.7071199486850545e-06, "loss": 0.8154, "step": 55440 }, { "epoch": 0.6757217895750308, "grad_norm": 2.1254634857177734, "learning_rate": 1.706799230275818e-06, "loss": 0.85, "step": 55445 }, { "epoch": 0.6757827257991785, "grad_norm": 1.8840872049331665, "learning_rate": 1.7064785118665814e-06, "loss": 0.8503, "step": 55450 }, { "epoch": 0.6758436620233264, "grad_norm": 2.252819061279297, "learning_rate": 1.7061577934573446e-06, "loss": 0.7639, "step": 55455 }, { "epoch": 0.6759045982474742, "grad_norm": 1.8581790924072266, "learning_rate": 1.7058370750481078e-06, "loss": 0.8236, "step": 55460 }, { "epoch": 0.675965534471622, "grad_norm": 1.8120591640472412, "learning_rate": 1.7055163566388713e-06, "loss": 0.7458, "step": 55465 }, { "epoch": 0.6760264706957698, "grad_norm": 2.505941867828369, "learning_rate": 1.7051956382296345e-06, "loss": 0.8376, "step": 55470 }, { "epoch": 0.6760874069199176, "grad_norm": 1.745958685874939, "learning_rate": 1.704874919820398e-06, "loss": 0.7897, "step": 55475 }, { "epoch": 0.6761483431440655, "grad_norm": 1.6882447004318237, "learning_rate": 1.704554201411161e-06, "loss": 0.8258, "step": 55480 }, { "epoch": 0.6762092793682132, "grad_norm": 1.7879916429519653, "learning_rate": 1.7042334830019244e-06, "loss": 0.7991, "step": 55485 }, { "epoch": 0.676270215592361, "grad_norm": 1.9079492092132568, "learning_rate": 1.7039127645926878e-06, "loss": 0.862, "step": 55490 }, { "epoch": 0.6763311518165088, "grad_norm": 1.819204330444336, "learning_rate": 1.703592046183451e-06, "loss": 0.7916, "step": 55495 }, { "epoch": 0.6763920880406566, "grad_norm": 2.2840702533721924, "learning_rate": 1.7032713277742143e-06, "loss": 0.8151, "step": 55500 }, { "epoch": 0.6764530242648045, "grad_norm": 1.9107273817062378, "learning_rate": 1.7029506093649777e-06, "loss": 0.7833, "step": 55505 }, { "epoch": 0.6765139604889523, "grad_norm": 2.2135300636291504, "learning_rate": 1.702629890955741e-06, "loss": 0.8154, "step": 55510 }, { "epoch": 0.6765748967131001, "grad_norm": 2.341700315475464, "learning_rate": 1.7023091725465044e-06, "loss": 0.827, "step": 55515 }, { "epoch": 0.6766358329372478, "grad_norm": 1.9201383590698242, "learning_rate": 1.7019884541372674e-06, "loss": 0.8549, "step": 55520 }, { "epoch": 0.6766967691613957, "grad_norm": 2.0025148391723633, "learning_rate": 1.7016677357280309e-06, "loss": 0.9039, "step": 55525 }, { "epoch": 0.6767577053855435, "grad_norm": 1.9340636730194092, "learning_rate": 1.7013470173187943e-06, "loss": 0.7929, "step": 55530 }, { "epoch": 0.6768186416096913, "grad_norm": 1.9916249513626099, "learning_rate": 1.7010262989095575e-06, "loss": 0.7849, "step": 55535 }, { "epoch": 0.6768795778338391, "grad_norm": 2.3861467838287354, "learning_rate": 1.700705580500321e-06, "loss": 0.7777, "step": 55540 }, { "epoch": 0.6769405140579869, "grad_norm": 1.9694606065750122, "learning_rate": 1.7003848620910842e-06, "loss": 0.803, "step": 55545 }, { "epoch": 0.6770014502821348, "grad_norm": 2.182748317718506, "learning_rate": 1.7000641436818474e-06, "loss": 0.8503, "step": 55550 }, { "epoch": 0.6770623865062825, "grad_norm": 2.0329673290252686, "learning_rate": 1.6997434252726109e-06, "loss": 0.8188, "step": 55555 }, { "epoch": 0.6771233227304303, "grad_norm": 1.8421974182128906, "learning_rate": 1.6994227068633739e-06, "loss": 0.8504, "step": 55560 }, { "epoch": 0.6771842589545781, "grad_norm": 1.8446019887924194, "learning_rate": 1.6991019884541373e-06, "loss": 0.8837, "step": 55565 }, { "epoch": 0.677245195178726, "grad_norm": 1.955045223236084, "learning_rate": 1.6987812700449008e-06, "loss": 0.8461, "step": 55570 }, { "epoch": 0.6773061314028738, "grad_norm": 1.939541220664978, "learning_rate": 1.698460551635664e-06, "loss": 0.809, "step": 55575 }, { "epoch": 0.6773670676270216, "grad_norm": 1.7561193704605103, "learning_rate": 1.6981398332264274e-06, "loss": 0.8162, "step": 55580 }, { "epoch": 0.6774280038511694, "grad_norm": 1.71684730052948, "learning_rate": 1.6978191148171907e-06, "loss": 0.8651, "step": 55585 }, { "epoch": 0.6774889400753171, "grad_norm": 2.3232438564300537, "learning_rate": 1.6974983964079539e-06, "loss": 0.8063, "step": 55590 }, { "epoch": 0.677549876299465, "grad_norm": 2.1629467010498047, "learning_rate": 1.6971776779987173e-06, "loss": 0.8358, "step": 55595 }, { "epoch": 0.6776108125236128, "grad_norm": 1.8632932901382446, "learning_rate": 1.6968569595894808e-06, "loss": 0.8292, "step": 55600 }, { "epoch": 0.6776717487477606, "grad_norm": 2.0780131816864014, "learning_rate": 1.6965362411802438e-06, "loss": 0.7733, "step": 55605 }, { "epoch": 0.6777326849719084, "grad_norm": 2.8187062740325928, "learning_rate": 1.6962155227710072e-06, "loss": 0.8661, "step": 55610 }, { "epoch": 0.6777936211960562, "grad_norm": 2.1537606716156006, "learning_rate": 1.6958948043617705e-06, "loss": 0.7889, "step": 55615 }, { "epoch": 0.6778545574202041, "grad_norm": 1.8777825832366943, "learning_rate": 1.695574085952534e-06, "loss": 0.8082, "step": 55620 }, { "epoch": 0.6779154936443518, "grad_norm": 1.9143089056015015, "learning_rate": 1.6952533675432971e-06, "loss": 0.7987, "step": 55625 }, { "epoch": 0.6779764298684996, "grad_norm": 2.011296033859253, "learning_rate": 1.6949326491340604e-06, "loss": 0.818, "step": 55630 }, { "epoch": 0.6780373660926474, "grad_norm": 2.143204689025879, "learning_rate": 1.6946119307248238e-06, "loss": 0.8721, "step": 55635 }, { "epoch": 0.6780983023167952, "grad_norm": 2.086620569229126, "learning_rate": 1.6942912123155872e-06, "loss": 0.8064, "step": 55640 }, { "epoch": 0.6781592385409431, "grad_norm": 1.8259353637695312, "learning_rate": 1.6939704939063503e-06, "loss": 0.7889, "step": 55645 }, { "epoch": 0.6782201747650909, "grad_norm": 1.695885181427002, "learning_rate": 1.6936497754971137e-06, "loss": 0.7844, "step": 55650 }, { "epoch": 0.6782811109892387, "grad_norm": 2.1092405319213867, "learning_rate": 1.693329057087877e-06, "loss": 0.8897, "step": 55655 }, { "epoch": 0.6783420472133864, "grad_norm": 1.8180323839187622, "learning_rate": 1.6930083386786404e-06, "loss": 0.7834, "step": 55660 }, { "epoch": 0.6784029834375342, "grad_norm": 2.1407480239868164, "learning_rate": 1.6926876202694038e-06, "loss": 0.8436, "step": 55665 }, { "epoch": 0.6784639196616821, "grad_norm": 1.7541570663452148, "learning_rate": 1.6923669018601668e-06, "loss": 0.879, "step": 55670 }, { "epoch": 0.6785248558858299, "grad_norm": 2.109208106994629, "learning_rate": 1.6920461834509303e-06, "loss": 0.8187, "step": 55675 }, { "epoch": 0.6785857921099777, "grad_norm": 2.4119319915771484, "learning_rate": 1.6917254650416937e-06, "loss": 0.8991, "step": 55680 }, { "epoch": 0.6786467283341255, "grad_norm": 1.9544713497161865, "learning_rate": 1.6914047466324567e-06, "loss": 0.7879, "step": 55685 }, { "epoch": 0.6787076645582734, "grad_norm": 2.2035980224609375, "learning_rate": 1.6910840282232202e-06, "loss": 0.8041, "step": 55690 }, { "epoch": 0.6787686007824211, "grad_norm": 2.0537242889404297, "learning_rate": 1.6907633098139834e-06, "loss": 0.7892, "step": 55695 }, { "epoch": 0.6788295370065689, "grad_norm": 2.3258697986602783, "learning_rate": 1.6904425914047468e-06, "loss": 0.9127, "step": 55700 }, { "epoch": 0.6788904732307167, "grad_norm": 2.3683245182037354, "learning_rate": 1.6901218729955103e-06, "loss": 0.9035, "step": 55705 }, { "epoch": 0.6789514094548645, "grad_norm": 2.2310996055603027, "learning_rate": 1.6898011545862733e-06, "loss": 0.748, "step": 55710 }, { "epoch": 0.6790123456790124, "grad_norm": 2.5005533695220947, "learning_rate": 1.6894804361770367e-06, "loss": 0.8475, "step": 55715 }, { "epoch": 0.6790732819031602, "grad_norm": 1.7689878940582275, "learning_rate": 1.6891597177678002e-06, "loss": 0.8836, "step": 55720 }, { "epoch": 0.679134218127308, "grad_norm": 2.10332989692688, "learning_rate": 1.6888389993585632e-06, "loss": 0.8744, "step": 55725 }, { "epoch": 0.6791951543514557, "grad_norm": 1.6725327968597412, "learning_rate": 1.6885182809493266e-06, "loss": 0.7923, "step": 55730 }, { "epoch": 0.6792560905756035, "grad_norm": 2.006786584854126, "learning_rate": 1.6881975625400898e-06, "loss": 0.8048, "step": 55735 }, { "epoch": 0.6793170267997514, "grad_norm": 1.7940375804901123, "learning_rate": 1.6878768441308533e-06, "loss": 0.956, "step": 55740 }, { "epoch": 0.6793779630238992, "grad_norm": 2.177513360977173, "learning_rate": 1.6875561257216167e-06, "loss": 0.8725, "step": 55745 }, { "epoch": 0.679438899248047, "grad_norm": 2.071423292160034, "learning_rate": 1.6872354073123797e-06, "loss": 0.8358, "step": 55750 }, { "epoch": 0.6794998354721948, "grad_norm": 2.3359100818634033, "learning_rate": 1.6869146889031432e-06, "loss": 0.7957, "step": 55755 }, { "epoch": 0.6795607716963427, "grad_norm": 1.7528010606765747, "learning_rate": 1.6865939704939066e-06, "loss": 0.8346, "step": 55760 }, { "epoch": 0.6796217079204904, "grad_norm": 1.9956978559494019, "learning_rate": 1.6862732520846696e-06, "loss": 0.8285, "step": 55765 }, { "epoch": 0.6796826441446382, "grad_norm": 2.7878334522247314, "learning_rate": 1.685952533675433e-06, "loss": 0.8894, "step": 55770 }, { "epoch": 0.679743580368786, "grad_norm": 1.725096344947815, "learning_rate": 1.6856318152661963e-06, "loss": 0.8617, "step": 55775 }, { "epoch": 0.6798045165929338, "grad_norm": 2.145956516265869, "learning_rate": 1.6853110968569597e-06, "loss": 0.8745, "step": 55780 }, { "epoch": 0.6798654528170817, "grad_norm": 2.044363260269165, "learning_rate": 1.6849903784477232e-06, "loss": 0.8456, "step": 55785 }, { "epoch": 0.6799263890412295, "grad_norm": 1.7768335342407227, "learning_rate": 1.6846696600384862e-06, "loss": 0.7581, "step": 55790 }, { "epoch": 0.6799873252653773, "grad_norm": 1.9080661535263062, "learning_rate": 1.6843489416292496e-06, "loss": 0.7858, "step": 55795 }, { "epoch": 0.680048261489525, "grad_norm": 1.8771026134490967, "learning_rate": 1.684028223220013e-06, "loss": 0.7954, "step": 55800 }, { "epoch": 0.6801091977136728, "grad_norm": 1.9821547269821167, "learning_rate": 1.6837075048107763e-06, "loss": 0.8479, "step": 55805 }, { "epoch": 0.6801701339378207, "grad_norm": 2.1294336318969727, "learning_rate": 1.6833867864015395e-06, "loss": 0.8867, "step": 55810 }, { "epoch": 0.6802310701619685, "grad_norm": 2.3947935104370117, "learning_rate": 1.6830660679923028e-06, "loss": 0.8069, "step": 55815 }, { "epoch": 0.6802920063861163, "grad_norm": 2.08553409576416, "learning_rate": 1.6827453495830662e-06, "loss": 0.8509, "step": 55820 }, { "epoch": 0.6803529426102641, "grad_norm": 1.864963412284851, "learning_rate": 1.6824246311738297e-06, "loss": 0.904, "step": 55825 }, { "epoch": 0.680413878834412, "grad_norm": 2.113401174545288, "learning_rate": 1.6821039127645927e-06, "loss": 0.7739, "step": 55830 }, { "epoch": 0.6804748150585597, "grad_norm": 1.7261788845062256, "learning_rate": 1.6817831943553561e-06, "loss": 0.8501, "step": 55835 }, { "epoch": 0.6805357512827075, "grad_norm": 2.262118339538574, "learning_rate": 1.6814624759461196e-06, "loss": 0.8989, "step": 55840 }, { "epoch": 0.6805966875068553, "grad_norm": 1.9590057134628296, "learning_rate": 1.6811417575368828e-06, "loss": 0.7635, "step": 55845 }, { "epoch": 0.6806576237310031, "grad_norm": 1.8171331882476807, "learning_rate": 1.680821039127646e-06, "loss": 0.8376, "step": 55850 }, { "epoch": 0.680718559955151, "grad_norm": 1.9639168977737427, "learning_rate": 1.6805003207184092e-06, "loss": 0.8087, "step": 55855 }, { "epoch": 0.6807794961792988, "grad_norm": 2.109856367111206, "learning_rate": 1.6801796023091727e-06, "loss": 0.7782, "step": 55860 }, { "epoch": 0.6808404324034466, "grad_norm": 1.9993575811386108, "learning_rate": 1.6798588838999361e-06, "loss": 0.8129, "step": 55865 }, { "epoch": 0.6809013686275943, "grad_norm": 1.6935416460037231, "learning_rate": 1.6795381654906991e-06, "loss": 0.9135, "step": 55870 }, { "epoch": 0.6809623048517421, "grad_norm": 2.0364291667938232, "learning_rate": 1.6792174470814626e-06, "loss": 0.8287, "step": 55875 }, { "epoch": 0.68102324107589, "grad_norm": 1.8145803213119507, "learning_rate": 1.678896728672226e-06, "loss": 0.799, "step": 55880 }, { "epoch": 0.6810841773000378, "grad_norm": 2.451058864593506, "learning_rate": 1.6785760102629892e-06, "loss": 0.8458, "step": 55885 }, { "epoch": 0.6811451135241856, "grad_norm": 1.8141425848007202, "learning_rate": 1.6782552918537525e-06, "loss": 0.7673, "step": 55890 }, { "epoch": 0.6812060497483334, "grad_norm": 1.9308116436004639, "learning_rate": 1.677934573444516e-06, "loss": 0.9214, "step": 55895 }, { "epoch": 0.6812669859724813, "grad_norm": 1.7159003019332886, "learning_rate": 1.6776138550352791e-06, "loss": 0.8944, "step": 55900 }, { "epoch": 0.681327922196629, "grad_norm": 2.2685770988464355, "learning_rate": 1.6772931366260426e-06, "loss": 0.8438, "step": 55905 }, { "epoch": 0.6813888584207768, "grad_norm": 2.1061458587646484, "learning_rate": 1.6769724182168056e-06, "loss": 0.8806, "step": 55910 }, { "epoch": 0.6814497946449246, "grad_norm": 2.0624947547912598, "learning_rate": 1.676651699807569e-06, "loss": 0.8281, "step": 55915 }, { "epoch": 0.6815107308690724, "grad_norm": 1.918979525566101, "learning_rate": 1.6763309813983325e-06, "loss": 0.8177, "step": 55920 }, { "epoch": 0.6815716670932203, "grad_norm": 2.0963294506073, "learning_rate": 1.6760102629890957e-06, "loss": 0.8031, "step": 55925 }, { "epoch": 0.6816326033173681, "grad_norm": 2.006666898727417, "learning_rate": 1.6756895445798591e-06, "loss": 0.7938, "step": 55930 }, { "epoch": 0.6816935395415159, "grad_norm": 1.7575753927230835, "learning_rate": 1.6753688261706224e-06, "loss": 0.8224, "step": 55935 }, { "epoch": 0.6817544757656636, "grad_norm": 2.0356152057647705, "learning_rate": 1.6750481077613856e-06, "loss": 0.8423, "step": 55940 }, { "epoch": 0.6818154119898114, "grad_norm": 1.6505820751190186, "learning_rate": 1.674727389352149e-06, "loss": 0.8194, "step": 55945 }, { "epoch": 0.6818763482139593, "grad_norm": 1.7033841609954834, "learning_rate": 1.674406670942912e-06, "loss": 0.8279, "step": 55950 }, { "epoch": 0.6819372844381071, "grad_norm": 1.7728986740112305, "learning_rate": 1.6740859525336755e-06, "loss": 0.8381, "step": 55955 }, { "epoch": 0.6819982206622549, "grad_norm": 2.054105281829834, "learning_rate": 1.673765234124439e-06, "loss": 0.7864, "step": 55960 }, { "epoch": 0.6820591568864027, "grad_norm": 1.7834793329238892, "learning_rate": 1.6734445157152022e-06, "loss": 0.8123, "step": 55965 }, { "epoch": 0.6821200931105506, "grad_norm": 1.9704062938690186, "learning_rate": 1.6731237973059656e-06, "loss": 0.844, "step": 55970 }, { "epoch": 0.6821810293346983, "grad_norm": 3.0541892051696777, "learning_rate": 1.6728030788967288e-06, "loss": 0.8434, "step": 55975 }, { "epoch": 0.6822419655588461, "grad_norm": 1.8143725395202637, "learning_rate": 1.672482360487492e-06, "loss": 0.845, "step": 55980 }, { "epoch": 0.6823029017829939, "grad_norm": 1.9146047830581665, "learning_rate": 1.6721616420782555e-06, "loss": 0.8422, "step": 55985 }, { "epoch": 0.6823638380071417, "grad_norm": 1.7788077592849731, "learning_rate": 1.6718409236690185e-06, "loss": 0.7836, "step": 55990 }, { "epoch": 0.6824247742312896, "grad_norm": 1.7074530124664307, "learning_rate": 1.671520205259782e-06, "loss": 0.813, "step": 55995 }, { "epoch": 0.6824857104554374, "grad_norm": 2.2123026847839355, "learning_rate": 1.6711994868505454e-06, "loss": 0.733, "step": 56000 }, { "epoch": 0.6825466466795852, "grad_norm": 2.0666141510009766, "learning_rate": 1.6708787684413086e-06, "loss": 0.825, "step": 56005 }, { "epoch": 0.6826075829037329, "grad_norm": 1.8584989309310913, "learning_rate": 1.670558050032072e-06, "loss": 0.8116, "step": 56010 }, { "epoch": 0.6826685191278807, "grad_norm": 1.8587859869003296, "learning_rate": 1.6702373316228355e-06, "loss": 0.7911, "step": 56015 }, { "epoch": 0.6827294553520286, "grad_norm": 3.80501389503479, "learning_rate": 1.6699166132135985e-06, "loss": 0.8361, "step": 56020 }, { "epoch": 0.6827903915761764, "grad_norm": 1.958949327468872, "learning_rate": 1.669595894804362e-06, "loss": 0.7851, "step": 56025 }, { "epoch": 0.6828513278003242, "grad_norm": 2.672776460647583, "learning_rate": 1.6692751763951252e-06, "loss": 0.8682, "step": 56030 }, { "epoch": 0.682912264024472, "grad_norm": 1.9079232215881348, "learning_rate": 1.6689544579858884e-06, "loss": 0.7784, "step": 56035 }, { "epoch": 0.6829732002486198, "grad_norm": 2.0769197940826416, "learning_rate": 1.6686337395766519e-06, "loss": 0.8325, "step": 56040 }, { "epoch": 0.6830341364727676, "grad_norm": 2.1442830562591553, "learning_rate": 1.668313021167415e-06, "loss": 0.8869, "step": 56045 }, { "epoch": 0.6830950726969154, "grad_norm": 2.21354341506958, "learning_rate": 1.6679923027581785e-06, "loss": 0.8463, "step": 56050 }, { "epoch": 0.6831560089210632, "grad_norm": 1.8481789827346802, "learning_rate": 1.667671584348942e-06, "loss": 0.7547, "step": 56055 }, { "epoch": 0.683216945145211, "grad_norm": 2.0341856479644775, "learning_rate": 1.667350865939705e-06, "loss": 0.7628, "step": 56060 }, { "epoch": 0.6832778813693589, "grad_norm": 2.0951552391052246, "learning_rate": 1.6670301475304684e-06, "loss": 0.7721, "step": 56065 }, { "epoch": 0.6833388175935067, "grad_norm": 2.022315740585327, "learning_rate": 1.6667094291212317e-06, "loss": 0.7772, "step": 56070 }, { "epoch": 0.6833997538176545, "grad_norm": 1.9016374349594116, "learning_rate": 1.6663887107119949e-06, "loss": 0.8359, "step": 56075 }, { "epoch": 0.6834606900418022, "grad_norm": 1.7493492364883423, "learning_rate": 1.6660679923027583e-06, "loss": 0.8148, "step": 56080 }, { "epoch": 0.68352162626595, "grad_norm": 2.164046049118042, "learning_rate": 1.6657472738935216e-06, "loss": 0.7934, "step": 56085 }, { "epoch": 0.6835825624900979, "grad_norm": 2.0174636840820312, "learning_rate": 1.665426555484285e-06, "loss": 0.9251, "step": 56090 }, { "epoch": 0.6836434987142457, "grad_norm": 1.999266266822815, "learning_rate": 1.6651058370750484e-06, "loss": 0.8184, "step": 56095 }, { "epoch": 0.6837044349383935, "grad_norm": 2.053854465484619, "learning_rate": 1.6647851186658115e-06, "loss": 0.8064, "step": 56100 }, { "epoch": 0.6837653711625413, "grad_norm": 1.8093467950820923, "learning_rate": 1.664464400256575e-06, "loss": 0.8485, "step": 56105 }, { "epoch": 0.683826307386689, "grad_norm": 2.0171523094177246, "learning_rate": 1.6641436818473381e-06, "loss": 0.7811, "step": 56110 }, { "epoch": 0.6838872436108369, "grad_norm": 2.1478700637817383, "learning_rate": 1.6638229634381014e-06, "loss": 0.8135, "step": 56115 }, { "epoch": 0.6839481798349847, "grad_norm": 1.9313009977340698, "learning_rate": 1.6635022450288648e-06, "loss": 0.7918, "step": 56120 }, { "epoch": 0.6840091160591325, "grad_norm": 2.07670521736145, "learning_rate": 1.663181526619628e-06, "loss": 0.8008, "step": 56125 }, { "epoch": 0.6840700522832803, "grad_norm": 2.7381677627563477, "learning_rate": 1.6628608082103915e-06, "loss": 0.8848, "step": 56130 }, { "epoch": 0.6841309885074282, "grad_norm": 2.2520275115966797, "learning_rate": 1.662540089801155e-06, "loss": 0.7856, "step": 56135 }, { "epoch": 0.684191924731576, "grad_norm": 2.0456831455230713, "learning_rate": 1.662219371391918e-06, "loss": 0.8041, "step": 56140 }, { "epoch": 0.6842528609557237, "grad_norm": 1.9435089826583862, "learning_rate": 1.6618986529826814e-06, "loss": 0.7987, "step": 56145 }, { "epoch": 0.6843137971798715, "grad_norm": 1.711052417755127, "learning_rate": 1.6615779345734446e-06, "loss": 0.7662, "step": 56150 }, { "epoch": 0.6843747334040193, "grad_norm": 2.138862133026123, "learning_rate": 1.661257216164208e-06, "loss": 0.8003, "step": 56155 }, { "epoch": 0.6844356696281672, "grad_norm": 1.9077304601669312, "learning_rate": 1.6609364977549713e-06, "loss": 0.8117, "step": 56160 }, { "epoch": 0.684496605852315, "grad_norm": 1.9830368757247925, "learning_rate": 1.6606157793457345e-06, "loss": 0.8975, "step": 56165 }, { "epoch": 0.6845575420764628, "grad_norm": 1.7489649057388306, "learning_rate": 1.660295060936498e-06, "loss": 0.8048, "step": 56170 }, { "epoch": 0.6846184783006106, "grad_norm": 1.8655885457992554, "learning_rate": 1.6599743425272614e-06, "loss": 0.8089, "step": 56175 }, { "epoch": 0.6846794145247583, "grad_norm": 2.305783748626709, "learning_rate": 1.6596536241180244e-06, "loss": 0.8616, "step": 56180 }, { "epoch": 0.6847403507489062, "grad_norm": 2.028027057647705, "learning_rate": 1.6593329057087878e-06, "loss": 0.8082, "step": 56185 }, { "epoch": 0.684801286973054, "grad_norm": 2.118112802505493, "learning_rate": 1.659012187299551e-06, "loss": 0.805, "step": 56190 }, { "epoch": 0.6848622231972018, "grad_norm": 2.0387930870056152, "learning_rate": 1.6586914688903145e-06, "loss": 0.7935, "step": 56195 }, { "epoch": 0.6849231594213496, "grad_norm": 1.8090403079986572, "learning_rate": 1.6583707504810777e-06, "loss": 0.862, "step": 56200 }, { "epoch": 0.6849840956454974, "grad_norm": 2.3545196056365967, "learning_rate": 1.658050032071841e-06, "loss": 0.8787, "step": 56205 }, { "epoch": 0.6850450318696453, "grad_norm": 2.0554397106170654, "learning_rate": 1.6577293136626044e-06, "loss": 0.8092, "step": 56210 }, { "epoch": 0.685105968093793, "grad_norm": 2.0408363342285156, "learning_rate": 1.6574085952533678e-06, "loss": 0.764, "step": 56215 }, { "epoch": 0.6851669043179408, "grad_norm": 1.8221372365951538, "learning_rate": 1.6570878768441308e-06, "loss": 0.829, "step": 56220 }, { "epoch": 0.6852278405420886, "grad_norm": 2.292632818222046, "learning_rate": 1.6567671584348943e-06, "loss": 0.8165, "step": 56225 }, { "epoch": 0.6852887767662365, "grad_norm": 2.019585609436035, "learning_rate": 1.6564464400256577e-06, "loss": 0.8054, "step": 56230 }, { "epoch": 0.6853497129903843, "grad_norm": 2.489245653152466, "learning_rate": 1.656125721616421e-06, "loss": 0.853, "step": 56235 }, { "epoch": 0.6854106492145321, "grad_norm": 1.6457518339157104, "learning_rate": 1.6558050032071842e-06, "loss": 0.7992, "step": 56240 }, { "epoch": 0.6854715854386799, "grad_norm": 1.8346328735351562, "learning_rate": 1.6554842847979474e-06, "loss": 0.7414, "step": 56245 }, { "epoch": 0.6855325216628276, "grad_norm": 1.7008682489395142, "learning_rate": 1.6551635663887109e-06, "loss": 0.7901, "step": 56250 }, { "epoch": 0.6855934578869755, "grad_norm": 1.763105034828186, "learning_rate": 1.6548428479794743e-06, "loss": 0.8421, "step": 56255 }, { "epoch": 0.6856543941111233, "grad_norm": 1.878814458847046, "learning_rate": 1.6545221295702373e-06, "loss": 0.8194, "step": 56260 }, { "epoch": 0.6857153303352711, "grad_norm": 1.8302208185195923, "learning_rate": 1.6542014111610008e-06, "loss": 0.8316, "step": 56265 }, { "epoch": 0.6857762665594189, "grad_norm": 1.696844220161438, "learning_rate": 1.6538806927517642e-06, "loss": 0.8447, "step": 56270 }, { "epoch": 0.6858372027835667, "grad_norm": 1.6450042724609375, "learning_rate": 1.6535599743425274e-06, "loss": 0.7989, "step": 56275 }, { "epoch": 0.6858981390077146, "grad_norm": 2.012712001800537, "learning_rate": 1.6532392559332909e-06, "loss": 0.8649, "step": 56280 }, { "epoch": 0.6859590752318623, "grad_norm": 2.0109894275665283, "learning_rate": 1.6529185375240539e-06, "loss": 0.7892, "step": 56285 }, { "epoch": 0.6860200114560101, "grad_norm": 1.5990793704986572, "learning_rate": 1.6525978191148173e-06, "loss": 0.8078, "step": 56290 }, { "epoch": 0.6860809476801579, "grad_norm": 1.720879316329956, "learning_rate": 1.6522771007055808e-06, "loss": 0.8186, "step": 56295 }, { "epoch": 0.6861418839043057, "grad_norm": 1.8234081268310547, "learning_rate": 1.6519563822963438e-06, "loss": 0.7997, "step": 56300 }, { "epoch": 0.6862028201284536, "grad_norm": 2.0580508708953857, "learning_rate": 1.6516356638871072e-06, "loss": 0.8793, "step": 56305 }, { "epoch": 0.6862637563526014, "grad_norm": 1.907638430595398, "learning_rate": 1.6513149454778707e-06, "loss": 0.9394, "step": 56310 }, { "epoch": 0.6863246925767492, "grad_norm": 1.590397596359253, "learning_rate": 1.6509942270686339e-06, "loss": 0.7973, "step": 56315 }, { "epoch": 0.6863856288008969, "grad_norm": 2.0726706981658936, "learning_rate": 1.6506735086593973e-06, "loss": 0.9227, "step": 56320 }, { "epoch": 0.6864465650250448, "grad_norm": 2.2797720432281494, "learning_rate": 1.6503527902501603e-06, "loss": 0.7707, "step": 56325 }, { "epoch": 0.6865075012491926, "grad_norm": 1.8944416046142578, "learning_rate": 1.6500320718409238e-06, "loss": 0.795, "step": 56330 }, { "epoch": 0.6865684374733404, "grad_norm": 2.0737643241882324, "learning_rate": 1.6497113534316872e-06, "loss": 0.8672, "step": 56335 }, { "epoch": 0.6866293736974882, "grad_norm": 2.1473848819732666, "learning_rate": 1.6493906350224502e-06, "loss": 0.8562, "step": 56340 }, { "epoch": 0.686690309921636, "grad_norm": 2.18395733833313, "learning_rate": 1.6490699166132137e-06, "loss": 0.7811, "step": 56345 }, { "epoch": 0.6867512461457839, "grad_norm": 1.834579586982727, "learning_rate": 1.6487491982039771e-06, "loss": 0.8199, "step": 56350 }, { "epoch": 0.6868121823699316, "grad_norm": 1.774391531944275, "learning_rate": 1.6484284797947403e-06, "loss": 0.8505, "step": 56355 }, { "epoch": 0.6868731185940794, "grad_norm": 1.6837238073349, "learning_rate": 1.6481077613855038e-06, "loss": 0.8271, "step": 56360 }, { "epoch": 0.6869340548182272, "grad_norm": 2.009422540664673, "learning_rate": 1.6477870429762668e-06, "loss": 0.8419, "step": 56365 }, { "epoch": 0.686994991042375, "grad_norm": 1.912933111190796, "learning_rate": 1.6474663245670302e-06, "loss": 0.8541, "step": 56370 }, { "epoch": 0.6870559272665229, "grad_norm": 2.1862974166870117, "learning_rate": 1.6471456061577937e-06, "loss": 0.8015, "step": 56375 }, { "epoch": 0.6871168634906707, "grad_norm": 1.97188401222229, "learning_rate": 1.646824887748557e-06, "loss": 0.7824, "step": 56380 }, { "epoch": 0.6871777997148185, "grad_norm": 2.027939796447754, "learning_rate": 1.6465041693393201e-06, "loss": 0.8385, "step": 56385 }, { "epoch": 0.6872387359389662, "grad_norm": 2.117295026779175, "learning_rate": 1.6461834509300836e-06, "loss": 0.8347, "step": 56390 }, { "epoch": 0.687299672163114, "grad_norm": 1.9254180192947388, "learning_rate": 1.6458627325208468e-06, "loss": 0.7984, "step": 56395 }, { "epoch": 0.6873606083872619, "grad_norm": 2.092729091644287, "learning_rate": 1.6455420141116103e-06, "loss": 0.8225, "step": 56400 }, { "epoch": 0.6874215446114097, "grad_norm": 2.189340114593506, "learning_rate": 1.6452212957023733e-06, "loss": 0.8079, "step": 56405 }, { "epoch": 0.6874824808355575, "grad_norm": 1.8558542728424072, "learning_rate": 1.6449005772931367e-06, "loss": 0.8345, "step": 56410 }, { "epoch": 0.6875434170597053, "grad_norm": 2.355177164077759, "learning_rate": 1.6445798588839001e-06, "loss": 0.8531, "step": 56415 }, { "epoch": 0.6876043532838532, "grad_norm": 2.171518564224243, "learning_rate": 1.6442591404746634e-06, "loss": 0.8792, "step": 56420 }, { "epoch": 0.6876652895080009, "grad_norm": 1.9813977479934692, "learning_rate": 1.6439384220654266e-06, "loss": 0.8287, "step": 56425 }, { "epoch": 0.6877262257321487, "grad_norm": 1.7283592224121094, "learning_rate": 1.64361770365619e-06, "loss": 0.8804, "step": 56430 }, { "epoch": 0.6877871619562965, "grad_norm": 2.346271276473999, "learning_rate": 1.6432969852469533e-06, "loss": 0.7941, "step": 56435 }, { "epoch": 0.6878480981804443, "grad_norm": 2.5305769443511963, "learning_rate": 1.6429762668377167e-06, "loss": 0.851, "step": 56440 }, { "epoch": 0.6879090344045922, "grad_norm": 1.8114440441131592, "learning_rate": 1.6426555484284797e-06, "loss": 0.8313, "step": 56445 }, { "epoch": 0.68796997062874, "grad_norm": 1.9931806325912476, "learning_rate": 1.6423348300192432e-06, "loss": 0.8436, "step": 56450 }, { "epoch": 0.6880309068528878, "grad_norm": 1.8848844766616821, "learning_rate": 1.6420141116100066e-06, "loss": 0.8128, "step": 56455 }, { "epoch": 0.6880918430770355, "grad_norm": 1.9679241180419922, "learning_rate": 1.6416933932007698e-06, "loss": 0.8484, "step": 56460 }, { "epoch": 0.6881527793011833, "grad_norm": 2.6433868408203125, "learning_rate": 1.641372674791533e-06, "loss": 0.7662, "step": 56465 }, { "epoch": 0.6882137155253312, "grad_norm": 1.828488826751709, "learning_rate": 1.6410519563822965e-06, "loss": 0.7688, "step": 56470 }, { "epoch": 0.688274651749479, "grad_norm": 2.000138998031616, "learning_rate": 1.6407312379730597e-06, "loss": 0.8003, "step": 56475 }, { "epoch": 0.6883355879736268, "grad_norm": 3.783820867538452, "learning_rate": 1.6404105195638232e-06, "loss": 0.8637, "step": 56480 }, { "epoch": 0.6883965241977746, "grad_norm": 1.7189077138900757, "learning_rate": 1.6400898011545862e-06, "loss": 0.7546, "step": 56485 }, { "epoch": 0.6884574604219225, "grad_norm": 1.8883209228515625, "learning_rate": 1.6397690827453496e-06, "loss": 0.7885, "step": 56490 }, { "epoch": 0.6885183966460702, "grad_norm": 1.968752384185791, "learning_rate": 1.639448364336113e-06, "loss": 0.8448, "step": 56495 }, { "epoch": 0.688579332870218, "grad_norm": 2.388935089111328, "learning_rate": 1.6391276459268763e-06, "loss": 0.872, "step": 56500 }, { "epoch": 0.6886402690943658, "grad_norm": 2.111856460571289, "learning_rate": 1.6388069275176397e-06, "loss": 0.8255, "step": 56505 }, { "epoch": 0.6887012053185136, "grad_norm": 1.8240113258361816, "learning_rate": 1.638486209108403e-06, "loss": 0.823, "step": 56510 }, { "epoch": 0.6887621415426615, "grad_norm": 2.210247278213501, "learning_rate": 1.6381654906991662e-06, "loss": 0.828, "step": 56515 }, { "epoch": 0.6888230777668093, "grad_norm": 2.208930730819702, "learning_rate": 1.6378447722899296e-06, "loss": 0.8054, "step": 56520 }, { "epoch": 0.6888840139909571, "grad_norm": 1.821620225906372, "learning_rate": 1.637524053880693e-06, "loss": 0.8348, "step": 56525 }, { "epoch": 0.6889449502151048, "grad_norm": 2.0637199878692627, "learning_rate": 1.637203335471456e-06, "loss": 0.721, "step": 56530 }, { "epoch": 0.6890058864392526, "grad_norm": 2.0772922039031982, "learning_rate": 1.6368826170622195e-06, "loss": 0.8053, "step": 56535 }, { "epoch": 0.6890668226634005, "grad_norm": 1.7588766813278198, "learning_rate": 1.6365618986529828e-06, "loss": 0.7524, "step": 56540 }, { "epoch": 0.6891277588875483, "grad_norm": 2.032595157623291, "learning_rate": 1.6362411802437462e-06, "loss": 0.818, "step": 56545 }, { "epoch": 0.6891886951116961, "grad_norm": 2.150930166244507, "learning_rate": 1.6359204618345094e-06, "loss": 0.8275, "step": 56550 }, { "epoch": 0.6892496313358439, "grad_norm": 1.712041974067688, "learning_rate": 1.6355997434252727e-06, "loss": 0.7558, "step": 56555 }, { "epoch": 0.6893105675599918, "grad_norm": 2.0596234798431396, "learning_rate": 1.6352790250160361e-06, "loss": 0.9484, "step": 56560 }, { "epoch": 0.6893715037841395, "grad_norm": 6.50468635559082, "learning_rate": 1.6349583066067995e-06, "loss": 0.834, "step": 56565 }, { "epoch": 0.6894324400082873, "grad_norm": 1.9080764055252075, "learning_rate": 1.6346375881975626e-06, "loss": 0.7851, "step": 56570 }, { "epoch": 0.6894933762324351, "grad_norm": 1.7433319091796875, "learning_rate": 1.634316869788326e-06, "loss": 0.83, "step": 56575 }, { "epoch": 0.6895543124565829, "grad_norm": 1.8641071319580078, "learning_rate": 1.6339961513790892e-06, "loss": 0.8181, "step": 56580 }, { "epoch": 0.6896152486807308, "grad_norm": 1.655159592628479, "learning_rate": 1.6336754329698527e-06, "loss": 0.8621, "step": 56585 }, { "epoch": 0.6896761849048786, "grad_norm": 1.9885108470916748, "learning_rate": 1.633354714560616e-06, "loss": 0.8765, "step": 56590 }, { "epoch": 0.6897371211290264, "grad_norm": 1.8741350173950195, "learning_rate": 1.6330339961513791e-06, "loss": 0.834, "step": 56595 }, { "epoch": 0.6897980573531741, "grad_norm": 2.0080134868621826, "learning_rate": 1.6327132777421426e-06, "loss": 0.8562, "step": 56600 }, { "epoch": 0.6898589935773219, "grad_norm": 2.120743751525879, "learning_rate": 1.632392559332906e-06, "loss": 0.8765, "step": 56605 }, { "epoch": 0.6899199298014698, "grad_norm": 2.2809534072875977, "learning_rate": 1.632071840923669e-06, "loss": 0.808, "step": 56610 }, { "epoch": 0.6899808660256176, "grad_norm": 1.9434796571731567, "learning_rate": 1.6317511225144325e-06, "loss": 0.8043, "step": 56615 }, { "epoch": 0.6900418022497654, "grad_norm": 1.936579704284668, "learning_rate": 1.6314304041051957e-06, "loss": 0.9203, "step": 56620 }, { "epoch": 0.6901027384739132, "grad_norm": 1.9987483024597168, "learning_rate": 1.6311096856959591e-06, "loss": 0.7667, "step": 56625 }, { "epoch": 0.6901636746980611, "grad_norm": 2.065930128097534, "learning_rate": 1.6307889672867226e-06, "loss": 0.7787, "step": 56630 }, { "epoch": 0.6902246109222088, "grad_norm": 1.9862902164459229, "learning_rate": 1.6304682488774856e-06, "loss": 0.8045, "step": 56635 }, { "epoch": 0.6902855471463566, "grad_norm": 1.8397760391235352, "learning_rate": 1.630147530468249e-06, "loss": 0.7953, "step": 56640 }, { "epoch": 0.6903464833705044, "grad_norm": 1.7686197757720947, "learning_rate": 1.6298268120590125e-06, "loss": 0.7908, "step": 56645 }, { "epoch": 0.6904074195946522, "grad_norm": 1.8390798568725586, "learning_rate": 1.6295060936497755e-06, "loss": 0.8753, "step": 56650 }, { "epoch": 0.6904683558188001, "grad_norm": 1.7065798044204712, "learning_rate": 1.629185375240539e-06, "loss": 0.8188, "step": 56655 }, { "epoch": 0.6905292920429479, "grad_norm": 1.9098029136657715, "learning_rate": 1.6288646568313022e-06, "loss": 0.8165, "step": 56660 }, { "epoch": 0.6905902282670957, "grad_norm": 2.1348743438720703, "learning_rate": 1.6285439384220656e-06, "loss": 0.8152, "step": 56665 }, { "epoch": 0.6906511644912434, "grad_norm": 2.7884418964385986, "learning_rate": 1.628223220012829e-06, "loss": 0.8587, "step": 56670 }, { "epoch": 0.6907121007153912, "grad_norm": 1.9391125440597534, "learning_rate": 1.627902501603592e-06, "loss": 0.8318, "step": 56675 }, { "epoch": 0.6907730369395391, "grad_norm": 2.6631975173950195, "learning_rate": 1.6275817831943555e-06, "loss": 0.8726, "step": 56680 }, { "epoch": 0.6908339731636869, "grad_norm": 2.0184566974639893, "learning_rate": 1.627261064785119e-06, "loss": 0.805, "step": 56685 }, { "epoch": 0.6908949093878347, "grad_norm": 1.8284920454025269, "learning_rate": 1.626940346375882e-06, "loss": 0.7514, "step": 56690 }, { "epoch": 0.6909558456119825, "grad_norm": 1.8177289962768555, "learning_rate": 1.6266196279666454e-06, "loss": 0.7854, "step": 56695 }, { "epoch": 0.6910167818361304, "grad_norm": 1.9630059003829956, "learning_rate": 1.6262989095574086e-06, "loss": 0.8221, "step": 56700 }, { "epoch": 0.6910777180602781, "grad_norm": 1.6872071027755737, "learning_rate": 1.625978191148172e-06, "loss": 0.7963, "step": 56705 }, { "epoch": 0.6911386542844259, "grad_norm": 1.9265605211257935, "learning_rate": 1.6256574727389355e-06, "loss": 0.91, "step": 56710 }, { "epoch": 0.6911995905085737, "grad_norm": 1.9942569732666016, "learning_rate": 1.6253367543296985e-06, "loss": 0.8732, "step": 56715 }, { "epoch": 0.6912605267327215, "grad_norm": 2.021592617034912, "learning_rate": 1.625016035920462e-06, "loss": 0.8286, "step": 56720 }, { "epoch": 0.6913214629568694, "grad_norm": 1.871569037437439, "learning_rate": 1.6246953175112254e-06, "loss": 0.8223, "step": 56725 }, { "epoch": 0.6913823991810172, "grad_norm": 1.5718870162963867, "learning_rate": 1.6243745991019886e-06, "loss": 0.7627, "step": 56730 }, { "epoch": 0.691443335405165, "grad_norm": 2.2037906646728516, "learning_rate": 1.6240538806927519e-06, "loss": 0.8457, "step": 56735 }, { "epoch": 0.6915042716293127, "grad_norm": 1.925229787826538, "learning_rate": 1.623733162283515e-06, "loss": 0.8452, "step": 56740 }, { "epoch": 0.6915652078534605, "grad_norm": 1.8736189603805542, "learning_rate": 1.6234124438742785e-06, "loss": 0.8964, "step": 56745 }, { "epoch": 0.6916261440776084, "grad_norm": 2.050292491912842, "learning_rate": 1.623091725465042e-06, "loss": 0.7883, "step": 56750 }, { "epoch": 0.6916870803017562, "grad_norm": 1.6961208581924438, "learning_rate": 1.622771007055805e-06, "loss": 0.8063, "step": 56755 }, { "epoch": 0.691748016525904, "grad_norm": 1.7352159023284912, "learning_rate": 1.6224502886465684e-06, "loss": 0.8392, "step": 56760 }, { "epoch": 0.6918089527500518, "grad_norm": 2.0411124229431152, "learning_rate": 1.6221295702373319e-06, "loss": 0.74, "step": 56765 }, { "epoch": 0.6918698889741997, "grad_norm": 1.8104578256607056, "learning_rate": 1.621808851828095e-06, "loss": 0.7824, "step": 56770 }, { "epoch": 0.6919308251983474, "grad_norm": 2.131887197494507, "learning_rate": 1.6214881334188583e-06, "loss": 0.8275, "step": 56775 }, { "epoch": 0.6919917614224952, "grad_norm": 2.460235357284546, "learning_rate": 1.6211674150096216e-06, "loss": 0.7814, "step": 56780 }, { "epoch": 0.692052697646643, "grad_norm": 1.8835126161575317, "learning_rate": 1.620846696600385e-06, "loss": 0.8486, "step": 56785 }, { "epoch": 0.6921136338707908, "grad_norm": 1.869319200515747, "learning_rate": 1.6205259781911484e-06, "loss": 0.8367, "step": 56790 }, { "epoch": 0.6921745700949387, "grad_norm": 2.6448254585266113, "learning_rate": 1.6202052597819114e-06, "loss": 0.805, "step": 56795 }, { "epoch": 0.6922355063190865, "grad_norm": 1.882493495941162, "learning_rate": 1.6198845413726749e-06, "loss": 0.9476, "step": 56800 }, { "epoch": 0.6922964425432343, "grad_norm": 1.827882170677185, "learning_rate": 1.6195638229634383e-06, "loss": 0.8638, "step": 56805 }, { "epoch": 0.692357378767382, "grad_norm": 1.9041506052017212, "learning_rate": 1.6192431045542016e-06, "loss": 0.7879, "step": 56810 }, { "epoch": 0.6924183149915298, "grad_norm": 1.972607135772705, "learning_rate": 1.6189223861449648e-06, "loss": 0.8266, "step": 56815 }, { "epoch": 0.6924792512156777, "grad_norm": 1.988728642463684, "learning_rate": 1.6186016677357282e-06, "loss": 0.8048, "step": 56820 }, { "epoch": 0.6925401874398255, "grad_norm": 1.7461700439453125, "learning_rate": 1.6182809493264915e-06, "loss": 0.7929, "step": 56825 }, { "epoch": 0.6926011236639733, "grad_norm": 1.7930901050567627, "learning_rate": 1.617960230917255e-06, "loss": 0.7771, "step": 56830 }, { "epoch": 0.6926620598881211, "grad_norm": 1.9947255849838257, "learning_rate": 1.617639512508018e-06, "loss": 0.8094, "step": 56835 }, { "epoch": 0.692722996112269, "grad_norm": 2.248764991760254, "learning_rate": 1.6173187940987814e-06, "loss": 0.792, "step": 56840 }, { "epoch": 0.6927839323364167, "grad_norm": 2.4071550369262695, "learning_rate": 1.6169980756895448e-06, "loss": 0.8535, "step": 56845 }, { "epoch": 0.6928448685605645, "grad_norm": 2.080752372741699, "learning_rate": 1.616677357280308e-06, "loss": 0.8685, "step": 56850 }, { "epoch": 0.6929058047847123, "grad_norm": 2.080401659011841, "learning_rate": 1.6163566388710715e-06, "loss": 0.7805, "step": 56855 }, { "epoch": 0.6929667410088601, "grad_norm": 1.8317991495132446, "learning_rate": 1.6160359204618347e-06, "loss": 0.8345, "step": 56860 }, { "epoch": 0.693027677233008, "grad_norm": 1.981736660003662, "learning_rate": 1.615715202052598e-06, "loss": 0.7921, "step": 56865 }, { "epoch": 0.6930886134571558, "grad_norm": 1.5896878242492676, "learning_rate": 1.6153944836433614e-06, "loss": 0.8093, "step": 56870 }, { "epoch": 0.6931495496813036, "grad_norm": 2.1664013862609863, "learning_rate": 1.6150737652341244e-06, "loss": 0.8145, "step": 56875 }, { "epoch": 0.6932104859054513, "grad_norm": 1.9479609727859497, "learning_rate": 1.6147530468248878e-06, "loss": 0.7901, "step": 56880 }, { "epoch": 0.6932714221295991, "grad_norm": 1.9184901714324951, "learning_rate": 1.6144323284156513e-06, "loss": 0.8826, "step": 56885 }, { "epoch": 0.693332358353747, "grad_norm": 1.9342093467712402, "learning_rate": 1.6141116100064145e-06, "loss": 0.8147, "step": 56890 }, { "epoch": 0.6933932945778948, "grad_norm": 1.9705092906951904, "learning_rate": 1.613790891597178e-06, "loss": 0.8214, "step": 56895 }, { "epoch": 0.6934542308020426, "grad_norm": 1.6886875629425049, "learning_rate": 1.6134701731879412e-06, "loss": 0.8253, "step": 56900 }, { "epoch": 0.6935151670261904, "grad_norm": 2.070749282836914, "learning_rate": 1.6131494547787044e-06, "loss": 0.8402, "step": 56905 }, { "epoch": 0.6935761032503382, "grad_norm": 2.1407902240753174, "learning_rate": 1.6128287363694678e-06, "loss": 0.8479, "step": 56910 }, { "epoch": 0.693637039474486, "grad_norm": 1.6861586570739746, "learning_rate": 1.6125080179602308e-06, "loss": 0.8355, "step": 56915 }, { "epoch": 0.6936979756986338, "grad_norm": 2.1146271228790283, "learning_rate": 1.6121872995509943e-06, "loss": 0.8098, "step": 56920 }, { "epoch": 0.6937589119227816, "grad_norm": 1.7739633321762085, "learning_rate": 1.6118665811417577e-06, "loss": 0.7941, "step": 56925 }, { "epoch": 0.6938198481469294, "grad_norm": 2.0337839126586914, "learning_rate": 1.611545862732521e-06, "loss": 0.8239, "step": 56930 }, { "epoch": 0.6938807843710773, "grad_norm": 1.9171873331069946, "learning_rate": 1.6112251443232844e-06, "loss": 0.8161, "step": 56935 }, { "epoch": 0.6939417205952251, "grad_norm": 2.0135860443115234, "learning_rate": 1.6109044259140476e-06, "loss": 0.8639, "step": 56940 }, { "epoch": 0.6940026568193729, "grad_norm": 2.4541406631469727, "learning_rate": 1.6105837075048108e-06, "loss": 0.766, "step": 56945 }, { "epoch": 0.6940635930435206, "grad_norm": 1.8985923528671265, "learning_rate": 1.6102629890955743e-06, "loss": 0.7939, "step": 56950 }, { "epoch": 0.6941245292676684, "grad_norm": 2.06172251701355, "learning_rate": 1.6099422706863373e-06, "loss": 0.8275, "step": 56955 }, { "epoch": 0.6941854654918163, "grad_norm": 1.9049140214920044, "learning_rate": 1.6096215522771007e-06, "loss": 0.8186, "step": 56960 }, { "epoch": 0.6942464017159641, "grad_norm": 1.7967257499694824, "learning_rate": 1.6093008338678642e-06, "loss": 0.7866, "step": 56965 }, { "epoch": 0.6943073379401119, "grad_norm": 2.0031468868255615, "learning_rate": 1.6089801154586274e-06, "loss": 0.7966, "step": 56970 }, { "epoch": 0.6943682741642597, "grad_norm": 1.719314694404602, "learning_rate": 1.6086593970493909e-06, "loss": 0.7758, "step": 56975 }, { "epoch": 0.6944292103884075, "grad_norm": 1.8618056774139404, "learning_rate": 1.6083386786401543e-06, "loss": 0.9034, "step": 56980 }, { "epoch": 0.6944901466125553, "grad_norm": 2.7535369396209717, "learning_rate": 1.6080179602309173e-06, "loss": 0.9267, "step": 56985 }, { "epoch": 0.6945510828367031, "grad_norm": 1.771642804145813, "learning_rate": 1.6076972418216807e-06, "loss": 0.7583, "step": 56990 }, { "epoch": 0.6946120190608509, "grad_norm": 1.9737329483032227, "learning_rate": 1.607376523412444e-06, "loss": 0.8634, "step": 56995 }, { "epoch": 0.6946729552849987, "grad_norm": 1.758199691772461, "learning_rate": 1.6070558050032072e-06, "loss": 0.7586, "step": 57000 }, { "epoch": 0.6947338915091466, "grad_norm": 1.7948298454284668, "learning_rate": 1.6067350865939706e-06, "loss": 0.9237, "step": 57005 }, { "epoch": 0.6947948277332944, "grad_norm": 2.0059797763824463, "learning_rate": 1.6064143681847339e-06, "loss": 0.8107, "step": 57010 }, { "epoch": 0.6948557639574422, "grad_norm": 1.9343483448028564, "learning_rate": 1.6060936497754973e-06, "loss": 0.806, "step": 57015 }, { "epoch": 0.6949167001815899, "grad_norm": 1.9019511938095093, "learning_rate": 1.6057729313662608e-06, "loss": 0.7769, "step": 57020 }, { "epoch": 0.6949776364057377, "grad_norm": 2.283921003341675, "learning_rate": 1.6054522129570238e-06, "loss": 0.843, "step": 57025 }, { "epoch": 0.6950385726298856, "grad_norm": 1.9970667362213135, "learning_rate": 1.6051314945477872e-06, "loss": 0.7658, "step": 57030 }, { "epoch": 0.6950995088540334, "grad_norm": 2.164294958114624, "learning_rate": 1.6048107761385504e-06, "loss": 0.8189, "step": 57035 }, { "epoch": 0.6951604450781812, "grad_norm": 1.8595515489578247, "learning_rate": 1.6044900577293137e-06, "loss": 0.7976, "step": 57040 }, { "epoch": 0.695221381302329, "grad_norm": 1.8620407581329346, "learning_rate": 1.6041693393200771e-06, "loss": 0.7989, "step": 57045 }, { "epoch": 0.6952823175264767, "grad_norm": 1.8218042850494385, "learning_rate": 1.6038486209108403e-06, "loss": 0.7803, "step": 57050 }, { "epoch": 0.6953432537506246, "grad_norm": 2.0511107444763184, "learning_rate": 1.6035279025016038e-06, "loss": 0.8228, "step": 57055 }, { "epoch": 0.6954041899747724, "grad_norm": 2.0828371047973633, "learning_rate": 1.6032071840923672e-06, "loss": 0.8605, "step": 57060 }, { "epoch": 0.6954651261989202, "grad_norm": 1.9049798250198364, "learning_rate": 1.6028864656831302e-06, "loss": 0.8119, "step": 57065 }, { "epoch": 0.695526062423068, "grad_norm": 1.658818006515503, "learning_rate": 1.6025657472738937e-06, "loss": 0.8131, "step": 57070 }, { "epoch": 0.6955869986472158, "grad_norm": 2.1129822731018066, "learning_rate": 1.602245028864657e-06, "loss": 0.759, "step": 57075 }, { "epoch": 0.6956479348713637, "grad_norm": 2.541712522506714, "learning_rate": 1.6019243104554203e-06, "loss": 0.829, "step": 57080 }, { "epoch": 0.6957088710955114, "grad_norm": 1.88673734664917, "learning_rate": 1.6016035920461836e-06, "loss": 0.8476, "step": 57085 }, { "epoch": 0.6957698073196592, "grad_norm": 1.671320915222168, "learning_rate": 1.6012828736369468e-06, "loss": 0.7722, "step": 57090 }, { "epoch": 0.695830743543807, "grad_norm": 2.1700057983398438, "learning_rate": 1.6009621552277102e-06, "loss": 0.9053, "step": 57095 }, { "epoch": 0.6958916797679549, "grad_norm": 1.7641620635986328, "learning_rate": 1.6006414368184737e-06, "loss": 0.8069, "step": 57100 }, { "epoch": 0.6959526159921027, "grad_norm": 1.805371880531311, "learning_rate": 1.6003207184092367e-06, "loss": 0.8324, "step": 57105 }, { "epoch": 0.6960135522162505, "grad_norm": 1.7508609294891357, "learning_rate": 1.6000000000000001e-06, "loss": 0.8163, "step": 57110 }, { "epoch": 0.6960744884403983, "grad_norm": 1.9619066715240479, "learning_rate": 1.5996792815907636e-06, "loss": 0.8272, "step": 57115 }, { "epoch": 0.696135424664546, "grad_norm": 2.05208420753479, "learning_rate": 1.5993585631815268e-06, "loss": 0.8156, "step": 57120 }, { "epoch": 0.6961963608886939, "grad_norm": 1.9046076536178589, "learning_rate": 1.59903784477229e-06, "loss": 0.866, "step": 57125 }, { "epoch": 0.6962572971128417, "grad_norm": 1.9404762983322144, "learning_rate": 1.5987171263630533e-06, "loss": 0.7609, "step": 57130 }, { "epoch": 0.6963182333369895, "grad_norm": 2.0962252616882324, "learning_rate": 1.5983964079538167e-06, "loss": 0.8198, "step": 57135 }, { "epoch": 0.6963791695611373, "grad_norm": 1.8485013246536255, "learning_rate": 1.5980756895445801e-06, "loss": 0.8795, "step": 57140 }, { "epoch": 0.6964401057852851, "grad_norm": 1.8068900108337402, "learning_rate": 1.5977549711353432e-06, "loss": 0.8186, "step": 57145 }, { "epoch": 0.696501042009433, "grad_norm": 1.7657263278961182, "learning_rate": 1.5974342527261066e-06, "loss": 0.8139, "step": 57150 }, { "epoch": 0.6965619782335807, "grad_norm": 1.8995060920715332, "learning_rate": 1.59711353431687e-06, "loss": 0.8789, "step": 57155 }, { "epoch": 0.6966229144577285, "grad_norm": 1.646668553352356, "learning_rate": 1.5967928159076333e-06, "loss": 0.7849, "step": 57160 }, { "epoch": 0.6966838506818763, "grad_norm": 2.3192083835601807, "learning_rate": 1.5964720974983965e-06, "loss": 0.8819, "step": 57165 }, { "epoch": 0.6967447869060241, "grad_norm": 1.8911787271499634, "learning_rate": 1.5961513790891597e-06, "loss": 0.8586, "step": 57170 }, { "epoch": 0.696805723130172, "grad_norm": 1.7815269231796265, "learning_rate": 1.5958306606799232e-06, "loss": 0.7389, "step": 57175 }, { "epoch": 0.6968666593543198, "grad_norm": 1.805446743965149, "learning_rate": 1.5955099422706866e-06, "loss": 0.8189, "step": 57180 }, { "epoch": 0.6969275955784676, "grad_norm": 2.0661261081695557, "learning_rate": 1.5951892238614496e-06, "loss": 0.8394, "step": 57185 }, { "epoch": 0.6969885318026153, "grad_norm": 1.8006088733673096, "learning_rate": 1.594868505452213e-06, "loss": 0.8799, "step": 57190 }, { "epoch": 0.6970494680267632, "grad_norm": 1.7875633239746094, "learning_rate": 1.5945477870429765e-06, "loss": 0.7941, "step": 57195 }, { "epoch": 0.697110404250911, "grad_norm": 2.095613718032837, "learning_rate": 1.5942270686337397e-06, "loss": 0.8261, "step": 57200 }, { "epoch": 0.6971713404750588, "grad_norm": 1.8097383975982666, "learning_rate": 1.5939063502245032e-06, "loss": 0.8267, "step": 57205 }, { "epoch": 0.6972322766992066, "grad_norm": 1.6992131471633911, "learning_rate": 1.5935856318152662e-06, "loss": 0.7856, "step": 57210 }, { "epoch": 0.6972932129233544, "grad_norm": 1.8450337648391724, "learning_rate": 1.5932649134060296e-06, "loss": 0.8231, "step": 57215 }, { "epoch": 0.6973541491475023, "grad_norm": 2.0296764373779297, "learning_rate": 1.592944194996793e-06, "loss": 0.8367, "step": 57220 }, { "epoch": 0.69741508537165, "grad_norm": 1.731279969215393, "learning_rate": 1.592623476587556e-06, "loss": 0.8755, "step": 57225 }, { "epoch": 0.6974760215957978, "grad_norm": 2.54953670501709, "learning_rate": 1.5923027581783195e-06, "loss": 0.8647, "step": 57230 }, { "epoch": 0.6975369578199456, "grad_norm": 2.3864965438842773, "learning_rate": 1.591982039769083e-06, "loss": 0.8018, "step": 57235 }, { "epoch": 0.6975978940440934, "grad_norm": 1.7730363607406616, "learning_rate": 1.5916613213598462e-06, "loss": 0.815, "step": 57240 }, { "epoch": 0.6976588302682413, "grad_norm": 2.0971837043762207, "learning_rate": 1.5913406029506096e-06, "loss": 0.8413, "step": 57245 }, { "epoch": 0.6977197664923891, "grad_norm": 2.0059397220611572, "learning_rate": 1.5910198845413727e-06, "loss": 0.851, "step": 57250 }, { "epoch": 0.6977807027165369, "grad_norm": 1.8047353029251099, "learning_rate": 1.590699166132136e-06, "loss": 0.9144, "step": 57255 }, { "epoch": 0.6978416389406846, "grad_norm": 2.3348305225372314, "learning_rate": 1.5903784477228995e-06, "loss": 0.7713, "step": 57260 }, { "epoch": 0.6979025751648325, "grad_norm": 2.2057406902313232, "learning_rate": 1.5900577293136626e-06, "loss": 0.8615, "step": 57265 }, { "epoch": 0.6979635113889803, "grad_norm": 1.775564432144165, "learning_rate": 1.589737010904426e-06, "loss": 0.8358, "step": 57270 }, { "epoch": 0.6980244476131281, "grad_norm": 1.9791059494018555, "learning_rate": 1.5894162924951894e-06, "loss": 0.8257, "step": 57275 }, { "epoch": 0.6980853838372759, "grad_norm": 2.37727689743042, "learning_rate": 1.5890955740859527e-06, "loss": 0.8922, "step": 57280 }, { "epoch": 0.6981463200614237, "grad_norm": 2.1078717708587646, "learning_rate": 1.588774855676716e-06, "loss": 0.8106, "step": 57285 }, { "epoch": 0.6982072562855716, "grad_norm": 1.7775026559829712, "learning_rate": 1.5884541372674791e-06, "loss": 0.8527, "step": 57290 }, { "epoch": 0.6982681925097193, "grad_norm": 2.6868233680725098, "learning_rate": 1.5881334188582426e-06, "loss": 0.8098, "step": 57295 }, { "epoch": 0.6983291287338671, "grad_norm": 1.8353817462921143, "learning_rate": 1.587812700449006e-06, "loss": 0.7829, "step": 57300 }, { "epoch": 0.6983900649580149, "grad_norm": 1.913343071937561, "learning_rate": 1.587491982039769e-06, "loss": 0.7989, "step": 57305 }, { "epoch": 0.6984510011821627, "grad_norm": 1.9999642372131348, "learning_rate": 1.5871712636305325e-06, "loss": 0.8205, "step": 57310 }, { "epoch": 0.6985119374063106, "grad_norm": 1.8138872385025024, "learning_rate": 1.586850545221296e-06, "loss": 0.8097, "step": 57315 }, { "epoch": 0.6985728736304584, "grad_norm": 1.9301481246948242, "learning_rate": 1.5865298268120591e-06, "loss": 0.8634, "step": 57320 }, { "epoch": 0.6986338098546062, "grad_norm": 1.824964165687561, "learning_rate": 1.5862091084028226e-06, "loss": 0.7893, "step": 57325 }, { "epoch": 0.6986947460787539, "grad_norm": 1.8582841157913208, "learning_rate": 1.5858883899935856e-06, "loss": 0.8044, "step": 57330 }, { "epoch": 0.6987556823029017, "grad_norm": 2.1685471534729004, "learning_rate": 1.585567671584349e-06, "loss": 0.8612, "step": 57335 }, { "epoch": 0.6988166185270496, "grad_norm": 1.6444790363311768, "learning_rate": 1.5852469531751125e-06, "loss": 0.8242, "step": 57340 }, { "epoch": 0.6988775547511974, "grad_norm": 1.5273398160934448, "learning_rate": 1.5849262347658757e-06, "loss": 0.7239, "step": 57345 }, { "epoch": 0.6989384909753452, "grad_norm": 1.9474743604660034, "learning_rate": 1.584605516356639e-06, "loss": 0.7725, "step": 57350 }, { "epoch": 0.698999427199493, "grad_norm": 1.8310012817382812, "learning_rate": 1.5842847979474024e-06, "loss": 0.89, "step": 57355 }, { "epoch": 0.6990603634236409, "grad_norm": 1.9670759439468384, "learning_rate": 1.5839640795381656e-06, "loss": 0.8182, "step": 57360 }, { "epoch": 0.6991212996477886, "grad_norm": 1.831335425376892, "learning_rate": 1.583643361128929e-06, "loss": 0.8535, "step": 57365 }, { "epoch": 0.6991822358719364, "grad_norm": 1.6498695611953735, "learning_rate": 1.583322642719692e-06, "loss": 0.7772, "step": 57370 }, { "epoch": 0.6992431720960842, "grad_norm": 2.2947335243225098, "learning_rate": 1.5830019243104555e-06, "loss": 0.8677, "step": 57375 }, { "epoch": 0.699304108320232, "grad_norm": 2.0986576080322266, "learning_rate": 1.582681205901219e-06, "loss": 0.8001, "step": 57380 }, { "epoch": 0.6993650445443799, "grad_norm": 1.9837309122085571, "learning_rate": 1.5823604874919822e-06, "loss": 0.8157, "step": 57385 }, { "epoch": 0.6994259807685277, "grad_norm": 1.987069845199585, "learning_rate": 1.5820397690827454e-06, "loss": 0.8653, "step": 57390 }, { "epoch": 0.6994869169926755, "grad_norm": 1.989027738571167, "learning_rate": 1.5817190506735088e-06, "loss": 0.7833, "step": 57395 }, { "epoch": 0.6995478532168232, "grad_norm": 1.7533351182937622, "learning_rate": 1.581398332264272e-06, "loss": 0.8181, "step": 57400 }, { "epoch": 0.699608789440971, "grad_norm": 2.2304556369781494, "learning_rate": 1.5810776138550355e-06, "loss": 0.7841, "step": 57405 }, { "epoch": 0.6996697256651189, "grad_norm": 2.0182785987854004, "learning_rate": 1.580756895445799e-06, "loss": 0.8693, "step": 57410 }, { "epoch": 0.6997306618892667, "grad_norm": 1.7674862146377563, "learning_rate": 1.580436177036562e-06, "loss": 0.8737, "step": 57415 }, { "epoch": 0.6997915981134145, "grad_norm": 1.6954718828201294, "learning_rate": 1.5801154586273254e-06, "loss": 0.8176, "step": 57420 }, { "epoch": 0.6998525343375623, "grad_norm": 1.8441251516342163, "learning_rate": 1.5797947402180886e-06, "loss": 0.794, "step": 57425 }, { "epoch": 0.6999134705617102, "grad_norm": 1.864703893661499, "learning_rate": 1.579474021808852e-06, "loss": 0.824, "step": 57430 }, { "epoch": 0.6999744067858579, "grad_norm": 1.7799506187438965, "learning_rate": 1.5791533033996153e-06, "loss": 0.776, "step": 57435 }, { "epoch": 0.7000353430100057, "grad_norm": 1.7851225137710571, "learning_rate": 1.5788325849903785e-06, "loss": 0.8291, "step": 57440 }, { "epoch": 0.7000962792341535, "grad_norm": 2.2242116928100586, "learning_rate": 1.578511866581142e-06, "loss": 0.8069, "step": 57445 }, { "epoch": 0.7001572154583013, "grad_norm": 2.0429482460021973, "learning_rate": 1.5781911481719054e-06, "loss": 0.8577, "step": 57450 }, { "epoch": 0.7002181516824492, "grad_norm": 2.4630966186523438, "learning_rate": 1.5778704297626684e-06, "loss": 0.8418, "step": 57455 }, { "epoch": 0.700279087906597, "grad_norm": 2.0420756340026855, "learning_rate": 1.5775497113534319e-06, "loss": 0.7684, "step": 57460 }, { "epoch": 0.7003400241307448, "grad_norm": 2.0231218338012695, "learning_rate": 1.577228992944195e-06, "loss": 0.8062, "step": 57465 }, { "epoch": 0.7004009603548925, "grad_norm": 1.6669180393218994, "learning_rate": 1.5769082745349585e-06, "loss": 0.8306, "step": 57470 }, { "epoch": 0.7004618965790403, "grad_norm": 2.2105910778045654, "learning_rate": 1.5765875561257218e-06, "loss": 0.859, "step": 57475 }, { "epoch": 0.7005228328031882, "grad_norm": 2.036858081817627, "learning_rate": 1.576266837716485e-06, "loss": 0.8013, "step": 57480 }, { "epoch": 0.700583769027336, "grad_norm": 1.7976176738739014, "learning_rate": 1.5759461193072484e-06, "loss": 0.8511, "step": 57485 }, { "epoch": 0.7006447052514838, "grad_norm": 1.8040097951889038, "learning_rate": 1.5756254008980119e-06, "loss": 0.8575, "step": 57490 }, { "epoch": 0.7007056414756316, "grad_norm": 1.7123278379440308, "learning_rate": 1.5753046824887749e-06, "loss": 0.8077, "step": 57495 }, { "epoch": 0.7007665776997795, "grad_norm": 1.8934645652770996, "learning_rate": 1.5749839640795383e-06, "loss": 0.8493, "step": 57500 }, { "epoch": 0.7008275139239272, "grad_norm": 2.1955764293670654, "learning_rate": 1.5746632456703015e-06, "loss": 0.7958, "step": 57505 }, { "epoch": 0.700888450148075, "grad_norm": 2.3988113403320312, "learning_rate": 1.574342527261065e-06, "loss": 0.7871, "step": 57510 }, { "epoch": 0.7009493863722228, "grad_norm": 2.3099074363708496, "learning_rate": 1.5740218088518282e-06, "loss": 0.7782, "step": 57515 }, { "epoch": 0.7010103225963706, "grad_norm": 1.6118321418762207, "learning_rate": 1.5737010904425914e-06, "loss": 0.7231, "step": 57520 }, { "epoch": 0.7010712588205185, "grad_norm": 2.3872733116149902, "learning_rate": 1.5733803720333549e-06, "loss": 0.833, "step": 57525 }, { "epoch": 0.7011321950446663, "grad_norm": 1.7785097360610962, "learning_rate": 1.5730596536241183e-06, "loss": 0.777, "step": 57530 }, { "epoch": 0.7011931312688141, "grad_norm": 1.9767820835113525, "learning_rate": 1.5727389352148813e-06, "loss": 0.8076, "step": 57535 }, { "epoch": 0.7012540674929618, "grad_norm": 2.349205732345581, "learning_rate": 1.5724182168056448e-06, "loss": 0.7959, "step": 57540 }, { "epoch": 0.7013150037171096, "grad_norm": 1.87801194190979, "learning_rate": 1.572097498396408e-06, "loss": 0.7428, "step": 57545 }, { "epoch": 0.7013759399412575, "grad_norm": 2.054974317550659, "learning_rate": 1.5717767799871715e-06, "loss": 0.7649, "step": 57550 }, { "epoch": 0.7014368761654053, "grad_norm": 2.2679896354675293, "learning_rate": 1.5714560615779349e-06, "loss": 0.7868, "step": 57555 }, { "epoch": 0.7014978123895531, "grad_norm": 2.1077849864959717, "learning_rate": 1.571135343168698e-06, "loss": 0.8307, "step": 57560 }, { "epoch": 0.7015587486137009, "grad_norm": 2.3080637454986572, "learning_rate": 1.5708146247594613e-06, "loss": 0.8505, "step": 57565 }, { "epoch": 0.7016196848378488, "grad_norm": 1.6192442178726196, "learning_rate": 1.5704939063502248e-06, "loss": 0.8527, "step": 57570 }, { "epoch": 0.7016806210619965, "grad_norm": 3.0934829711914062, "learning_rate": 1.5701731879409878e-06, "loss": 0.7925, "step": 57575 }, { "epoch": 0.7017415572861443, "grad_norm": 2.407789707183838, "learning_rate": 1.5698524695317512e-06, "loss": 0.8704, "step": 57580 }, { "epoch": 0.7018024935102921, "grad_norm": 1.7979133129119873, "learning_rate": 1.5695317511225145e-06, "loss": 0.7646, "step": 57585 }, { "epoch": 0.7018634297344399, "grad_norm": 1.960320234298706, "learning_rate": 1.569211032713278e-06, "loss": 0.827, "step": 57590 }, { "epoch": 0.7019243659585878, "grad_norm": 1.9173818826675415, "learning_rate": 1.5688903143040414e-06, "loss": 0.7037, "step": 57595 }, { "epoch": 0.7019853021827356, "grad_norm": 1.8820486068725586, "learning_rate": 1.5685695958948044e-06, "loss": 0.8777, "step": 57600 }, { "epoch": 0.7020462384068834, "grad_norm": 1.8295077085494995, "learning_rate": 1.5682488774855678e-06, "loss": 0.8152, "step": 57605 }, { "epoch": 0.7021071746310311, "grad_norm": 1.8924087285995483, "learning_rate": 1.5679281590763313e-06, "loss": 0.8594, "step": 57610 }, { "epoch": 0.7021681108551789, "grad_norm": 1.9588804244995117, "learning_rate": 1.5676074406670943e-06, "loss": 0.8715, "step": 57615 }, { "epoch": 0.7022290470793268, "grad_norm": 1.414381980895996, "learning_rate": 1.5672867222578577e-06, "loss": 0.7672, "step": 57620 }, { "epoch": 0.7022899833034746, "grad_norm": 2.6011414527893066, "learning_rate": 1.566966003848621e-06, "loss": 0.8512, "step": 57625 }, { "epoch": 0.7023509195276224, "grad_norm": 1.6929799318313599, "learning_rate": 1.5666452854393844e-06, "loss": 0.808, "step": 57630 }, { "epoch": 0.7024118557517702, "grad_norm": 1.9481585025787354, "learning_rate": 1.5663245670301478e-06, "loss": 0.7762, "step": 57635 }, { "epoch": 0.702472791975918, "grad_norm": 1.9861923456192017, "learning_rate": 1.5660038486209108e-06, "loss": 0.7462, "step": 57640 }, { "epoch": 0.7025337282000658, "grad_norm": 1.982728123664856, "learning_rate": 1.5656831302116743e-06, "loss": 0.7563, "step": 57645 }, { "epoch": 0.7025946644242136, "grad_norm": 2.002032995223999, "learning_rate": 1.5653624118024377e-06, "loss": 0.8093, "step": 57650 }, { "epoch": 0.7026556006483614, "grad_norm": 1.7950363159179688, "learning_rate": 1.5650416933932007e-06, "loss": 0.8616, "step": 57655 }, { "epoch": 0.7027165368725092, "grad_norm": 1.7686941623687744, "learning_rate": 1.5647209749839642e-06, "loss": 0.8327, "step": 57660 }, { "epoch": 0.7027774730966571, "grad_norm": 1.8587133884429932, "learning_rate": 1.5644002565747274e-06, "loss": 0.838, "step": 57665 }, { "epoch": 0.7028384093208049, "grad_norm": 2.4730706214904785, "learning_rate": 1.5640795381654908e-06, "loss": 0.8296, "step": 57670 }, { "epoch": 0.7028993455449527, "grad_norm": 1.9739755392074585, "learning_rate": 1.5637588197562543e-06, "loss": 0.9132, "step": 57675 }, { "epoch": 0.7029602817691004, "grad_norm": 1.9200326204299927, "learning_rate": 1.5634381013470173e-06, "loss": 0.7922, "step": 57680 }, { "epoch": 0.7030212179932482, "grad_norm": 1.8535776138305664, "learning_rate": 1.5631173829377807e-06, "loss": 0.7779, "step": 57685 }, { "epoch": 0.7030821542173961, "grad_norm": 1.744005799293518, "learning_rate": 1.5627966645285442e-06, "loss": 0.8418, "step": 57690 }, { "epoch": 0.7031430904415439, "grad_norm": 2.2532424926757812, "learning_rate": 1.5624759461193074e-06, "loss": 0.8705, "step": 57695 }, { "epoch": 0.7032040266656917, "grad_norm": 2.088327407836914, "learning_rate": 1.5621552277100706e-06, "loss": 0.8116, "step": 57700 }, { "epoch": 0.7032649628898395, "grad_norm": 2.0932531356811523, "learning_rate": 1.561834509300834e-06, "loss": 0.8327, "step": 57705 }, { "epoch": 0.7033258991139874, "grad_norm": 1.9559056758880615, "learning_rate": 1.5615137908915973e-06, "loss": 0.8128, "step": 57710 }, { "epoch": 0.7033868353381351, "grad_norm": 1.9152337312698364, "learning_rate": 1.5611930724823607e-06, "loss": 0.8032, "step": 57715 }, { "epoch": 0.7034477715622829, "grad_norm": 1.6168745756149292, "learning_rate": 1.5608723540731238e-06, "loss": 0.8187, "step": 57720 }, { "epoch": 0.7035087077864307, "grad_norm": 1.8391386270523071, "learning_rate": 1.5605516356638872e-06, "loss": 0.8792, "step": 57725 }, { "epoch": 0.7035696440105785, "grad_norm": 1.9944828748703003, "learning_rate": 1.5602309172546506e-06, "loss": 0.789, "step": 57730 }, { "epoch": 0.7036305802347264, "grad_norm": 1.9136146306991577, "learning_rate": 1.5599101988454139e-06, "loss": 0.8177, "step": 57735 }, { "epoch": 0.7036915164588742, "grad_norm": 1.8659989833831787, "learning_rate": 1.559589480436177e-06, "loss": 0.7792, "step": 57740 }, { "epoch": 0.703752452683022, "grad_norm": 2.1516716480255127, "learning_rate": 1.5592687620269405e-06, "loss": 0.8726, "step": 57745 }, { "epoch": 0.7038133889071697, "grad_norm": 1.7993537187576294, "learning_rate": 1.5589480436177038e-06, "loss": 0.8242, "step": 57750 }, { "epoch": 0.7038743251313175, "grad_norm": 2.1481173038482666, "learning_rate": 1.5586273252084672e-06, "loss": 0.8272, "step": 57755 }, { "epoch": 0.7039352613554654, "grad_norm": 2.6236109733581543, "learning_rate": 1.5583066067992302e-06, "loss": 0.8186, "step": 57760 }, { "epoch": 0.7039961975796132, "grad_norm": 2.091019868850708, "learning_rate": 1.5579858883899937e-06, "loss": 0.7334, "step": 57765 }, { "epoch": 0.704057133803761, "grad_norm": 1.767900824546814, "learning_rate": 1.557665169980757e-06, "loss": 0.7672, "step": 57770 }, { "epoch": 0.7041180700279088, "grad_norm": 1.9993146657943726, "learning_rate": 1.5573444515715203e-06, "loss": 0.8483, "step": 57775 }, { "epoch": 0.7041790062520566, "grad_norm": 2.219616413116455, "learning_rate": 1.5570237331622836e-06, "loss": 0.7778, "step": 57780 }, { "epoch": 0.7042399424762044, "grad_norm": 2.5529885292053223, "learning_rate": 1.556703014753047e-06, "loss": 0.8502, "step": 57785 }, { "epoch": 0.7043008787003522, "grad_norm": 2.127140998840332, "learning_rate": 1.5563822963438102e-06, "loss": 0.8218, "step": 57790 }, { "epoch": 0.7043618149245, "grad_norm": 1.9781289100646973, "learning_rate": 1.5560615779345737e-06, "loss": 0.8237, "step": 57795 }, { "epoch": 0.7044227511486478, "grad_norm": 1.6988612413406372, "learning_rate": 1.5557408595253367e-06, "loss": 0.7675, "step": 57800 }, { "epoch": 0.7044836873727957, "grad_norm": 1.8828130960464478, "learning_rate": 1.5554201411161001e-06, "loss": 0.8316, "step": 57805 }, { "epoch": 0.7045446235969435, "grad_norm": 2.050609827041626, "learning_rate": 1.5550994227068636e-06, "loss": 0.825, "step": 57810 }, { "epoch": 0.7046055598210913, "grad_norm": 1.850614309310913, "learning_rate": 1.5547787042976268e-06, "loss": 0.8718, "step": 57815 }, { "epoch": 0.704666496045239, "grad_norm": 2.257258176803589, "learning_rate": 1.5544579858883902e-06, "loss": 0.722, "step": 57820 }, { "epoch": 0.7047274322693868, "grad_norm": 2.5334479808807373, "learning_rate": 1.5541372674791535e-06, "loss": 0.7759, "step": 57825 }, { "epoch": 0.7047883684935347, "grad_norm": 2.4174861907958984, "learning_rate": 1.5538165490699167e-06, "loss": 0.7817, "step": 57830 }, { "epoch": 0.7048493047176825, "grad_norm": 2.1573684215545654, "learning_rate": 1.5534958306606801e-06, "loss": 0.8406, "step": 57835 }, { "epoch": 0.7049102409418303, "grad_norm": 1.976501703262329, "learning_rate": 1.5531751122514432e-06, "loss": 0.8203, "step": 57840 }, { "epoch": 0.7049711771659781, "grad_norm": 1.8319947719573975, "learning_rate": 1.5528543938422066e-06, "loss": 0.7644, "step": 57845 }, { "epoch": 0.705032113390126, "grad_norm": 2.2890679836273193, "learning_rate": 1.55253367543297e-06, "loss": 0.7769, "step": 57850 }, { "epoch": 0.7050930496142737, "grad_norm": 2.0690224170684814, "learning_rate": 1.5522129570237333e-06, "loss": 0.8371, "step": 57855 }, { "epoch": 0.7051539858384215, "grad_norm": 2.108520984649658, "learning_rate": 1.5518922386144967e-06, "loss": 0.7983, "step": 57860 }, { "epoch": 0.7052149220625693, "grad_norm": 1.807822823524475, "learning_rate": 1.55157152020526e-06, "loss": 0.8728, "step": 57865 }, { "epoch": 0.7052758582867171, "grad_norm": 1.7704441547393799, "learning_rate": 1.5512508017960232e-06, "loss": 0.7991, "step": 57870 }, { "epoch": 0.705336794510865, "grad_norm": 2.111325979232788, "learning_rate": 1.5509300833867866e-06, "loss": 0.8051, "step": 57875 }, { "epoch": 0.7053977307350128, "grad_norm": 1.701613187789917, "learning_rate": 1.5506093649775496e-06, "loss": 0.906, "step": 57880 }, { "epoch": 0.7054586669591606, "grad_norm": 2.5640077590942383, "learning_rate": 1.550288646568313e-06, "loss": 0.8307, "step": 57885 }, { "epoch": 0.7055196031833083, "grad_norm": 2.081836223602295, "learning_rate": 1.5499679281590765e-06, "loss": 0.832, "step": 57890 }, { "epoch": 0.7055805394074561, "grad_norm": 1.9763569831848145, "learning_rate": 1.5496472097498397e-06, "loss": 0.8172, "step": 57895 }, { "epoch": 0.705641475631604, "grad_norm": 5.004648208618164, "learning_rate": 1.5493264913406032e-06, "loss": 0.8378, "step": 57900 }, { "epoch": 0.7057024118557518, "grad_norm": 2.0136122703552246, "learning_rate": 1.5490057729313666e-06, "loss": 0.8364, "step": 57905 }, { "epoch": 0.7057633480798996, "grad_norm": 1.8203437328338623, "learning_rate": 1.5486850545221296e-06, "loss": 0.8139, "step": 57910 }, { "epoch": 0.7058242843040474, "grad_norm": 1.8521589040756226, "learning_rate": 1.548364336112893e-06, "loss": 0.833, "step": 57915 }, { "epoch": 0.7058852205281952, "grad_norm": 2.123774528503418, "learning_rate": 1.5480436177036563e-06, "loss": 0.8493, "step": 57920 }, { "epoch": 0.705946156752343, "grad_norm": 1.7414534091949463, "learning_rate": 1.5477228992944195e-06, "loss": 0.8498, "step": 57925 }, { "epoch": 0.7060070929764908, "grad_norm": 2.315842628479004, "learning_rate": 1.547402180885183e-06, "loss": 0.7749, "step": 57930 }, { "epoch": 0.7060680292006386, "grad_norm": 1.7134714126586914, "learning_rate": 1.5470814624759462e-06, "loss": 0.8283, "step": 57935 }, { "epoch": 0.7061289654247864, "grad_norm": 2.0770339965820312, "learning_rate": 1.5467607440667096e-06, "loss": 0.8331, "step": 57940 }, { "epoch": 0.7061899016489342, "grad_norm": 2.4139842987060547, "learning_rate": 1.546440025657473e-06, "loss": 0.8224, "step": 57945 }, { "epoch": 0.7062508378730821, "grad_norm": 1.825634241104126, "learning_rate": 1.546119307248236e-06, "loss": 0.7955, "step": 57950 }, { "epoch": 0.7063117740972299, "grad_norm": 2.4256677627563477, "learning_rate": 1.5457985888389995e-06, "loss": 0.8322, "step": 57955 }, { "epoch": 0.7063727103213776, "grad_norm": 1.8293434381484985, "learning_rate": 1.5454778704297628e-06, "loss": 0.8115, "step": 57960 }, { "epoch": 0.7064336465455254, "grad_norm": 1.948693871498108, "learning_rate": 1.545157152020526e-06, "loss": 0.8143, "step": 57965 }, { "epoch": 0.7064945827696733, "grad_norm": 1.7162121534347534, "learning_rate": 1.5448364336112894e-06, "loss": 0.867, "step": 57970 }, { "epoch": 0.7065555189938211, "grad_norm": 1.8191167116165161, "learning_rate": 1.5445157152020527e-06, "loss": 0.8318, "step": 57975 }, { "epoch": 0.7066164552179689, "grad_norm": 1.8910303115844727, "learning_rate": 1.544194996792816e-06, "loss": 0.8862, "step": 57980 }, { "epoch": 0.7066773914421167, "grad_norm": 1.900795578956604, "learning_rate": 1.5438742783835795e-06, "loss": 0.8306, "step": 57985 }, { "epoch": 0.7067383276662645, "grad_norm": 2.0444254875183105, "learning_rate": 1.5435535599743425e-06, "loss": 0.771, "step": 57990 }, { "epoch": 0.7067992638904123, "grad_norm": 2.1700992584228516, "learning_rate": 1.543232841565106e-06, "loss": 0.8108, "step": 57995 }, { "epoch": 0.7068602001145601, "grad_norm": 1.8455102443695068, "learning_rate": 1.5429121231558692e-06, "loss": 0.8131, "step": 58000 }, { "epoch": 0.7069211363387079, "grad_norm": 1.8348753452301025, "learning_rate": 1.5425914047466324e-06, "loss": 0.7969, "step": 58005 }, { "epoch": 0.7069820725628557, "grad_norm": 1.7242431640625, "learning_rate": 1.5422706863373959e-06, "loss": 0.8472, "step": 58010 }, { "epoch": 0.7070430087870035, "grad_norm": 1.890673041343689, "learning_rate": 1.5419499679281591e-06, "loss": 0.8569, "step": 58015 }, { "epoch": 0.7071039450111514, "grad_norm": 2.2718517780303955, "learning_rate": 1.5416292495189226e-06, "loss": 0.8181, "step": 58020 }, { "epoch": 0.7071648812352991, "grad_norm": 2.2623236179351807, "learning_rate": 1.541308531109686e-06, "loss": 0.8652, "step": 58025 }, { "epoch": 0.7072258174594469, "grad_norm": 2.0124893188476562, "learning_rate": 1.540987812700449e-06, "loss": 0.8921, "step": 58030 }, { "epoch": 0.7072867536835947, "grad_norm": 2.024348020553589, "learning_rate": 1.5406670942912125e-06, "loss": 0.777, "step": 58035 }, { "epoch": 0.7073476899077425, "grad_norm": 1.7843897342681885, "learning_rate": 1.5403463758819759e-06, "loss": 0.8086, "step": 58040 }, { "epoch": 0.7074086261318904, "grad_norm": 2.007394790649414, "learning_rate": 1.5400256574727391e-06, "loss": 0.8983, "step": 58045 }, { "epoch": 0.7074695623560382, "grad_norm": 2.2281317710876465, "learning_rate": 1.5397049390635024e-06, "loss": 0.8345, "step": 58050 }, { "epoch": 0.707530498580186, "grad_norm": 1.780848503112793, "learning_rate": 1.5393842206542656e-06, "loss": 0.7878, "step": 58055 }, { "epoch": 0.7075914348043337, "grad_norm": 1.788552165031433, "learning_rate": 1.539063502245029e-06, "loss": 0.7867, "step": 58060 }, { "epoch": 0.7076523710284816, "grad_norm": 2.052578926086426, "learning_rate": 1.5387427838357925e-06, "loss": 0.7781, "step": 58065 }, { "epoch": 0.7077133072526294, "grad_norm": 2.2940642833709717, "learning_rate": 1.5384220654265555e-06, "loss": 0.8382, "step": 58070 }, { "epoch": 0.7077742434767772, "grad_norm": 1.8490371704101562, "learning_rate": 1.538101347017319e-06, "loss": 0.8301, "step": 58075 }, { "epoch": 0.707835179700925, "grad_norm": 1.6956703662872314, "learning_rate": 1.5377806286080824e-06, "loss": 0.8258, "step": 58080 }, { "epoch": 0.7078961159250728, "grad_norm": 1.8609817028045654, "learning_rate": 1.5374599101988456e-06, "loss": 0.836, "step": 58085 }, { "epoch": 0.7079570521492207, "grad_norm": 2.1158435344696045, "learning_rate": 1.5371391917896088e-06, "loss": 0.8139, "step": 58090 }, { "epoch": 0.7080179883733684, "grad_norm": 2.3773961067199707, "learning_rate": 1.536818473380372e-06, "loss": 0.7865, "step": 58095 }, { "epoch": 0.7080789245975162, "grad_norm": 1.707395315170288, "learning_rate": 1.5364977549711355e-06, "loss": 0.8182, "step": 58100 }, { "epoch": 0.708139860821664, "grad_norm": 2.1832921504974365, "learning_rate": 1.536177036561899e-06, "loss": 0.8423, "step": 58105 }, { "epoch": 0.7082007970458118, "grad_norm": 1.9765281677246094, "learning_rate": 1.535856318152662e-06, "loss": 0.8114, "step": 58110 }, { "epoch": 0.7082617332699597, "grad_norm": 2.1611313819885254, "learning_rate": 1.5355355997434254e-06, "loss": 0.8727, "step": 58115 }, { "epoch": 0.7083226694941075, "grad_norm": 2.5224077701568604, "learning_rate": 1.5352148813341888e-06, "loss": 0.8618, "step": 58120 }, { "epoch": 0.7083836057182553, "grad_norm": 2.14748215675354, "learning_rate": 1.534894162924952e-06, "loss": 0.8277, "step": 58125 }, { "epoch": 0.708444541942403, "grad_norm": 2.1908774375915527, "learning_rate": 1.5345734445157153e-06, "loss": 0.7945, "step": 58130 }, { "epoch": 0.7085054781665509, "grad_norm": 1.7835196256637573, "learning_rate": 1.5342527261064785e-06, "loss": 0.7627, "step": 58135 }, { "epoch": 0.7085664143906987, "grad_norm": 1.6870436668395996, "learning_rate": 1.533932007697242e-06, "loss": 0.7932, "step": 58140 }, { "epoch": 0.7086273506148465, "grad_norm": 2.07351016998291, "learning_rate": 1.5336112892880054e-06, "loss": 0.8245, "step": 58145 }, { "epoch": 0.7086882868389943, "grad_norm": 2.3547167778015137, "learning_rate": 1.5332905708787684e-06, "loss": 0.8001, "step": 58150 }, { "epoch": 0.7087492230631421, "grad_norm": 2.0685298442840576, "learning_rate": 1.5329698524695318e-06, "loss": 0.8437, "step": 58155 }, { "epoch": 0.70881015928729, "grad_norm": 1.8714234828948975, "learning_rate": 1.5326491340602953e-06, "loss": 0.8131, "step": 58160 }, { "epoch": 0.7088710955114377, "grad_norm": 1.9668214321136475, "learning_rate": 1.5323284156510585e-06, "loss": 0.8339, "step": 58165 }, { "epoch": 0.7089320317355855, "grad_norm": 1.8256853818893433, "learning_rate": 1.532007697241822e-06, "loss": 0.8145, "step": 58170 }, { "epoch": 0.7089929679597333, "grad_norm": 1.7596994638442993, "learning_rate": 1.531686978832585e-06, "loss": 0.7563, "step": 58175 }, { "epoch": 0.7090539041838811, "grad_norm": 1.93454909324646, "learning_rate": 1.5313662604233484e-06, "loss": 0.7797, "step": 58180 }, { "epoch": 0.709114840408029, "grad_norm": 1.9967341423034668, "learning_rate": 1.5310455420141119e-06, "loss": 0.8237, "step": 58185 }, { "epoch": 0.7091757766321768, "grad_norm": 1.9675358533859253, "learning_rate": 1.5307248236048749e-06, "loss": 0.841, "step": 58190 }, { "epoch": 0.7092367128563246, "grad_norm": 1.5769848823547363, "learning_rate": 1.5304041051956383e-06, "loss": 0.8155, "step": 58195 }, { "epoch": 0.7092976490804723, "grad_norm": 1.7601451873779297, "learning_rate": 1.5300833867864017e-06, "loss": 0.7473, "step": 58200 }, { "epoch": 0.7093585853046201, "grad_norm": 2.166179656982422, "learning_rate": 1.529762668377165e-06, "loss": 0.7899, "step": 58205 }, { "epoch": 0.709419521528768, "grad_norm": 2.198047637939453, "learning_rate": 1.5294419499679284e-06, "loss": 0.7844, "step": 58210 }, { "epoch": 0.7094804577529158, "grad_norm": 1.9540258646011353, "learning_rate": 1.5291212315586914e-06, "loss": 0.824, "step": 58215 }, { "epoch": 0.7095413939770636, "grad_norm": 2.07073712348938, "learning_rate": 1.5288005131494549e-06, "loss": 0.9133, "step": 58220 }, { "epoch": 0.7096023302012114, "grad_norm": 2.1429390907287598, "learning_rate": 1.5284797947402183e-06, "loss": 0.8551, "step": 58225 }, { "epoch": 0.7096632664253593, "grad_norm": 1.9558606147766113, "learning_rate": 1.5281590763309813e-06, "loss": 0.8896, "step": 58230 }, { "epoch": 0.709724202649507, "grad_norm": 2.2590250968933105, "learning_rate": 1.5278383579217448e-06, "loss": 0.7554, "step": 58235 }, { "epoch": 0.7097851388736548, "grad_norm": 1.7782593965530396, "learning_rate": 1.5275176395125082e-06, "loss": 0.8657, "step": 58240 }, { "epoch": 0.7098460750978026, "grad_norm": 1.9476113319396973, "learning_rate": 1.5271969211032714e-06, "loss": 0.8413, "step": 58245 }, { "epoch": 0.7099070113219504, "grad_norm": 1.7274223566055298, "learning_rate": 1.5268762026940349e-06, "loss": 0.7775, "step": 58250 }, { "epoch": 0.7099679475460983, "grad_norm": 2.162196159362793, "learning_rate": 1.526555484284798e-06, "loss": 0.8624, "step": 58255 }, { "epoch": 0.7100288837702461, "grad_norm": 2.2565462589263916, "learning_rate": 1.5262347658755613e-06, "loss": 0.8896, "step": 58260 }, { "epoch": 0.7100898199943939, "grad_norm": 1.7012321949005127, "learning_rate": 1.5259140474663248e-06, "loss": 0.854, "step": 58265 }, { "epoch": 0.7101507562185416, "grad_norm": 2.5226263999938965, "learning_rate": 1.525593329057088e-06, "loss": 0.8459, "step": 58270 }, { "epoch": 0.7102116924426894, "grad_norm": 2.5767669677734375, "learning_rate": 1.5252726106478512e-06, "loss": 0.8511, "step": 58275 }, { "epoch": 0.7102726286668373, "grad_norm": 1.8755948543548584, "learning_rate": 1.5249518922386147e-06, "loss": 0.8419, "step": 58280 }, { "epoch": 0.7103335648909851, "grad_norm": 1.8540147542953491, "learning_rate": 1.524631173829378e-06, "loss": 0.7747, "step": 58285 }, { "epoch": 0.7103945011151329, "grad_norm": 1.8495315313339233, "learning_rate": 1.5243104554201413e-06, "loss": 0.839, "step": 58290 }, { "epoch": 0.7104554373392807, "grad_norm": 2.094374895095825, "learning_rate": 1.5239897370109044e-06, "loss": 0.8598, "step": 58295 }, { "epoch": 0.7105163735634286, "grad_norm": 2.188899517059326, "learning_rate": 1.5236690186016678e-06, "loss": 0.8447, "step": 58300 }, { "epoch": 0.7105773097875763, "grad_norm": 1.699817419052124, "learning_rate": 1.5233483001924312e-06, "loss": 0.8072, "step": 58305 }, { "epoch": 0.7106382460117241, "grad_norm": 2.004582405090332, "learning_rate": 1.5230275817831945e-06, "loss": 0.8034, "step": 58310 }, { "epoch": 0.7106991822358719, "grad_norm": 1.7750225067138672, "learning_rate": 1.5227068633739577e-06, "loss": 0.7284, "step": 58315 }, { "epoch": 0.7107601184600197, "grad_norm": 1.7517040967941284, "learning_rate": 1.5223861449647211e-06, "loss": 0.9069, "step": 58320 }, { "epoch": 0.7108210546841676, "grad_norm": 2.1068055629730225, "learning_rate": 1.5220654265554844e-06, "loss": 0.7869, "step": 58325 }, { "epoch": 0.7108819909083154, "grad_norm": 1.880547285079956, "learning_rate": 1.5217447081462478e-06, "loss": 0.7429, "step": 58330 }, { "epoch": 0.7109429271324632, "grad_norm": 1.9211461544036865, "learning_rate": 1.5214239897370112e-06, "loss": 0.804, "step": 58335 }, { "epoch": 0.7110038633566109, "grad_norm": 2.524951934814453, "learning_rate": 1.5211032713277743e-06, "loss": 0.8314, "step": 58340 }, { "epoch": 0.7110647995807587, "grad_norm": 1.8706293106079102, "learning_rate": 1.5207825529185377e-06, "loss": 0.8202, "step": 58345 }, { "epoch": 0.7111257358049066, "grad_norm": 2.1789135932922363, "learning_rate": 1.520461834509301e-06, "loss": 0.8089, "step": 58350 }, { "epoch": 0.7111866720290544, "grad_norm": 2.072819471359253, "learning_rate": 1.5201411161000642e-06, "loss": 0.8324, "step": 58355 }, { "epoch": 0.7112476082532022, "grad_norm": 1.7918521165847778, "learning_rate": 1.5198203976908276e-06, "loss": 0.857, "step": 58360 }, { "epoch": 0.71130854447735, "grad_norm": 1.8426588773727417, "learning_rate": 1.5194996792815908e-06, "loss": 0.8341, "step": 58365 }, { "epoch": 0.7113694807014979, "grad_norm": 2.382622480392456, "learning_rate": 1.5191789608723543e-06, "loss": 0.7553, "step": 58370 }, { "epoch": 0.7114304169256456, "grad_norm": 1.8089406490325928, "learning_rate": 1.5188582424631177e-06, "loss": 0.7693, "step": 58375 }, { "epoch": 0.7114913531497934, "grad_norm": 2.391420841217041, "learning_rate": 1.5185375240538807e-06, "loss": 0.7611, "step": 58380 }, { "epoch": 0.7115522893739412, "grad_norm": 1.8332403898239136, "learning_rate": 1.5182168056446442e-06, "loss": 0.7988, "step": 58385 }, { "epoch": 0.711613225598089, "grad_norm": 1.9673285484313965, "learning_rate": 1.5178960872354074e-06, "loss": 0.7845, "step": 58390 }, { "epoch": 0.7116741618222369, "grad_norm": 2.0713090896606445, "learning_rate": 1.5175753688261708e-06, "loss": 0.7918, "step": 58395 }, { "epoch": 0.7117350980463847, "grad_norm": 2.1043405532836914, "learning_rate": 1.517254650416934e-06, "loss": 0.851, "step": 58400 }, { "epoch": 0.7117960342705325, "grad_norm": 1.8926585912704468, "learning_rate": 1.5169339320076973e-06, "loss": 0.8143, "step": 58405 }, { "epoch": 0.7118569704946802, "grad_norm": 1.6630375385284424, "learning_rate": 1.5166132135984607e-06, "loss": 0.8471, "step": 58410 }, { "epoch": 0.711917906718828, "grad_norm": 2.403546094894409, "learning_rate": 1.5162924951892242e-06, "loss": 0.8248, "step": 58415 }, { "epoch": 0.7119788429429759, "grad_norm": 2.420487880706787, "learning_rate": 1.5159717767799872e-06, "loss": 0.7733, "step": 58420 }, { "epoch": 0.7120397791671237, "grad_norm": 1.860264539718628, "learning_rate": 1.5156510583707506e-06, "loss": 0.7909, "step": 58425 }, { "epoch": 0.7121007153912715, "grad_norm": 2.184436798095703, "learning_rate": 1.5153303399615139e-06, "loss": 0.859, "step": 58430 }, { "epoch": 0.7121616516154193, "grad_norm": 2.1913249492645264, "learning_rate": 1.5150096215522773e-06, "loss": 0.7902, "step": 58435 }, { "epoch": 0.7122225878395672, "grad_norm": 1.628176212310791, "learning_rate": 1.5146889031430405e-06, "loss": 0.7853, "step": 58440 }, { "epoch": 0.7122835240637149, "grad_norm": 1.9386484622955322, "learning_rate": 1.5143681847338038e-06, "loss": 0.7916, "step": 58445 }, { "epoch": 0.7123444602878627, "grad_norm": 2.2862367630004883, "learning_rate": 1.5140474663245672e-06, "loss": 0.7598, "step": 58450 }, { "epoch": 0.7124053965120105, "grad_norm": 1.889776587486267, "learning_rate": 1.5137267479153306e-06, "loss": 0.8154, "step": 58455 }, { "epoch": 0.7124663327361583, "grad_norm": 1.8179371356964111, "learning_rate": 1.5134060295060937e-06, "loss": 0.8261, "step": 58460 }, { "epoch": 0.7125272689603062, "grad_norm": 2.2858705520629883, "learning_rate": 1.513085311096857e-06, "loss": 0.8955, "step": 58465 }, { "epoch": 0.712588205184454, "grad_norm": 1.7684396505355835, "learning_rate": 1.5127645926876203e-06, "loss": 0.8148, "step": 58470 }, { "epoch": 0.7126491414086018, "grad_norm": 2.0750246047973633, "learning_rate": 1.5124438742783838e-06, "loss": 0.7807, "step": 58475 }, { "epoch": 0.7127100776327495, "grad_norm": 2.0567758083343506, "learning_rate": 1.512123155869147e-06, "loss": 0.789, "step": 58480 }, { "epoch": 0.7127710138568973, "grad_norm": 2.01407790184021, "learning_rate": 1.5118024374599102e-06, "loss": 0.8258, "step": 58485 }, { "epoch": 0.7128319500810452, "grad_norm": 1.7081704139709473, "learning_rate": 1.5114817190506737e-06, "loss": 0.7653, "step": 58490 }, { "epoch": 0.712892886305193, "grad_norm": 2.024900197982788, "learning_rate": 1.511161000641437e-06, "loss": 0.8772, "step": 58495 }, { "epoch": 0.7129538225293408, "grad_norm": 1.8925291299819946, "learning_rate": 1.5108402822322001e-06, "loss": 0.8577, "step": 58500 }, { "epoch": 0.7130147587534886, "grad_norm": 1.885335087776184, "learning_rate": 1.5105195638229636e-06, "loss": 0.8113, "step": 58505 }, { "epoch": 0.7130756949776365, "grad_norm": 2.059206008911133, "learning_rate": 1.5101988454137268e-06, "loss": 0.823, "step": 58510 }, { "epoch": 0.7131366312017842, "grad_norm": 1.7847979068756104, "learning_rate": 1.5098781270044902e-06, "loss": 0.7549, "step": 58515 }, { "epoch": 0.713197567425932, "grad_norm": 1.9996930360794067, "learning_rate": 1.5095574085952537e-06, "loss": 0.8015, "step": 58520 }, { "epoch": 0.7132585036500798, "grad_norm": 2.069979190826416, "learning_rate": 1.5092366901860167e-06, "loss": 0.793, "step": 58525 }, { "epoch": 0.7133194398742276, "grad_norm": 2.6117780208587646, "learning_rate": 1.5089159717767801e-06, "loss": 0.7775, "step": 58530 }, { "epoch": 0.7133803760983755, "grad_norm": 1.764378547668457, "learning_rate": 1.5085952533675436e-06, "loss": 0.785, "step": 58535 }, { "epoch": 0.7134413123225233, "grad_norm": 1.8597770929336548, "learning_rate": 1.5082745349583066e-06, "loss": 0.8174, "step": 58540 }, { "epoch": 0.7135022485466711, "grad_norm": 1.6866815090179443, "learning_rate": 1.50795381654907e-06, "loss": 0.8237, "step": 58545 }, { "epoch": 0.7135631847708188, "grad_norm": 1.9538711309432983, "learning_rate": 1.5076330981398333e-06, "loss": 0.801, "step": 58550 }, { "epoch": 0.7136241209949666, "grad_norm": 1.7525378465652466, "learning_rate": 1.5073123797305967e-06, "loss": 0.8135, "step": 58555 }, { "epoch": 0.7136850572191145, "grad_norm": 2.101203441619873, "learning_rate": 1.5069916613213601e-06, "loss": 0.7455, "step": 58560 }, { "epoch": 0.7137459934432623, "grad_norm": 1.751790165901184, "learning_rate": 1.5066709429121231e-06, "loss": 0.7703, "step": 58565 }, { "epoch": 0.7138069296674101, "grad_norm": 1.8620060682296753, "learning_rate": 1.5063502245028866e-06, "loss": 0.7868, "step": 58570 }, { "epoch": 0.7138678658915579, "grad_norm": 1.9789958000183105, "learning_rate": 1.50602950609365e-06, "loss": 0.799, "step": 58575 }, { "epoch": 0.7139288021157058, "grad_norm": 2.4708690643310547, "learning_rate": 1.505708787684413e-06, "loss": 0.7531, "step": 58580 }, { "epoch": 0.7139897383398535, "grad_norm": 1.863277792930603, "learning_rate": 1.5053880692751765e-06, "loss": 0.8346, "step": 58585 }, { "epoch": 0.7140506745640013, "grad_norm": 1.9674599170684814, "learning_rate": 1.5050673508659397e-06, "loss": 0.8396, "step": 58590 }, { "epoch": 0.7141116107881491, "grad_norm": 1.7446808815002441, "learning_rate": 1.5047466324567032e-06, "loss": 0.8, "step": 58595 }, { "epoch": 0.7141725470122969, "grad_norm": 2.3247992992401123, "learning_rate": 1.5044259140474666e-06, "loss": 0.8695, "step": 58600 }, { "epoch": 0.7142334832364448, "grad_norm": 2.3385562896728516, "learning_rate": 1.5041051956382296e-06, "loss": 0.8693, "step": 58605 }, { "epoch": 0.7142944194605926, "grad_norm": 2.110330820083618, "learning_rate": 1.503784477228993e-06, "loss": 0.832, "step": 58610 }, { "epoch": 0.7143553556847404, "grad_norm": 1.9050143957138062, "learning_rate": 1.5034637588197565e-06, "loss": 0.8749, "step": 58615 }, { "epoch": 0.7144162919088881, "grad_norm": 2.0187158584594727, "learning_rate": 1.5031430404105197e-06, "loss": 0.7618, "step": 58620 }, { "epoch": 0.7144772281330359, "grad_norm": 2.46805477142334, "learning_rate": 1.502822322001283e-06, "loss": 0.8423, "step": 58625 }, { "epoch": 0.7145381643571838, "grad_norm": 2.062246561050415, "learning_rate": 1.5025016035920464e-06, "loss": 0.8437, "step": 58630 }, { "epoch": 0.7145991005813316, "grad_norm": 2.1264777183532715, "learning_rate": 1.5021808851828096e-06, "loss": 0.8319, "step": 58635 }, { "epoch": 0.7146600368054794, "grad_norm": 1.759499430656433, "learning_rate": 1.501860166773573e-06, "loss": 0.8821, "step": 58640 }, { "epoch": 0.7147209730296272, "grad_norm": 2.070668935775757, "learning_rate": 1.501539448364336e-06, "loss": 0.7952, "step": 58645 }, { "epoch": 0.714781909253775, "grad_norm": 2.3524227142333984, "learning_rate": 1.5012187299550995e-06, "loss": 0.8712, "step": 58650 }, { "epoch": 0.7148428454779228, "grad_norm": 2.6574151515960693, "learning_rate": 1.500898011545863e-06, "loss": 0.906, "step": 58655 }, { "epoch": 0.7149037817020706, "grad_norm": 1.5203009843826294, "learning_rate": 1.5005772931366262e-06, "loss": 0.7419, "step": 58660 }, { "epoch": 0.7149647179262184, "grad_norm": 1.9629521369934082, "learning_rate": 1.5002565747273894e-06, "loss": 0.7612, "step": 58665 }, { "epoch": 0.7150256541503662, "grad_norm": 1.82509446144104, "learning_rate": 1.4999358563181529e-06, "loss": 0.8657, "step": 58670 }, { "epoch": 0.715086590374514, "grad_norm": 1.6841527223587036, "learning_rate": 1.499615137908916e-06, "loss": 0.8546, "step": 58675 }, { "epoch": 0.7151475265986619, "grad_norm": 1.9769867658615112, "learning_rate": 1.4992944194996795e-06, "loss": 0.8553, "step": 58680 }, { "epoch": 0.7152084628228097, "grad_norm": 1.9645750522613525, "learning_rate": 1.4989737010904425e-06, "loss": 0.8444, "step": 58685 }, { "epoch": 0.7152693990469574, "grad_norm": 2.0516440868377686, "learning_rate": 1.498652982681206e-06, "loss": 0.765, "step": 58690 }, { "epoch": 0.7153303352711052, "grad_norm": 1.8294379711151123, "learning_rate": 1.4983322642719694e-06, "loss": 0.8252, "step": 58695 }, { "epoch": 0.7153912714952531, "grad_norm": 2.024747133255005, "learning_rate": 1.4980115458627326e-06, "loss": 0.7738, "step": 58700 }, { "epoch": 0.7154522077194009, "grad_norm": 2.2494688034057617, "learning_rate": 1.4976908274534959e-06, "loss": 0.8253, "step": 58705 }, { "epoch": 0.7155131439435487, "grad_norm": 1.9397228956222534, "learning_rate": 1.4973701090442593e-06, "loss": 0.8492, "step": 58710 }, { "epoch": 0.7155740801676965, "grad_norm": 2.4515268802642822, "learning_rate": 1.4970493906350225e-06, "loss": 0.8302, "step": 58715 }, { "epoch": 0.7156350163918443, "grad_norm": 1.6654361486434937, "learning_rate": 1.496728672225786e-06, "loss": 0.8435, "step": 58720 }, { "epoch": 0.7156959526159921, "grad_norm": 1.9872817993164062, "learning_rate": 1.496407953816549e-06, "loss": 0.8137, "step": 58725 }, { "epoch": 0.7157568888401399, "grad_norm": 1.7268716096878052, "learning_rate": 1.4960872354073124e-06, "loss": 0.7821, "step": 58730 }, { "epoch": 0.7158178250642877, "grad_norm": 1.835816740989685, "learning_rate": 1.4957665169980759e-06, "loss": 0.8266, "step": 58735 }, { "epoch": 0.7158787612884355, "grad_norm": 1.8108054399490356, "learning_rate": 1.4954457985888391e-06, "loss": 0.7827, "step": 58740 }, { "epoch": 0.7159396975125834, "grad_norm": 2.05092716217041, "learning_rate": 1.4951250801796026e-06, "loss": 0.8592, "step": 58745 }, { "epoch": 0.7160006337367312, "grad_norm": 2.583759307861328, "learning_rate": 1.4948043617703658e-06, "loss": 0.8797, "step": 58750 }, { "epoch": 0.716061569960879, "grad_norm": 2.4591236114501953, "learning_rate": 1.494483643361129e-06, "loss": 0.8526, "step": 58755 }, { "epoch": 0.7161225061850267, "grad_norm": 2.041060447692871, "learning_rate": 1.4941629249518924e-06, "loss": 0.7289, "step": 58760 }, { "epoch": 0.7161834424091745, "grad_norm": 1.9951212406158447, "learning_rate": 1.4938422065426555e-06, "loss": 0.7024, "step": 58765 }, { "epoch": 0.7162443786333224, "grad_norm": 1.7640612125396729, "learning_rate": 1.493521488133419e-06, "loss": 0.8419, "step": 58770 }, { "epoch": 0.7163053148574702, "grad_norm": 2.0565857887268066, "learning_rate": 1.4932007697241823e-06, "loss": 0.8226, "step": 58775 }, { "epoch": 0.716366251081618, "grad_norm": 1.9773485660552979, "learning_rate": 1.4928800513149456e-06, "loss": 0.865, "step": 58780 }, { "epoch": 0.7164271873057658, "grad_norm": 2.458989381790161, "learning_rate": 1.492559332905709e-06, "loss": 0.766, "step": 58785 }, { "epoch": 0.7164881235299136, "grad_norm": 1.8313539028167725, "learning_rate": 1.4922386144964722e-06, "loss": 0.7921, "step": 58790 }, { "epoch": 0.7165490597540614, "grad_norm": 2.126636028289795, "learning_rate": 1.4919178960872355e-06, "loss": 0.7795, "step": 58795 }, { "epoch": 0.7166099959782092, "grad_norm": 2.2928552627563477, "learning_rate": 1.491597177677999e-06, "loss": 0.7969, "step": 58800 }, { "epoch": 0.716670932202357, "grad_norm": 1.9472414255142212, "learning_rate": 1.491276459268762e-06, "loss": 0.8363, "step": 58805 }, { "epoch": 0.7167318684265048, "grad_norm": 2.242194652557373, "learning_rate": 1.4909557408595254e-06, "loss": 0.7964, "step": 58810 }, { "epoch": 0.7167928046506526, "grad_norm": 1.8975239992141724, "learning_rate": 1.4906350224502888e-06, "loss": 0.8534, "step": 58815 }, { "epoch": 0.7168537408748005, "grad_norm": 2.622406005859375, "learning_rate": 1.490314304041052e-06, "loss": 0.8504, "step": 58820 }, { "epoch": 0.7169146770989483, "grad_norm": 2.144824504852295, "learning_rate": 1.4899935856318155e-06, "loss": 0.8218, "step": 58825 }, { "epoch": 0.716975613323096, "grad_norm": 1.9618996381759644, "learning_rate": 1.4896728672225787e-06, "loss": 0.8141, "step": 58830 }, { "epoch": 0.7170365495472438, "grad_norm": 2.109537124633789, "learning_rate": 1.489352148813342e-06, "loss": 0.7586, "step": 58835 }, { "epoch": 0.7170974857713917, "grad_norm": 1.870995044708252, "learning_rate": 1.4890314304041054e-06, "loss": 0.8105, "step": 58840 }, { "epoch": 0.7171584219955395, "grad_norm": 1.9429898262023926, "learning_rate": 1.4887107119948684e-06, "loss": 0.7924, "step": 58845 }, { "epoch": 0.7172193582196873, "grad_norm": 2.19963002204895, "learning_rate": 1.4883899935856318e-06, "loss": 0.8636, "step": 58850 }, { "epoch": 0.7172802944438351, "grad_norm": 2.1401398181915283, "learning_rate": 1.4880692751763953e-06, "loss": 0.7708, "step": 58855 }, { "epoch": 0.7173412306679829, "grad_norm": 2.0663836002349854, "learning_rate": 1.4877485567671585e-06, "loss": 0.9014, "step": 58860 }, { "epoch": 0.7174021668921307, "grad_norm": 2.0025200843811035, "learning_rate": 1.487427838357922e-06, "loss": 0.819, "step": 58865 }, { "epoch": 0.7174631031162785, "grad_norm": 1.8613038063049316, "learning_rate": 1.4871071199486854e-06, "loss": 0.8553, "step": 58870 }, { "epoch": 0.7175240393404263, "grad_norm": 2.2502057552337646, "learning_rate": 1.4867864015394484e-06, "loss": 0.8149, "step": 58875 }, { "epoch": 0.7175849755645741, "grad_norm": 1.9454950094223022, "learning_rate": 1.4864656831302118e-06, "loss": 0.8282, "step": 58880 }, { "epoch": 0.717645911788722, "grad_norm": 1.7813833951950073, "learning_rate": 1.486144964720975e-06, "loss": 0.7909, "step": 58885 }, { "epoch": 0.7177068480128698, "grad_norm": 2.1317050457000732, "learning_rate": 1.4858242463117383e-06, "loss": 0.7737, "step": 58890 }, { "epoch": 0.7177677842370176, "grad_norm": 1.7356892824172974, "learning_rate": 1.4855035279025017e-06, "loss": 0.8124, "step": 58895 }, { "epoch": 0.7178287204611653, "grad_norm": 1.743566870689392, "learning_rate": 1.485182809493265e-06, "loss": 0.8204, "step": 58900 }, { "epoch": 0.7178896566853131, "grad_norm": 2.0063397884368896, "learning_rate": 1.4848620910840284e-06, "loss": 0.8313, "step": 58905 }, { "epoch": 0.717950592909461, "grad_norm": 2.096139430999756, "learning_rate": 1.4845413726747918e-06, "loss": 0.8156, "step": 58910 }, { "epoch": 0.7180115291336088, "grad_norm": 1.9042487144470215, "learning_rate": 1.4842206542655549e-06, "loss": 0.7539, "step": 58915 }, { "epoch": 0.7180724653577566, "grad_norm": 2.229937791824341, "learning_rate": 1.4838999358563183e-06, "loss": 0.7726, "step": 58920 }, { "epoch": 0.7181334015819044, "grad_norm": 2.001971483230591, "learning_rate": 1.4835792174470817e-06, "loss": 0.8488, "step": 58925 }, { "epoch": 0.7181943378060522, "grad_norm": 1.9331485033035278, "learning_rate": 1.4832584990378448e-06, "loss": 0.8315, "step": 58930 }, { "epoch": 0.7182552740302, "grad_norm": 1.956465482711792, "learning_rate": 1.4829377806286082e-06, "loss": 0.8024, "step": 58935 }, { "epoch": 0.7183162102543478, "grad_norm": 2.0345609188079834, "learning_rate": 1.4826170622193714e-06, "loss": 0.8074, "step": 58940 }, { "epoch": 0.7183771464784956, "grad_norm": 1.7455912828445435, "learning_rate": 1.4822963438101349e-06, "loss": 0.8565, "step": 58945 }, { "epoch": 0.7184380827026434, "grad_norm": 1.9930280447006226, "learning_rate": 1.4819756254008983e-06, "loss": 0.8419, "step": 58950 }, { "epoch": 0.7184990189267912, "grad_norm": 1.8949863910675049, "learning_rate": 1.4816549069916613e-06, "loss": 0.9095, "step": 58955 }, { "epoch": 0.7185599551509391, "grad_norm": 2.023573637008667, "learning_rate": 1.4813341885824248e-06, "loss": 0.8031, "step": 58960 }, { "epoch": 0.7186208913750868, "grad_norm": 2.0605010986328125, "learning_rate": 1.4810134701731882e-06, "loss": 0.8372, "step": 58965 }, { "epoch": 0.7186818275992346, "grad_norm": 2.382049798965454, "learning_rate": 1.4806927517639514e-06, "loss": 0.8591, "step": 58970 }, { "epoch": 0.7187427638233824, "grad_norm": 2.0065062046051025, "learning_rate": 1.4803720333547147e-06, "loss": 0.8115, "step": 58975 }, { "epoch": 0.7188037000475302, "grad_norm": 1.875223994255066, "learning_rate": 1.4800513149454779e-06, "loss": 0.8446, "step": 58980 }, { "epoch": 0.7188646362716781, "grad_norm": 1.9256014823913574, "learning_rate": 1.4797305965362413e-06, "loss": 0.8458, "step": 58985 }, { "epoch": 0.7189255724958259, "grad_norm": 2.018601655960083, "learning_rate": 1.4794098781270048e-06, "loss": 0.8262, "step": 58990 }, { "epoch": 0.7189865087199737, "grad_norm": 1.698576807975769, "learning_rate": 1.4790891597177678e-06, "loss": 0.8779, "step": 58995 }, { "epoch": 0.7190474449441214, "grad_norm": 2.010769844055176, "learning_rate": 1.4787684413085312e-06, "loss": 0.7991, "step": 59000 }, { "epoch": 0.7191083811682693, "grad_norm": 2.0654537677764893, "learning_rate": 1.4784477228992947e-06, "loss": 0.8628, "step": 59005 }, { "epoch": 0.7191693173924171, "grad_norm": 1.757219910621643, "learning_rate": 1.478127004490058e-06, "loss": 0.772, "step": 59010 }, { "epoch": 0.7192302536165649, "grad_norm": 1.5581581592559814, "learning_rate": 1.4778062860808211e-06, "loss": 0.735, "step": 59015 }, { "epoch": 0.7192911898407127, "grad_norm": 2.181320905685425, "learning_rate": 1.4774855676715844e-06, "loss": 0.8712, "step": 59020 }, { "epoch": 0.7193521260648605, "grad_norm": 1.9301354885101318, "learning_rate": 1.4771648492623478e-06, "loss": 0.8208, "step": 59025 }, { "epoch": 0.7194130622890084, "grad_norm": 2.3362088203430176, "learning_rate": 1.4768441308531112e-06, "loss": 0.8674, "step": 59030 }, { "epoch": 0.7194739985131561, "grad_norm": 2.2107255458831787, "learning_rate": 1.4765234124438743e-06, "loss": 0.8594, "step": 59035 }, { "epoch": 0.7195349347373039, "grad_norm": 1.9615906476974487, "learning_rate": 1.4762026940346377e-06, "loss": 0.6984, "step": 59040 }, { "epoch": 0.7195958709614517, "grad_norm": 2.1557586193084717, "learning_rate": 1.4758819756254011e-06, "loss": 0.8488, "step": 59045 }, { "epoch": 0.7196568071855995, "grad_norm": 1.8159440755844116, "learning_rate": 1.4755612572161644e-06, "loss": 0.7756, "step": 59050 }, { "epoch": 0.7197177434097474, "grad_norm": 2.0394909381866455, "learning_rate": 1.4752405388069276e-06, "loss": 0.8364, "step": 59055 }, { "epoch": 0.7197786796338952, "grad_norm": 1.9112122058868408, "learning_rate": 1.4749198203976908e-06, "loss": 0.8449, "step": 59060 }, { "epoch": 0.719839615858043, "grad_norm": 2.354869842529297, "learning_rate": 1.4745991019884543e-06, "loss": 0.8402, "step": 59065 }, { "epoch": 0.7199005520821907, "grad_norm": 2.150254487991333, "learning_rate": 1.4742783835792177e-06, "loss": 0.8598, "step": 59070 }, { "epoch": 0.7199614883063385, "grad_norm": 2.056412935256958, "learning_rate": 1.4739576651699807e-06, "loss": 0.7424, "step": 59075 }, { "epoch": 0.7200224245304864, "grad_norm": 2.0154993534088135, "learning_rate": 1.4736369467607442e-06, "loss": 0.7986, "step": 59080 }, { "epoch": 0.7200833607546342, "grad_norm": 2.100926160812378, "learning_rate": 1.4733162283515076e-06, "loss": 0.8258, "step": 59085 }, { "epoch": 0.720144296978782, "grad_norm": 1.779685139656067, "learning_rate": 1.4729955099422708e-06, "loss": 0.7986, "step": 59090 }, { "epoch": 0.7202052332029298, "grad_norm": 1.9898090362548828, "learning_rate": 1.4726747915330343e-06, "loss": 0.8092, "step": 59095 }, { "epoch": 0.7202661694270777, "grad_norm": 2.065312623977661, "learning_rate": 1.4723540731237973e-06, "loss": 0.8028, "step": 59100 }, { "epoch": 0.7203271056512254, "grad_norm": 1.7854264974594116, "learning_rate": 1.4720333547145607e-06, "loss": 0.8393, "step": 59105 }, { "epoch": 0.7203880418753732, "grad_norm": 2.2656469345092773, "learning_rate": 1.4717126363053242e-06, "loss": 0.8143, "step": 59110 }, { "epoch": 0.720448978099521, "grad_norm": 2.595928430557251, "learning_rate": 1.4713919178960872e-06, "loss": 0.8795, "step": 59115 }, { "epoch": 0.7205099143236688, "grad_norm": 1.8543386459350586, "learning_rate": 1.4710711994868506e-06, "loss": 0.8587, "step": 59120 }, { "epoch": 0.7205708505478167, "grad_norm": 1.912438154220581, "learning_rate": 1.470750481077614e-06, "loss": 0.8483, "step": 59125 }, { "epoch": 0.7206317867719645, "grad_norm": 1.9139900207519531, "learning_rate": 1.4704297626683773e-06, "loss": 0.883, "step": 59130 }, { "epoch": 0.7206927229961123, "grad_norm": 1.7740620374679565, "learning_rate": 1.4701090442591407e-06, "loss": 0.8516, "step": 59135 }, { "epoch": 0.72075365922026, "grad_norm": 1.8322564363479614, "learning_rate": 1.4697883258499037e-06, "loss": 0.7825, "step": 59140 }, { "epoch": 0.7208145954444078, "grad_norm": 2.004843235015869, "learning_rate": 1.4694676074406672e-06, "loss": 0.7684, "step": 59145 }, { "epoch": 0.7208755316685557, "grad_norm": 2.193406581878662, "learning_rate": 1.4691468890314306e-06, "loss": 0.7968, "step": 59150 }, { "epoch": 0.7209364678927035, "grad_norm": 2.255450963973999, "learning_rate": 1.4688261706221936e-06, "loss": 0.7811, "step": 59155 }, { "epoch": 0.7209974041168513, "grad_norm": 2.1191458702087402, "learning_rate": 1.468505452212957e-06, "loss": 0.7448, "step": 59160 }, { "epoch": 0.7210583403409991, "grad_norm": 1.929756760597229, "learning_rate": 1.4681847338037205e-06, "loss": 0.8221, "step": 59165 }, { "epoch": 0.721119276565147, "grad_norm": 2.137157440185547, "learning_rate": 1.4678640153944838e-06, "loss": 0.8199, "step": 59170 }, { "epoch": 0.7211802127892947, "grad_norm": 1.9034978151321411, "learning_rate": 1.4675432969852472e-06, "loss": 0.8616, "step": 59175 }, { "epoch": 0.7212411490134425, "grad_norm": 2.309828042984009, "learning_rate": 1.4672225785760102e-06, "loss": 0.8332, "step": 59180 }, { "epoch": 0.7213020852375903, "grad_norm": 1.9708017110824585, "learning_rate": 1.4669018601667737e-06, "loss": 0.8043, "step": 59185 }, { "epoch": 0.7213630214617381, "grad_norm": 2.0561933517456055, "learning_rate": 1.466581141757537e-06, "loss": 0.8103, "step": 59190 }, { "epoch": 0.721423957685886, "grad_norm": 2.231905698776245, "learning_rate": 1.4662604233483001e-06, "loss": 0.8115, "step": 59195 }, { "epoch": 0.7214848939100338, "grad_norm": 1.7709243297576904, "learning_rate": 1.4659397049390635e-06, "loss": 0.8321, "step": 59200 }, { "epoch": 0.7215458301341816, "grad_norm": 2.037231206893921, "learning_rate": 1.465618986529827e-06, "loss": 0.8188, "step": 59205 }, { "epoch": 0.7216067663583293, "grad_norm": 2.079669713973999, "learning_rate": 1.4652982681205902e-06, "loss": 0.8252, "step": 59210 }, { "epoch": 0.7216677025824771, "grad_norm": 2.2025578022003174, "learning_rate": 1.4649775497113537e-06, "loss": 0.8344, "step": 59215 }, { "epoch": 0.721728638806625, "grad_norm": 1.969208002090454, "learning_rate": 1.464656831302117e-06, "loss": 0.8264, "step": 59220 }, { "epoch": 0.7217895750307728, "grad_norm": 2.4875645637512207, "learning_rate": 1.4643361128928801e-06, "loss": 0.8726, "step": 59225 }, { "epoch": 0.7218505112549206, "grad_norm": 2.0775952339172363, "learning_rate": 1.4640153944836436e-06, "loss": 0.798, "step": 59230 }, { "epoch": 0.7219114474790684, "grad_norm": 2.2343320846557617, "learning_rate": 1.4636946760744068e-06, "loss": 0.7586, "step": 59235 }, { "epoch": 0.7219723837032163, "grad_norm": 1.9168739318847656, "learning_rate": 1.46337395766517e-06, "loss": 0.7602, "step": 59240 }, { "epoch": 0.722033319927364, "grad_norm": 2.2840116024017334, "learning_rate": 1.4630532392559335e-06, "loss": 0.7748, "step": 59245 }, { "epoch": 0.7220942561515118, "grad_norm": 1.6790990829467773, "learning_rate": 1.4627325208466967e-06, "loss": 0.7842, "step": 59250 }, { "epoch": 0.7221551923756596, "grad_norm": 1.756090760231018, "learning_rate": 1.4624118024374601e-06, "loss": 0.7985, "step": 59255 }, { "epoch": 0.7222161285998074, "grad_norm": 1.8759959936141968, "learning_rate": 1.4620910840282236e-06, "loss": 0.7778, "step": 59260 }, { "epoch": 0.7222770648239553, "grad_norm": 2.363032579421997, "learning_rate": 1.4617703656189866e-06, "loss": 0.8307, "step": 59265 }, { "epoch": 0.7223380010481031, "grad_norm": 2.23979115486145, "learning_rate": 1.46144964720975e-06, "loss": 0.8837, "step": 59270 }, { "epoch": 0.7223989372722509, "grad_norm": 1.9229251146316528, "learning_rate": 1.4611289288005132e-06, "loss": 0.8114, "step": 59275 }, { "epoch": 0.7224598734963986, "grad_norm": 1.6620315313339233, "learning_rate": 1.4608082103912765e-06, "loss": 0.856, "step": 59280 }, { "epoch": 0.7225208097205464, "grad_norm": 2.0420236587524414, "learning_rate": 1.46048749198204e-06, "loss": 0.8226, "step": 59285 }, { "epoch": 0.7225817459446943, "grad_norm": 1.9457714557647705, "learning_rate": 1.4601667735728031e-06, "loss": 0.8047, "step": 59290 }, { "epoch": 0.7226426821688421, "grad_norm": 1.6671950817108154, "learning_rate": 1.4598460551635666e-06, "loss": 0.8021, "step": 59295 }, { "epoch": 0.7227036183929899, "grad_norm": 2.1711912155151367, "learning_rate": 1.45952533675433e-06, "loss": 0.7736, "step": 59300 }, { "epoch": 0.7227645546171377, "grad_norm": 1.8708328008651733, "learning_rate": 1.459204618345093e-06, "loss": 0.7991, "step": 59305 }, { "epoch": 0.7228254908412856, "grad_norm": 1.8115707635879517, "learning_rate": 1.4588838999358565e-06, "loss": 0.7953, "step": 59310 }, { "epoch": 0.7228864270654333, "grad_norm": 1.6646497249603271, "learning_rate": 1.4585631815266197e-06, "loss": 0.7664, "step": 59315 }, { "epoch": 0.7229473632895811, "grad_norm": 1.9246257543563843, "learning_rate": 1.4582424631173832e-06, "loss": 0.8701, "step": 59320 }, { "epoch": 0.7230082995137289, "grad_norm": 1.8322795629501343, "learning_rate": 1.4579217447081464e-06, "loss": 0.8166, "step": 59325 }, { "epoch": 0.7230692357378767, "grad_norm": 1.7582505941390991, "learning_rate": 1.4576010262989096e-06, "loss": 0.7757, "step": 59330 }, { "epoch": 0.7231301719620246, "grad_norm": 1.692950963973999, "learning_rate": 1.457280307889673e-06, "loss": 0.8697, "step": 59335 }, { "epoch": 0.7231911081861724, "grad_norm": 2.0875766277313232, "learning_rate": 1.4569595894804365e-06, "loss": 0.8353, "step": 59340 }, { "epoch": 0.7232520444103202, "grad_norm": 2.266695022583008, "learning_rate": 1.4566388710711995e-06, "loss": 0.8352, "step": 59345 }, { "epoch": 0.7233129806344679, "grad_norm": 2.0455071926116943, "learning_rate": 1.456318152661963e-06, "loss": 0.7759, "step": 59350 }, { "epoch": 0.7233739168586157, "grad_norm": 1.9314274787902832, "learning_rate": 1.4559974342527262e-06, "loss": 0.7785, "step": 59355 }, { "epoch": 0.7234348530827636, "grad_norm": 2.217707872390747, "learning_rate": 1.4556767158434896e-06, "loss": 0.8403, "step": 59360 }, { "epoch": 0.7234957893069114, "grad_norm": 1.757001519203186, "learning_rate": 1.4553559974342528e-06, "loss": 0.8293, "step": 59365 }, { "epoch": 0.7235567255310592, "grad_norm": 1.905394434928894, "learning_rate": 1.455035279025016e-06, "loss": 0.7974, "step": 59370 }, { "epoch": 0.723617661755207, "grad_norm": 2.0146279335021973, "learning_rate": 1.4547145606157795e-06, "loss": 0.8334, "step": 59375 }, { "epoch": 0.7236785979793549, "grad_norm": 1.7962546348571777, "learning_rate": 1.454393842206543e-06, "loss": 0.8502, "step": 59380 }, { "epoch": 0.7237395342035026, "grad_norm": 1.6563974618911743, "learning_rate": 1.454073123797306e-06, "loss": 0.8676, "step": 59385 }, { "epoch": 0.7238004704276504, "grad_norm": 1.9057002067565918, "learning_rate": 1.4537524053880694e-06, "loss": 0.8135, "step": 59390 }, { "epoch": 0.7238614066517982, "grad_norm": 2.266738176345825, "learning_rate": 1.4534316869788326e-06, "loss": 0.8441, "step": 59395 }, { "epoch": 0.723922342875946, "grad_norm": 1.9285868406295776, "learning_rate": 1.453110968569596e-06, "loss": 0.8216, "step": 59400 }, { "epoch": 0.7239832791000939, "grad_norm": 2.08528208732605, "learning_rate": 1.4527902501603593e-06, "loss": 0.8323, "step": 59405 }, { "epoch": 0.7240442153242417, "grad_norm": 1.7459394931793213, "learning_rate": 1.4524695317511225e-06, "loss": 0.8642, "step": 59410 }, { "epoch": 0.7241051515483895, "grad_norm": 1.8603613376617432, "learning_rate": 1.452148813341886e-06, "loss": 0.828, "step": 59415 }, { "epoch": 0.7241660877725372, "grad_norm": 2.088646650314331, "learning_rate": 1.4518280949326494e-06, "loss": 0.7376, "step": 59420 }, { "epoch": 0.724227023996685, "grad_norm": 2.102606773376465, "learning_rate": 1.4515073765234124e-06, "loss": 0.9119, "step": 59425 }, { "epoch": 0.7242879602208329, "grad_norm": 2.4064879417419434, "learning_rate": 1.4511866581141759e-06, "loss": 0.8723, "step": 59430 }, { "epoch": 0.7243488964449807, "grad_norm": 2.067845344543457, "learning_rate": 1.450865939704939e-06, "loss": 0.8695, "step": 59435 }, { "epoch": 0.7244098326691285, "grad_norm": 1.9279690980911255, "learning_rate": 1.4505452212957025e-06, "loss": 0.8742, "step": 59440 }, { "epoch": 0.7244707688932763, "grad_norm": 1.8279240131378174, "learning_rate": 1.450224502886466e-06, "loss": 0.7555, "step": 59445 }, { "epoch": 0.7245317051174242, "grad_norm": 1.7090110778808594, "learning_rate": 1.449903784477229e-06, "loss": 0.782, "step": 59450 }, { "epoch": 0.7245926413415719, "grad_norm": 1.9758217334747314, "learning_rate": 1.4495830660679924e-06, "loss": 0.834, "step": 59455 }, { "epoch": 0.7246535775657197, "grad_norm": 2.1442995071411133, "learning_rate": 1.4492623476587559e-06, "loss": 0.8753, "step": 59460 }, { "epoch": 0.7247145137898675, "grad_norm": 1.9724262952804565, "learning_rate": 1.448941629249519e-06, "loss": 0.8328, "step": 59465 }, { "epoch": 0.7247754500140153, "grad_norm": 2.216078042984009, "learning_rate": 1.4486209108402823e-06, "loss": 0.7917, "step": 59470 }, { "epoch": 0.7248363862381632, "grad_norm": 1.7445149421691895, "learning_rate": 1.4483001924310456e-06, "loss": 0.8072, "step": 59475 }, { "epoch": 0.724897322462311, "grad_norm": 1.9482779502868652, "learning_rate": 1.447979474021809e-06, "loss": 0.8315, "step": 59480 }, { "epoch": 0.7249582586864588, "grad_norm": 1.9278192520141602, "learning_rate": 1.4476587556125724e-06, "loss": 0.863, "step": 59485 }, { "epoch": 0.7250191949106065, "grad_norm": 1.8751753568649292, "learning_rate": 1.4473380372033355e-06, "loss": 0.8339, "step": 59490 }, { "epoch": 0.7250801311347543, "grad_norm": 2.3462014198303223, "learning_rate": 1.447017318794099e-06, "loss": 0.7927, "step": 59495 }, { "epoch": 0.7251410673589022, "grad_norm": 1.821169376373291, "learning_rate": 1.4466966003848623e-06, "loss": 0.8447, "step": 59500 }, { "epoch": 0.72520200358305, "grad_norm": 1.8311755657196045, "learning_rate": 1.4463758819756254e-06, "loss": 0.861, "step": 59505 }, { "epoch": 0.7252629398071978, "grad_norm": 1.789228916168213, "learning_rate": 1.4460551635663888e-06, "loss": 0.8522, "step": 59510 }, { "epoch": 0.7253238760313456, "grad_norm": 1.7220431566238403, "learning_rate": 1.4457344451571522e-06, "loss": 0.7409, "step": 59515 }, { "epoch": 0.7253848122554934, "grad_norm": 2.143697738647461, "learning_rate": 1.4454137267479155e-06, "loss": 0.8493, "step": 59520 }, { "epoch": 0.7254457484796412, "grad_norm": 2.1022043228149414, "learning_rate": 1.445093008338679e-06, "loss": 0.7702, "step": 59525 }, { "epoch": 0.725506684703789, "grad_norm": 1.8270777463912964, "learning_rate": 1.444772289929442e-06, "loss": 0.8379, "step": 59530 }, { "epoch": 0.7255676209279368, "grad_norm": 1.9165652990341187, "learning_rate": 1.4444515715202054e-06, "loss": 0.8235, "step": 59535 }, { "epoch": 0.7256285571520846, "grad_norm": 1.7931801080703735, "learning_rate": 1.4441308531109688e-06, "loss": 0.8585, "step": 59540 }, { "epoch": 0.7256894933762325, "grad_norm": 2.3895838260650635, "learning_rate": 1.4438101347017318e-06, "loss": 0.8495, "step": 59545 }, { "epoch": 0.7257504296003803, "grad_norm": 1.7527145147323608, "learning_rate": 1.4434894162924953e-06, "loss": 0.7731, "step": 59550 }, { "epoch": 0.7258113658245281, "grad_norm": 1.760299801826477, "learning_rate": 1.4431686978832587e-06, "loss": 0.8092, "step": 59555 }, { "epoch": 0.7258723020486758, "grad_norm": 1.902235984802246, "learning_rate": 1.442847979474022e-06, "loss": 0.806, "step": 59560 }, { "epoch": 0.7259332382728236, "grad_norm": 2.1558563709259033, "learning_rate": 1.4425272610647854e-06, "loss": 0.9213, "step": 59565 }, { "epoch": 0.7259941744969715, "grad_norm": 1.7761318683624268, "learning_rate": 1.4422065426555484e-06, "loss": 0.7865, "step": 59570 }, { "epoch": 0.7260551107211193, "grad_norm": 2.1005442142486572, "learning_rate": 1.4418858242463118e-06, "loss": 0.7827, "step": 59575 }, { "epoch": 0.7261160469452671, "grad_norm": 1.7647671699523926, "learning_rate": 1.4415651058370753e-06, "loss": 0.8243, "step": 59580 }, { "epoch": 0.7261769831694149, "grad_norm": 2.2607476711273193, "learning_rate": 1.4412443874278385e-06, "loss": 0.8179, "step": 59585 }, { "epoch": 0.7262379193935627, "grad_norm": 1.8440512418746948, "learning_rate": 1.4409236690186017e-06, "loss": 0.8342, "step": 59590 }, { "epoch": 0.7262988556177105, "grad_norm": 1.7427011728286743, "learning_rate": 1.4406029506093652e-06, "loss": 0.7696, "step": 59595 }, { "epoch": 0.7263597918418583, "grad_norm": 1.7787764072418213, "learning_rate": 1.4402822322001284e-06, "loss": 0.8142, "step": 59600 }, { "epoch": 0.7264207280660061, "grad_norm": 1.8938043117523193, "learning_rate": 1.4399615137908918e-06, "loss": 0.8641, "step": 59605 }, { "epoch": 0.7264816642901539, "grad_norm": 1.8903347253799438, "learning_rate": 1.4396407953816549e-06, "loss": 0.7949, "step": 59610 }, { "epoch": 0.7265426005143018, "grad_norm": 1.9611505270004272, "learning_rate": 1.4393200769724183e-06, "loss": 0.7688, "step": 59615 }, { "epoch": 0.7266035367384496, "grad_norm": 1.6259602308273315, "learning_rate": 1.4389993585631817e-06, "loss": 0.8183, "step": 59620 }, { "epoch": 0.7266644729625974, "grad_norm": 1.9256559610366821, "learning_rate": 1.438678640153945e-06, "loss": 0.7433, "step": 59625 }, { "epoch": 0.7267254091867451, "grad_norm": 2.0991592407226562, "learning_rate": 1.4383579217447082e-06, "loss": 0.7651, "step": 59630 }, { "epoch": 0.7267863454108929, "grad_norm": 2.3171653747558594, "learning_rate": 1.4380372033354716e-06, "loss": 0.851, "step": 59635 }, { "epoch": 0.7268472816350408, "grad_norm": 1.8948791027069092, "learning_rate": 1.4377164849262349e-06, "loss": 0.7185, "step": 59640 }, { "epoch": 0.7269082178591886, "grad_norm": 2.0368199348449707, "learning_rate": 1.4373957665169983e-06, "loss": 0.8042, "step": 59645 }, { "epoch": 0.7269691540833364, "grad_norm": 2.203745126724243, "learning_rate": 1.4370750481077613e-06, "loss": 0.8476, "step": 59650 }, { "epoch": 0.7270300903074842, "grad_norm": 2.1233696937561035, "learning_rate": 1.4367543296985248e-06, "loss": 0.8835, "step": 59655 }, { "epoch": 0.727091026531632, "grad_norm": 3.1267566680908203, "learning_rate": 1.4364336112892882e-06, "loss": 0.8312, "step": 59660 }, { "epoch": 0.7271519627557798, "grad_norm": 2.273411750793457, "learning_rate": 1.4361128928800514e-06, "loss": 0.8429, "step": 59665 }, { "epoch": 0.7272128989799276, "grad_norm": 1.9642976522445679, "learning_rate": 1.4357921744708147e-06, "loss": 0.7291, "step": 59670 }, { "epoch": 0.7272738352040754, "grad_norm": 2.032576322555542, "learning_rate": 1.435471456061578e-06, "loss": 0.9262, "step": 59675 }, { "epoch": 0.7273347714282232, "grad_norm": 1.8862481117248535, "learning_rate": 1.4351507376523413e-06, "loss": 0.789, "step": 59680 }, { "epoch": 0.727395707652371, "grad_norm": 2.109161138534546, "learning_rate": 1.4348300192431048e-06, "loss": 0.8184, "step": 59685 }, { "epoch": 0.7274566438765189, "grad_norm": 1.8415005207061768, "learning_rate": 1.4345093008338678e-06, "loss": 0.8202, "step": 59690 }, { "epoch": 0.7275175801006667, "grad_norm": 2.470315933227539, "learning_rate": 1.4341885824246312e-06, "loss": 0.8562, "step": 59695 }, { "epoch": 0.7275785163248144, "grad_norm": 2.103677272796631, "learning_rate": 1.4338678640153947e-06, "loss": 0.8009, "step": 59700 }, { "epoch": 0.7276394525489622, "grad_norm": 1.8952313661575317, "learning_rate": 1.4335471456061579e-06, "loss": 0.9244, "step": 59705 }, { "epoch": 0.72770038877311, "grad_norm": 1.9557443857192993, "learning_rate": 1.4332264271969213e-06, "loss": 0.859, "step": 59710 }, { "epoch": 0.7277613249972579, "grad_norm": 1.7666912078857422, "learning_rate": 1.4329057087876846e-06, "loss": 0.819, "step": 59715 }, { "epoch": 0.7278222612214057, "grad_norm": 1.9159129858016968, "learning_rate": 1.4325849903784478e-06, "loss": 0.761, "step": 59720 }, { "epoch": 0.7278831974455535, "grad_norm": 1.8847696781158447, "learning_rate": 1.4322642719692112e-06, "loss": 0.825, "step": 59725 }, { "epoch": 0.7279441336697013, "grad_norm": 1.7781577110290527, "learning_rate": 1.4319435535599742e-06, "loss": 0.7844, "step": 59730 }, { "epoch": 0.728005069893849, "grad_norm": 2.021486759185791, "learning_rate": 1.4316228351507377e-06, "loss": 0.847, "step": 59735 }, { "epoch": 0.7280660061179969, "grad_norm": 2.0078163146972656, "learning_rate": 1.4313021167415011e-06, "loss": 0.7895, "step": 59740 }, { "epoch": 0.7281269423421447, "grad_norm": 2.0448110103607178, "learning_rate": 1.4309813983322644e-06, "loss": 0.9212, "step": 59745 }, { "epoch": 0.7281878785662925, "grad_norm": 2.0033414363861084, "learning_rate": 1.4306606799230278e-06, "loss": 0.8603, "step": 59750 }, { "epoch": 0.7282488147904403, "grad_norm": 1.8138340711593628, "learning_rate": 1.430339961513791e-06, "loss": 0.8428, "step": 59755 }, { "epoch": 0.7283097510145882, "grad_norm": 1.936225414276123, "learning_rate": 1.4300192431045542e-06, "loss": 0.8615, "step": 59760 }, { "epoch": 0.728370687238736, "grad_norm": 2.0664727687835693, "learning_rate": 1.4296985246953177e-06, "loss": 0.8249, "step": 59765 }, { "epoch": 0.7284316234628837, "grad_norm": 2.401362657546997, "learning_rate": 1.4293778062860807e-06, "loss": 0.8193, "step": 59770 }, { "epoch": 0.7284925596870315, "grad_norm": 1.7068524360656738, "learning_rate": 1.4290570878768441e-06, "loss": 0.8198, "step": 59775 }, { "epoch": 0.7285534959111793, "grad_norm": 2.7759697437286377, "learning_rate": 1.4287363694676076e-06, "loss": 0.9069, "step": 59780 }, { "epoch": 0.7286144321353272, "grad_norm": 1.9510127305984497, "learning_rate": 1.4284156510583708e-06, "loss": 0.832, "step": 59785 }, { "epoch": 0.728675368359475, "grad_norm": 2.2744154930114746, "learning_rate": 1.4280949326491343e-06, "loss": 0.9033, "step": 59790 }, { "epoch": 0.7287363045836228, "grad_norm": 2.0769269466400146, "learning_rate": 1.4277742142398977e-06, "loss": 0.8065, "step": 59795 }, { "epoch": 0.7287972408077706, "grad_norm": 2.1512720584869385, "learning_rate": 1.4274534958306607e-06, "loss": 0.7942, "step": 59800 }, { "epoch": 0.7288581770319184, "grad_norm": 1.924644112586975, "learning_rate": 1.4271327774214242e-06, "loss": 0.8738, "step": 59805 }, { "epoch": 0.7289191132560662, "grad_norm": 1.7436467409133911, "learning_rate": 1.4268120590121874e-06, "loss": 0.8307, "step": 59810 }, { "epoch": 0.728980049480214, "grad_norm": 2.2117600440979004, "learning_rate": 1.4264913406029506e-06, "loss": 0.8992, "step": 59815 }, { "epoch": 0.7290409857043618, "grad_norm": 1.9060542583465576, "learning_rate": 1.426170622193714e-06, "loss": 0.8436, "step": 59820 }, { "epoch": 0.7291019219285096, "grad_norm": 2.235309600830078, "learning_rate": 1.4258499037844773e-06, "loss": 0.7801, "step": 59825 }, { "epoch": 0.7291628581526575, "grad_norm": 1.7408603429794312, "learning_rate": 1.4255291853752407e-06, "loss": 0.7981, "step": 59830 }, { "epoch": 0.7292237943768053, "grad_norm": 2.214686155319214, "learning_rate": 1.4252084669660042e-06, "loss": 0.8409, "step": 59835 }, { "epoch": 0.729284730600953, "grad_norm": 1.8346563577651978, "learning_rate": 1.4248877485567672e-06, "loss": 0.7971, "step": 59840 }, { "epoch": 0.7293456668251008, "grad_norm": 2.010483503341675, "learning_rate": 1.4245670301475306e-06, "loss": 0.8068, "step": 59845 }, { "epoch": 0.7294066030492486, "grad_norm": 2.115633249282837, "learning_rate": 1.424246311738294e-06, "loss": 0.9485, "step": 59850 }, { "epoch": 0.7294675392733965, "grad_norm": 2.0915770530700684, "learning_rate": 1.423925593329057e-06, "loss": 0.8578, "step": 59855 }, { "epoch": 0.7295284754975443, "grad_norm": 1.8661129474639893, "learning_rate": 1.4236048749198205e-06, "loss": 0.8249, "step": 59860 }, { "epoch": 0.7295894117216921, "grad_norm": 2.1734888553619385, "learning_rate": 1.4232841565105837e-06, "loss": 0.7921, "step": 59865 }, { "epoch": 0.7296503479458399, "grad_norm": 2.2185325622558594, "learning_rate": 1.4229634381013472e-06, "loss": 0.7736, "step": 59870 }, { "epoch": 0.7297112841699877, "grad_norm": 2.0364832878112793, "learning_rate": 1.4226427196921106e-06, "loss": 0.8714, "step": 59875 }, { "epoch": 0.7297722203941355, "grad_norm": 1.8850148916244507, "learning_rate": 1.4223220012828736e-06, "loss": 0.7904, "step": 59880 }, { "epoch": 0.7298331566182833, "grad_norm": 1.7414246797561646, "learning_rate": 1.422001282873637e-06, "loss": 0.8457, "step": 59885 }, { "epoch": 0.7298940928424311, "grad_norm": 1.8367582559585571, "learning_rate": 1.4216805644644005e-06, "loss": 0.745, "step": 59890 }, { "epoch": 0.7299550290665789, "grad_norm": 2.043874979019165, "learning_rate": 1.4213598460551635e-06, "loss": 0.7589, "step": 59895 }, { "epoch": 0.7300159652907268, "grad_norm": 1.8741843700408936, "learning_rate": 1.421039127645927e-06, "loss": 0.8238, "step": 59900 }, { "epoch": 0.7300769015148746, "grad_norm": 2.2021050453186035, "learning_rate": 1.4207184092366902e-06, "loss": 0.9174, "step": 59905 }, { "epoch": 0.7301378377390223, "grad_norm": 2.0214085578918457, "learning_rate": 1.4203976908274536e-06, "loss": 0.9258, "step": 59910 }, { "epoch": 0.7301987739631701, "grad_norm": 1.761662483215332, "learning_rate": 1.420076972418217e-06, "loss": 0.8748, "step": 59915 }, { "epoch": 0.730259710187318, "grad_norm": 2.6610240936279297, "learning_rate": 1.41975625400898e-06, "loss": 0.7576, "step": 59920 }, { "epoch": 0.7303206464114658, "grad_norm": 2.0647528171539307, "learning_rate": 1.4194355355997435e-06, "loss": 0.7626, "step": 59925 }, { "epoch": 0.7303815826356136, "grad_norm": 1.964037299156189, "learning_rate": 1.419114817190507e-06, "loss": 0.7916, "step": 59930 }, { "epoch": 0.7304425188597614, "grad_norm": 2.2712976932525635, "learning_rate": 1.4187940987812702e-06, "loss": 0.8084, "step": 59935 }, { "epoch": 0.7305034550839091, "grad_norm": 2.2787437438964844, "learning_rate": 1.4184733803720334e-06, "loss": 0.8502, "step": 59940 }, { "epoch": 0.730564391308057, "grad_norm": 1.7768572568893433, "learning_rate": 1.4181526619627967e-06, "loss": 0.813, "step": 59945 }, { "epoch": 0.7306253275322048, "grad_norm": 1.8832849264144897, "learning_rate": 1.4178319435535601e-06, "loss": 0.842, "step": 59950 }, { "epoch": 0.7306862637563526, "grad_norm": 1.7756186723709106, "learning_rate": 1.4175112251443236e-06, "loss": 0.7889, "step": 59955 }, { "epoch": 0.7307471999805004, "grad_norm": 1.9194923639297485, "learning_rate": 1.4171905067350866e-06, "loss": 0.8946, "step": 59960 }, { "epoch": 0.7308081362046482, "grad_norm": 1.8732365369796753, "learning_rate": 1.41686978832585e-06, "loss": 0.7058, "step": 59965 }, { "epoch": 0.7308690724287961, "grad_norm": 2.1596992015838623, "learning_rate": 1.4165490699166134e-06, "loss": 0.7797, "step": 59970 }, { "epoch": 0.7309300086529438, "grad_norm": 2.038355827331543, "learning_rate": 1.4162283515073767e-06, "loss": 0.8223, "step": 59975 }, { "epoch": 0.7309909448770916, "grad_norm": 1.8618754148483276, "learning_rate": 1.41590763309814e-06, "loss": 0.8343, "step": 59980 }, { "epoch": 0.7310518811012394, "grad_norm": 1.9863982200622559, "learning_rate": 1.4155869146889031e-06, "loss": 0.8471, "step": 59985 }, { "epoch": 0.7311128173253872, "grad_norm": 2.32041072845459, "learning_rate": 1.4152661962796666e-06, "loss": 0.8179, "step": 59990 }, { "epoch": 0.7311737535495351, "grad_norm": 2.2512898445129395, "learning_rate": 1.41494547787043e-06, "loss": 0.7864, "step": 59995 }, { "epoch": 0.7312346897736829, "grad_norm": 2.164707899093628, "learning_rate": 1.414624759461193e-06, "loss": 0.8728, "step": 60000 }, { "epoch": 0.7312956259978307, "grad_norm": 2.060391426086426, "learning_rate": 1.4143040410519565e-06, "loss": 0.7767, "step": 60005 }, { "epoch": 0.7313565622219784, "grad_norm": 2.336700916290283, "learning_rate": 1.41398332264272e-06, "loss": 0.8121, "step": 60010 }, { "epoch": 0.7314174984461262, "grad_norm": 1.8121873140335083, "learning_rate": 1.4136626042334831e-06, "loss": 0.8525, "step": 60015 }, { "epoch": 0.7314784346702741, "grad_norm": 1.715057611465454, "learning_rate": 1.4133418858242464e-06, "loss": 0.8268, "step": 60020 }, { "epoch": 0.7315393708944219, "grad_norm": 1.968381404876709, "learning_rate": 1.4130211674150096e-06, "loss": 0.7779, "step": 60025 }, { "epoch": 0.7316003071185697, "grad_norm": 1.8229551315307617, "learning_rate": 1.412700449005773e-06, "loss": 0.8084, "step": 60030 }, { "epoch": 0.7316612433427175, "grad_norm": 2.2378993034362793, "learning_rate": 1.4123797305965365e-06, "loss": 0.8229, "step": 60035 }, { "epoch": 0.7317221795668654, "grad_norm": 1.716870665550232, "learning_rate": 1.4120590121872995e-06, "loss": 0.8521, "step": 60040 }, { "epoch": 0.7317831157910131, "grad_norm": 2.107835531234741, "learning_rate": 1.411738293778063e-06, "loss": 0.8423, "step": 60045 }, { "epoch": 0.7318440520151609, "grad_norm": 2.2385313510894775, "learning_rate": 1.4114175753688264e-06, "loss": 0.8464, "step": 60050 }, { "epoch": 0.7319049882393087, "grad_norm": 1.806329607963562, "learning_rate": 1.4110968569595896e-06, "loss": 0.7626, "step": 60055 }, { "epoch": 0.7319659244634565, "grad_norm": 2.223665714263916, "learning_rate": 1.410776138550353e-06, "loss": 0.8121, "step": 60060 }, { "epoch": 0.7320268606876044, "grad_norm": 2.2062976360321045, "learning_rate": 1.410455420141116e-06, "loss": 0.8948, "step": 60065 }, { "epoch": 0.7320877969117522, "grad_norm": 2.0474376678466797, "learning_rate": 1.4101347017318795e-06, "loss": 0.8376, "step": 60070 }, { "epoch": 0.7321487331359, "grad_norm": 2.1644933223724365, "learning_rate": 1.409813983322643e-06, "loss": 0.8454, "step": 60075 }, { "epoch": 0.7322096693600477, "grad_norm": 2.0892786979675293, "learning_rate": 1.409493264913406e-06, "loss": 0.9183, "step": 60080 }, { "epoch": 0.7322706055841955, "grad_norm": 1.7345609664916992, "learning_rate": 1.4091725465041694e-06, "loss": 0.8195, "step": 60085 }, { "epoch": 0.7323315418083434, "grad_norm": 1.9267405271530151, "learning_rate": 1.4088518280949328e-06, "loss": 0.721, "step": 60090 }, { "epoch": 0.7323924780324912, "grad_norm": 1.6760910749435425, "learning_rate": 1.408531109685696e-06, "loss": 0.7483, "step": 60095 }, { "epoch": 0.732453414256639, "grad_norm": 1.5650475025177002, "learning_rate": 1.4082103912764595e-06, "loss": 0.896, "step": 60100 }, { "epoch": 0.7325143504807868, "grad_norm": 1.9721170663833618, "learning_rate": 1.4078896728672225e-06, "loss": 0.7987, "step": 60105 }, { "epoch": 0.7325752867049347, "grad_norm": 1.888549566268921, "learning_rate": 1.407568954457986e-06, "loss": 0.8027, "step": 60110 }, { "epoch": 0.7326362229290824, "grad_norm": 1.8359739780426025, "learning_rate": 1.4072482360487494e-06, "loss": 0.7485, "step": 60115 }, { "epoch": 0.7326971591532302, "grad_norm": 1.5821515321731567, "learning_rate": 1.4069275176395124e-06, "loss": 0.7935, "step": 60120 }, { "epoch": 0.732758095377378, "grad_norm": 1.924489140510559, "learning_rate": 1.4066067992302759e-06, "loss": 0.829, "step": 60125 }, { "epoch": 0.7328190316015258, "grad_norm": 2.317410945892334, "learning_rate": 1.4062860808210393e-06, "loss": 0.7969, "step": 60130 }, { "epoch": 0.7328799678256737, "grad_norm": 1.906912922859192, "learning_rate": 1.4059653624118025e-06, "loss": 0.7925, "step": 60135 }, { "epoch": 0.7329409040498215, "grad_norm": 1.9516466856002808, "learning_rate": 1.405644644002566e-06, "loss": 0.7996, "step": 60140 }, { "epoch": 0.7330018402739693, "grad_norm": 1.9961153268814087, "learning_rate": 1.4053239255933294e-06, "loss": 0.8669, "step": 60145 }, { "epoch": 0.733062776498117, "grad_norm": 1.9768146276474, "learning_rate": 1.4050032071840924e-06, "loss": 0.8406, "step": 60150 }, { "epoch": 0.7331237127222648, "grad_norm": 1.8152923583984375, "learning_rate": 1.4046824887748559e-06, "loss": 0.7001, "step": 60155 }, { "epoch": 0.7331846489464127, "grad_norm": 2.1604676246643066, "learning_rate": 1.404361770365619e-06, "loss": 0.8694, "step": 60160 }, { "epoch": 0.7332455851705605, "grad_norm": 1.937525987625122, "learning_rate": 1.4040410519563823e-06, "loss": 0.857, "step": 60165 }, { "epoch": 0.7333065213947083, "grad_norm": 1.8771204948425293, "learning_rate": 1.4037203335471458e-06, "loss": 0.8267, "step": 60170 }, { "epoch": 0.7333674576188561, "grad_norm": 1.839439034461975, "learning_rate": 1.403399615137909e-06, "loss": 0.8464, "step": 60175 }, { "epoch": 0.733428393843004, "grad_norm": 1.6410188674926758, "learning_rate": 1.4030788967286724e-06, "loss": 0.792, "step": 60180 }, { "epoch": 0.7334893300671517, "grad_norm": 1.752280592918396, "learning_rate": 1.4027581783194359e-06, "loss": 0.7931, "step": 60185 }, { "epoch": 0.7335502662912995, "grad_norm": 2.2783446311950684, "learning_rate": 1.4024374599101989e-06, "loss": 0.8375, "step": 60190 }, { "epoch": 0.7336112025154473, "grad_norm": 1.790690541267395, "learning_rate": 1.4021167415009623e-06, "loss": 0.8504, "step": 60195 }, { "epoch": 0.7336721387395951, "grad_norm": 2.107468843460083, "learning_rate": 1.4017960230917256e-06, "loss": 0.8396, "step": 60200 }, { "epoch": 0.733733074963743, "grad_norm": 1.7567752599716187, "learning_rate": 1.4014753046824888e-06, "loss": 0.8231, "step": 60205 }, { "epoch": 0.7337940111878908, "grad_norm": 1.8830718994140625, "learning_rate": 1.4011545862732522e-06, "loss": 0.7762, "step": 60210 }, { "epoch": 0.7338549474120386, "grad_norm": 1.7821033000946045, "learning_rate": 1.4008338678640155e-06, "loss": 0.7884, "step": 60215 }, { "epoch": 0.7339158836361863, "grad_norm": 2.0042972564697266, "learning_rate": 1.400513149454779e-06, "loss": 0.7696, "step": 60220 }, { "epoch": 0.7339768198603341, "grad_norm": 2.029481887817383, "learning_rate": 1.4001924310455423e-06, "loss": 0.8676, "step": 60225 }, { "epoch": 0.734037756084482, "grad_norm": 2.314965009689331, "learning_rate": 1.3998717126363054e-06, "loss": 0.8473, "step": 60230 }, { "epoch": 0.7340986923086298, "grad_norm": 2.182689666748047, "learning_rate": 1.3995509942270688e-06, "loss": 0.798, "step": 60235 }, { "epoch": 0.7341596285327776, "grad_norm": 2.201582908630371, "learning_rate": 1.399230275817832e-06, "loss": 0.7799, "step": 60240 }, { "epoch": 0.7342205647569254, "grad_norm": 2.136090040206909, "learning_rate": 1.3989095574085953e-06, "loss": 0.8349, "step": 60245 }, { "epoch": 0.7342815009810733, "grad_norm": 2.3580105304718018, "learning_rate": 1.3985888389993587e-06, "loss": 0.827, "step": 60250 }, { "epoch": 0.734342437205221, "grad_norm": 1.9585447311401367, "learning_rate": 1.398268120590122e-06, "loss": 0.8491, "step": 60255 }, { "epoch": 0.7344033734293688, "grad_norm": 1.7611119747161865, "learning_rate": 1.3979474021808854e-06, "loss": 0.7683, "step": 60260 }, { "epoch": 0.7344643096535166, "grad_norm": 2.113089084625244, "learning_rate": 1.3976266837716488e-06, "loss": 0.8731, "step": 60265 }, { "epoch": 0.7345252458776644, "grad_norm": 1.9010823965072632, "learning_rate": 1.3973059653624118e-06, "loss": 0.8008, "step": 60270 }, { "epoch": 0.7345861821018123, "grad_norm": 1.8034449815750122, "learning_rate": 1.3969852469531753e-06, "loss": 0.8645, "step": 60275 }, { "epoch": 0.7346471183259601, "grad_norm": 2.3139138221740723, "learning_rate": 1.3966645285439385e-06, "loss": 0.89, "step": 60280 }, { "epoch": 0.7347080545501079, "grad_norm": 2.1790308952331543, "learning_rate": 1.396343810134702e-06, "loss": 0.8123, "step": 60285 }, { "epoch": 0.7347689907742556, "grad_norm": 1.780450701713562, "learning_rate": 1.3960230917254652e-06, "loss": 0.8259, "step": 60290 }, { "epoch": 0.7348299269984034, "grad_norm": 2.1178743839263916, "learning_rate": 1.3957023733162284e-06, "loss": 0.8476, "step": 60295 }, { "epoch": 0.7348908632225513, "grad_norm": 2.0845987796783447, "learning_rate": 1.3953816549069918e-06, "loss": 0.8467, "step": 60300 }, { "epoch": 0.7349517994466991, "grad_norm": 1.9992084503173828, "learning_rate": 1.3950609364977553e-06, "loss": 0.8136, "step": 60305 }, { "epoch": 0.7350127356708469, "grad_norm": 1.7756825685501099, "learning_rate": 1.3947402180885183e-06, "loss": 0.7484, "step": 60310 }, { "epoch": 0.7350736718949947, "grad_norm": 2.051669120788574, "learning_rate": 1.3944194996792817e-06, "loss": 0.9159, "step": 60315 }, { "epoch": 0.7351346081191426, "grad_norm": 1.8991382122039795, "learning_rate": 1.394098781270045e-06, "loss": 0.8292, "step": 60320 }, { "epoch": 0.7351955443432903, "grad_norm": 1.9474207162857056, "learning_rate": 1.3937780628608084e-06, "loss": 0.9022, "step": 60325 }, { "epoch": 0.7352564805674381, "grad_norm": 2.086987257003784, "learning_rate": 1.3934573444515716e-06, "loss": 0.8587, "step": 60330 }, { "epoch": 0.7353174167915859, "grad_norm": 1.7270569801330566, "learning_rate": 1.3931366260423348e-06, "loss": 0.8428, "step": 60335 }, { "epoch": 0.7353783530157337, "grad_norm": 1.7472058534622192, "learning_rate": 1.3928159076330983e-06, "loss": 0.7842, "step": 60340 }, { "epoch": 0.7354392892398816, "grad_norm": 1.9845517873764038, "learning_rate": 1.3924951892238617e-06, "loss": 0.8159, "step": 60345 }, { "epoch": 0.7355002254640294, "grad_norm": 1.8904314041137695, "learning_rate": 1.3921744708146247e-06, "loss": 0.8062, "step": 60350 }, { "epoch": 0.7355611616881772, "grad_norm": 1.8538440465927124, "learning_rate": 1.3918537524053882e-06, "loss": 0.8057, "step": 60355 }, { "epoch": 0.7356220979123249, "grad_norm": 1.7265788316726685, "learning_rate": 1.3915330339961514e-06, "loss": 0.7352, "step": 60360 }, { "epoch": 0.7356830341364727, "grad_norm": 1.7076889276504517, "learning_rate": 1.3912123155869149e-06, "loss": 0.7991, "step": 60365 }, { "epoch": 0.7357439703606206, "grad_norm": 1.804194688796997, "learning_rate": 1.390891597177678e-06, "loss": 0.7814, "step": 60370 }, { "epoch": 0.7358049065847684, "grad_norm": 1.7999413013458252, "learning_rate": 1.3905708787684413e-06, "loss": 0.8443, "step": 60375 }, { "epoch": 0.7358658428089162, "grad_norm": 2.5326995849609375, "learning_rate": 1.3902501603592048e-06, "loss": 0.8549, "step": 60380 }, { "epoch": 0.735926779033064, "grad_norm": 1.691551685333252, "learning_rate": 1.3899294419499682e-06, "loss": 0.8594, "step": 60385 }, { "epoch": 0.7359877152572118, "grad_norm": 1.8626823425292969, "learning_rate": 1.3896087235407312e-06, "loss": 0.8261, "step": 60390 }, { "epoch": 0.7360486514813596, "grad_norm": 2.0941128730773926, "learning_rate": 1.3892880051314946e-06, "loss": 0.7602, "step": 60395 }, { "epoch": 0.7361095877055074, "grad_norm": 2.090728759765625, "learning_rate": 1.3889672867222579e-06, "loss": 0.8321, "step": 60400 }, { "epoch": 0.7361705239296552, "grad_norm": 2.06493878364563, "learning_rate": 1.3886465683130213e-06, "loss": 0.8172, "step": 60405 }, { "epoch": 0.736231460153803, "grad_norm": 2.170060873031616, "learning_rate": 1.3883258499037848e-06, "loss": 0.8421, "step": 60410 }, { "epoch": 0.7362923963779509, "grad_norm": 1.7880197763442993, "learning_rate": 1.3880051314945478e-06, "loss": 0.8362, "step": 60415 }, { "epoch": 0.7363533326020987, "grad_norm": 2.0642900466918945, "learning_rate": 1.3876844130853112e-06, "loss": 0.8246, "step": 60420 }, { "epoch": 0.7364142688262465, "grad_norm": 1.8305330276489258, "learning_rate": 1.3873636946760747e-06, "loss": 0.7777, "step": 60425 }, { "epoch": 0.7364752050503942, "grad_norm": 1.9382822513580322, "learning_rate": 1.3870429762668377e-06, "loss": 0.8499, "step": 60430 }, { "epoch": 0.736536141274542, "grad_norm": 1.8982021808624268, "learning_rate": 1.3867222578576011e-06, "loss": 0.7654, "step": 60435 }, { "epoch": 0.7365970774986899, "grad_norm": 2.267705202102661, "learning_rate": 1.3864015394483646e-06, "loss": 0.7779, "step": 60440 }, { "epoch": 0.7366580137228377, "grad_norm": 1.8612569570541382, "learning_rate": 1.3860808210391278e-06, "loss": 0.8235, "step": 60445 }, { "epoch": 0.7367189499469855, "grad_norm": 1.9644914865493774, "learning_rate": 1.3857601026298912e-06, "loss": 0.7726, "step": 60450 }, { "epoch": 0.7367798861711333, "grad_norm": 1.915135145187378, "learning_rate": 1.3854393842206542e-06, "loss": 0.7998, "step": 60455 }, { "epoch": 0.7368408223952811, "grad_norm": 1.795220136642456, "learning_rate": 1.3851186658114177e-06, "loss": 0.8443, "step": 60460 }, { "epoch": 0.7369017586194289, "grad_norm": 1.6806331872940063, "learning_rate": 1.3847979474021811e-06, "loss": 0.8509, "step": 60465 }, { "epoch": 0.7369626948435767, "grad_norm": 1.6487205028533936, "learning_rate": 1.3844772289929441e-06, "loss": 0.8182, "step": 60470 }, { "epoch": 0.7370236310677245, "grad_norm": 2.045947313308716, "learning_rate": 1.3841565105837076e-06, "loss": 0.855, "step": 60475 }, { "epoch": 0.7370845672918723, "grad_norm": 1.6653205156326294, "learning_rate": 1.383835792174471e-06, "loss": 0.8198, "step": 60480 }, { "epoch": 0.7371455035160202, "grad_norm": 1.7401244640350342, "learning_rate": 1.3835150737652342e-06, "loss": 0.7992, "step": 60485 }, { "epoch": 0.737206439740168, "grad_norm": 2.1609745025634766, "learning_rate": 1.3831943553559977e-06, "loss": 0.8673, "step": 60490 }, { "epoch": 0.7372673759643158, "grad_norm": 2.2152364253997803, "learning_rate": 1.3828736369467607e-06, "loss": 0.8348, "step": 60495 }, { "epoch": 0.7373283121884635, "grad_norm": 2.4626176357269287, "learning_rate": 1.3825529185375241e-06, "loss": 0.7498, "step": 60500 }, { "epoch": 0.7373892484126113, "grad_norm": 2.1917498111724854, "learning_rate": 1.3822322001282876e-06, "loss": 0.8429, "step": 60505 }, { "epoch": 0.7374501846367592, "grad_norm": 1.797481656074524, "learning_rate": 1.3819114817190508e-06, "loss": 0.8421, "step": 60510 }, { "epoch": 0.737511120860907, "grad_norm": 3.0606167316436768, "learning_rate": 1.381590763309814e-06, "loss": 0.8245, "step": 60515 }, { "epoch": 0.7375720570850548, "grad_norm": 1.9462676048278809, "learning_rate": 1.3812700449005775e-06, "loss": 0.8192, "step": 60520 }, { "epoch": 0.7376329933092026, "grad_norm": 1.7692674398422241, "learning_rate": 1.3809493264913407e-06, "loss": 0.8602, "step": 60525 }, { "epoch": 0.7376939295333504, "grad_norm": 1.7978425025939941, "learning_rate": 1.3806286080821041e-06, "loss": 0.8432, "step": 60530 }, { "epoch": 0.7377548657574982, "grad_norm": 1.9593526124954224, "learning_rate": 1.3803078896728672e-06, "loss": 0.8526, "step": 60535 }, { "epoch": 0.737815801981646, "grad_norm": 1.865776777267456, "learning_rate": 1.3799871712636306e-06, "loss": 0.7656, "step": 60540 }, { "epoch": 0.7378767382057938, "grad_norm": 1.8516294956207275, "learning_rate": 1.379666452854394e-06, "loss": 0.7245, "step": 60545 }, { "epoch": 0.7379376744299416, "grad_norm": 1.948732852935791, "learning_rate": 1.3793457344451573e-06, "loss": 0.7997, "step": 60550 }, { "epoch": 0.7379986106540894, "grad_norm": 1.803027868270874, "learning_rate": 1.3790250160359205e-06, "loss": 0.826, "step": 60555 }, { "epoch": 0.7380595468782373, "grad_norm": 1.9703500270843506, "learning_rate": 1.378704297626684e-06, "loss": 0.7905, "step": 60560 }, { "epoch": 0.7381204831023851, "grad_norm": 1.7452462911605835, "learning_rate": 1.3783835792174472e-06, "loss": 0.8121, "step": 60565 }, { "epoch": 0.7381814193265328, "grad_norm": 2.017023801803589, "learning_rate": 1.3780628608082106e-06, "loss": 0.8366, "step": 60570 }, { "epoch": 0.7382423555506806, "grad_norm": 2.0441253185272217, "learning_rate": 1.3777421423989736e-06, "loss": 0.8345, "step": 60575 }, { "epoch": 0.7383032917748285, "grad_norm": 1.9776746034622192, "learning_rate": 1.377421423989737e-06, "loss": 0.7795, "step": 60580 }, { "epoch": 0.7383642279989763, "grad_norm": 2.2386696338653564, "learning_rate": 1.3771007055805005e-06, "loss": 0.8004, "step": 60585 }, { "epoch": 0.7384251642231241, "grad_norm": 2.3605105876922607, "learning_rate": 1.3767799871712637e-06, "loss": 0.8516, "step": 60590 }, { "epoch": 0.7384861004472719, "grad_norm": 1.9220820665359497, "learning_rate": 1.376459268762027e-06, "loss": 0.809, "step": 60595 }, { "epoch": 0.7385470366714197, "grad_norm": 1.7198795080184937, "learning_rate": 1.3761385503527904e-06, "loss": 0.7627, "step": 60600 }, { "epoch": 0.7386079728955675, "grad_norm": 2.265780448913574, "learning_rate": 1.3758178319435536e-06, "loss": 0.7373, "step": 60605 }, { "epoch": 0.7386689091197153, "grad_norm": 1.7226685285568237, "learning_rate": 1.375497113534317e-06, "loss": 0.7857, "step": 60610 }, { "epoch": 0.7387298453438631, "grad_norm": 2.0815365314483643, "learning_rate": 1.37517639512508e-06, "loss": 0.7605, "step": 60615 }, { "epoch": 0.7387907815680109, "grad_norm": 1.8643248081207275, "learning_rate": 1.3748556767158435e-06, "loss": 0.7652, "step": 60620 }, { "epoch": 0.7388517177921587, "grad_norm": 1.9118446111679077, "learning_rate": 1.374534958306607e-06, "loss": 0.8772, "step": 60625 }, { "epoch": 0.7389126540163066, "grad_norm": 1.9710568189620972, "learning_rate": 1.3742142398973702e-06, "loss": 0.82, "step": 60630 }, { "epoch": 0.7389735902404544, "grad_norm": 1.7194199562072754, "learning_rate": 1.3738935214881336e-06, "loss": 0.8187, "step": 60635 }, { "epoch": 0.7390345264646021, "grad_norm": 1.9347357749938965, "learning_rate": 1.3735728030788969e-06, "loss": 0.7481, "step": 60640 }, { "epoch": 0.7390954626887499, "grad_norm": 1.7957971096038818, "learning_rate": 1.37325208466966e-06, "loss": 0.7732, "step": 60645 }, { "epoch": 0.7391563989128977, "grad_norm": 1.9699028730392456, "learning_rate": 1.3729313662604235e-06, "loss": 0.8496, "step": 60650 }, { "epoch": 0.7392173351370456, "grad_norm": 1.6311103105545044, "learning_rate": 1.3726106478511866e-06, "loss": 0.8195, "step": 60655 }, { "epoch": 0.7392782713611934, "grad_norm": 2.262430429458618, "learning_rate": 1.37228992944195e-06, "loss": 0.8308, "step": 60660 }, { "epoch": 0.7393392075853412, "grad_norm": 1.9300189018249512, "learning_rate": 1.3719692110327134e-06, "loss": 0.7365, "step": 60665 }, { "epoch": 0.739400143809489, "grad_norm": 1.569970965385437, "learning_rate": 1.3716484926234767e-06, "loss": 0.7509, "step": 60670 }, { "epoch": 0.7394610800336368, "grad_norm": 1.7936530113220215, "learning_rate": 1.3713277742142401e-06, "loss": 0.7443, "step": 60675 }, { "epoch": 0.7395220162577846, "grad_norm": 1.9359204769134521, "learning_rate": 1.3710070558050033e-06, "loss": 0.7456, "step": 60680 }, { "epoch": 0.7395829524819324, "grad_norm": 1.956742286682129, "learning_rate": 1.3706863373957666e-06, "loss": 0.7832, "step": 60685 }, { "epoch": 0.7396438887060802, "grad_norm": 1.6071388721466064, "learning_rate": 1.37036561898653e-06, "loss": 0.7914, "step": 60690 }, { "epoch": 0.739704824930228, "grad_norm": 2.020033597946167, "learning_rate": 1.370044900577293e-06, "loss": 0.8416, "step": 60695 }, { "epoch": 0.7397657611543759, "grad_norm": 1.9166765213012695, "learning_rate": 1.3697241821680565e-06, "loss": 0.7787, "step": 60700 }, { "epoch": 0.7398266973785237, "grad_norm": 1.831972360610962, "learning_rate": 1.36940346375882e-06, "loss": 0.7432, "step": 60705 }, { "epoch": 0.7398876336026714, "grad_norm": 2.018664598464966, "learning_rate": 1.3690827453495831e-06, "loss": 0.8996, "step": 60710 }, { "epoch": 0.7399485698268192, "grad_norm": 1.880078673362732, "learning_rate": 1.3687620269403466e-06, "loss": 0.8031, "step": 60715 }, { "epoch": 0.740009506050967, "grad_norm": 1.768462896347046, "learning_rate": 1.3684413085311098e-06, "loss": 0.804, "step": 60720 }, { "epoch": 0.7400704422751149, "grad_norm": 1.8944993019104004, "learning_rate": 1.368120590121873e-06, "loss": 0.841, "step": 60725 }, { "epoch": 0.7401313784992627, "grad_norm": 1.7850124835968018, "learning_rate": 1.3677998717126365e-06, "loss": 0.7946, "step": 60730 }, { "epoch": 0.7401923147234105, "grad_norm": 1.6553164720535278, "learning_rate": 1.3674791533034e-06, "loss": 0.7944, "step": 60735 }, { "epoch": 0.7402532509475583, "grad_norm": 2.7673263549804688, "learning_rate": 1.367158434894163e-06, "loss": 0.8322, "step": 60740 }, { "epoch": 0.740314187171706, "grad_norm": 2.7061924934387207, "learning_rate": 1.3668377164849264e-06, "loss": 0.7833, "step": 60745 }, { "epoch": 0.7403751233958539, "grad_norm": 2.419049024581909, "learning_rate": 1.3665169980756896e-06, "loss": 0.8503, "step": 60750 }, { "epoch": 0.7404360596200017, "grad_norm": 1.7669435739517212, "learning_rate": 1.366196279666453e-06, "loss": 0.7543, "step": 60755 }, { "epoch": 0.7404969958441495, "grad_norm": 1.9588993787765503, "learning_rate": 1.3658755612572165e-06, "loss": 0.7573, "step": 60760 }, { "epoch": 0.7405579320682973, "grad_norm": 2.2685904502868652, "learning_rate": 1.3655548428479795e-06, "loss": 0.7984, "step": 60765 }, { "epoch": 0.7406188682924452, "grad_norm": 1.925361156463623, "learning_rate": 1.365234124438743e-06, "loss": 0.9289, "step": 60770 }, { "epoch": 0.740679804516593, "grad_norm": 1.8194090127944946, "learning_rate": 1.3649134060295064e-06, "loss": 0.8138, "step": 60775 }, { "epoch": 0.7407407407407407, "grad_norm": 1.8004541397094727, "learning_rate": 1.3645926876202694e-06, "loss": 0.7898, "step": 60780 }, { "epoch": 0.7408016769648885, "grad_norm": 1.8783776760101318, "learning_rate": 1.3642719692110328e-06, "loss": 0.909, "step": 60785 }, { "epoch": 0.7408626131890363, "grad_norm": 2.3202528953552246, "learning_rate": 1.363951250801796e-06, "loss": 0.9045, "step": 60790 }, { "epoch": 0.7409235494131842, "grad_norm": 1.9468317031860352, "learning_rate": 1.3636305323925595e-06, "loss": 0.7727, "step": 60795 }, { "epoch": 0.740984485637332, "grad_norm": 2.0248520374298096, "learning_rate": 1.363309813983323e-06, "loss": 0.7932, "step": 60800 }, { "epoch": 0.7410454218614798, "grad_norm": 1.8828761577606201, "learning_rate": 1.362989095574086e-06, "loss": 0.7819, "step": 60805 }, { "epoch": 0.7411063580856276, "grad_norm": 1.9662730693817139, "learning_rate": 1.3626683771648494e-06, "loss": 0.837, "step": 60810 }, { "epoch": 0.7411672943097753, "grad_norm": 2.807849407196045, "learning_rate": 1.3623476587556128e-06, "loss": 0.8491, "step": 60815 }, { "epoch": 0.7412282305339232, "grad_norm": 2.1470375061035156, "learning_rate": 1.3620269403463759e-06, "loss": 0.8635, "step": 60820 }, { "epoch": 0.741289166758071, "grad_norm": 1.9301533699035645, "learning_rate": 1.3617062219371393e-06, "loss": 0.7681, "step": 60825 }, { "epoch": 0.7413501029822188, "grad_norm": 1.9597655534744263, "learning_rate": 1.3613855035279025e-06, "loss": 0.7651, "step": 60830 }, { "epoch": 0.7414110392063666, "grad_norm": 2.1702983379364014, "learning_rate": 1.361064785118666e-06, "loss": 0.8476, "step": 60835 }, { "epoch": 0.7414719754305145, "grad_norm": 1.957263469696045, "learning_rate": 1.3607440667094294e-06, "loss": 0.8116, "step": 60840 }, { "epoch": 0.7415329116546623, "grad_norm": 1.815290927886963, "learning_rate": 1.3604233483001924e-06, "loss": 0.7974, "step": 60845 }, { "epoch": 0.74159384787881, "grad_norm": 1.961964726448059, "learning_rate": 1.3601026298909559e-06, "loss": 0.8871, "step": 60850 }, { "epoch": 0.7416547841029578, "grad_norm": 1.674619197845459, "learning_rate": 1.3597819114817193e-06, "loss": 0.7894, "step": 60855 }, { "epoch": 0.7417157203271056, "grad_norm": 1.936191439628601, "learning_rate": 1.3594611930724825e-06, "loss": 0.8005, "step": 60860 }, { "epoch": 0.7417766565512535, "grad_norm": 2.102827310562134, "learning_rate": 1.3591404746632458e-06, "loss": 0.8634, "step": 60865 }, { "epoch": 0.7418375927754013, "grad_norm": 2.015963554382324, "learning_rate": 1.358819756254009e-06, "loss": 0.817, "step": 60870 }, { "epoch": 0.7418985289995491, "grad_norm": 1.9417366981506348, "learning_rate": 1.3584990378447724e-06, "loss": 0.808, "step": 60875 }, { "epoch": 0.7419594652236968, "grad_norm": 1.8750818967819214, "learning_rate": 1.3581783194355359e-06, "loss": 0.7865, "step": 60880 }, { "epoch": 0.7420204014478446, "grad_norm": 1.8237110376358032, "learning_rate": 1.3578576010262989e-06, "loss": 0.7009, "step": 60885 }, { "epoch": 0.7420813376719925, "grad_norm": 1.9596444368362427, "learning_rate": 1.3575368826170623e-06, "loss": 0.8254, "step": 60890 }, { "epoch": 0.7421422738961403, "grad_norm": 1.926351547241211, "learning_rate": 1.3572161642078258e-06, "loss": 0.8155, "step": 60895 }, { "epoch": 0.7422032101202881, "grad_norm": 2.05839204788208, "learning_rate": 1.356895445798589e-06, "loss": 0.8049, "step": 60900 }, { "epoch": 0.7422641463444359, "grad_norm": 1.72355055809021, "learning_rate": 1.3565747273893522e-06, "loss": 0.8369, "step": 60905 }, { "epoch": 0.7423250825685838, "grad_norm": 1.8007054328918457, "learning_rate": 1.3562540089801154e-06, "loss": 0.8603, "step": 60910 }, { "epoch": 0.7423860187927315, "grad_norm": 2.2257487773895264, "learning_rate": 1.3559332905708789e-06, "loss": 0.7867, "step": 60915 }, { "epoch": 0.7424469550168793, "grad_norm": 1.675079345703125, "learning_rate": 1.3556125721616423e-06, "loss": 0.8383, "step": 60920 }, { "epoch": 0.7425078912410271, "grad_norm": 1.8061761856079102, "learning_rate": 1.3552918537524053e-06, "loss": 0.774, "step": 60925 }, { "epoch": 0.7425688274651749, "grad_norm": 2.6724324226379395, "learning_rate": 1.3549711353431688e-06, "loss": 0.8059, "step": 60930 }, { "epoch": 0.7426297636893228, "grad_norm": 2.052997350692749, "learning_rate": 1.3546504169339322e-06, "loss": 0.8407, "step": 60935 }, { "epoch": 0.7426906999134706, "grad_norm": 1.9819458723068237, "learning_rate": 1.3543296985246955e-06, "loss": 0.855, "step": 60940 }, { "epoch": 0.7427516361376184, "grad_norm": 1.6830066442489624, "learning_rate": 1.3540089801154587e-06, "loss": 0.8633, "step": 60945 }, { "epoch": 0.7428125723617661, "grad_norm": 2.1836659908294678, "learning_rate": 1.353688261706222e-06, "loss": 0.8689, "step": 60950 }, { "epoch": 0.7428735085859139, "grad_norm": 2.4961330890655518, "learning_rate": 1.3533675432969854e-06, "loss": 0.8165, "step": 60955 }, { "epoch": 0.7429344448100618, "grad_norm": 2.114297389984131, "learning_rate": 1.3530468248877488e-06, "loss": 0.8808, "step": 60960 }, { "epoch": 0.7429953810342096, "grad_norm": 2.06319522857666, "learning_rate": 1.3527261064785118e-06, "loss": 0.8796, "step": 60965 }, { "epoch": 0.7430563172583574, "grad_norm": 1.6126433610916138, "learning_rate": 1.3524053880692752e-06, "loss": 0.8206, "step": 60970 }, { "epoch": 0.7431172534825052, "grad_norm": 1.8881968259811401, "learning_rate": 1.3520846696600387e-06, "loss": 0.8628, "step": 60975 }, { "epoch": 0.7431781897066531, "grad_norm": 2.1511764526367188, "learning_rate": 1.351763951250802e-06, "loss": 0.8395, "step": 60980 }, { "epoch": 0.7432391259308008, "grad_norm": 1.6295682191848755, "learning_rate": 1.3514432328415654e-06, "loss": 0.7338, "step": 60985 }, { "epoch": 0.7433000621549486, "grad_norm": 2.0654149055480957, "learning_rate": 1.3511225144323284e-06, "loss": 0.7694, "step": 60990 }, { "epoch": 0.7433609983790964, "grad_norm": 2.005711555480957, "learning_rate": 1.3508017960230918e-06, "loss": 0.8261, "step": 60995 }, { "epoch": 0.7434219346032442, "grad_norm": 1.8853516578674316, "learning_rate": 1.3504810776138553e-06, "loss": 0.7784, "step": 61000 }, { "epoch": 0.7434828708273921, "grad_norm": 1.706517219543457, "learning_rate": 1.3501603592046183e-06, "loss": 0.7575, "step": 61005 }, { "epoch": 0.7435438070515399, "grad_norm": 2.0328481197357178, "learning_rate": 1.3498396407953817e-06, "loss": 0.7929, "step": 61010 }, { "epoch": 0.7436047432756877, "grad_norm": 1.60240638256073, "learning_rate": 1.3495189223861452e-06, "loss": 0.8652, "step": 61015 }, { "epoch": 0.7436656794998354, "grad_norm": 2.0845160484313965, "learning_rate": 1.3491982039769084e-06, "loss": 0.834, "step": 61020 }, { "epoch": 0.7437266157239832, "grad_norm": 2.0081844329833984, "learning_rate": 1.3488774855676718e-06, "loss": 0.8099, "step": 61025 }, { "epoch": 0.7437875519481311, "grad_norm": 1.829818606376648, "learning_rate": 1.348556767158435e-06, "loss": 0.8054, "step": 61030 }, { "epoch": 0.7438484881722789, "grad_norm": 2.022913694381714, "learning_rate": 1.3482360487491983e-06, "loss": 0.8316, "step": 61035 }, { "epoch": 0.7439094243964267, "grad_norm": 1.7779395580291748, "learning_rate": 1.3479153303399617e-06, "loss": 0.7591, "step": 61040 }, { "epoch": 0.7439703606205745, "grad_norm": 2.144845485687256, "learning_rate": 1.3475946119307247e-06, "loss": 0.8167, "step": 61045 }, { "epoch": 0.7440312968447224, "grad_norm": 2.2502503395080566, "learning_rate": 1.3472738935214882e-06, "loss": 0.7807, "step": 61050 }, { "epoch": 0.7440922330688701, "grad_norm": 1.8246405124664307, "learning_rate": 1.3469531751122516e-06, "loss": 0.7756, "step": 61055 }, { "epoch": 0.7441531692930179, "grad_norm": 1.6382648944854736, "learning_rate": 1.3466324567030148e-06, "loss": 0.8337, "step": 61060 }, { "epoch": 0.7442141055171657, "grad_norm": 1.8221741914749146, "learning_rate": 1.3463117382937783e-06, "loss": 0.807, "step": 61065 }, { "epoch": 0.7442750417413135, "grad_norm": 2.128478765487671, "learning_rate": 1.3459910198845415e-06, "loss": 0.8405, "step": 61070 }, { "epoch": 0.7443359779654614, "grad_norm": 1.804455280303955, "learning_rate": 1.3456703014753047e-06, "loss": 0.7945, "step": 61075 }, { "epoch": 0.7443969141896092, "grad_norm": 1.829723834991455, "learning_rate": 1.3453495830660682e-06, "loss": 0.8118, "step": 61080 }, { "epoch": 0.744457850413757, "grad_norm": 2.2628767490386963, "learning_rate": 1.3450288646568312e-06, "loss": 0.826, "step": 61085 }, { "epoch": 0.7445187866379047, "grad_norm": 2.241823434829712, "learning_rate": 1.3447081462475946e-06, "loss": 0.7548, "step": 61090 }, { "epoch": 0.7445797228620525, "grad_norm": 2.6068952083587646, "learning_rate": 1.344387427838358e-06, "loss": 0.7977, "step": 61095 }, { "epoch": 0.7446406590862004, "grad_norm": 2.1025259494781494, "learning_rate": 1.3440667094291213e-06, "loss": 0.8596, "step": 61100 }, { "epoch": 0.7447015953103482, "grad_norm": 2.019489049911499, "learning_rate": 1.3437459910198847e-06, "loss": 0.8052, "step": 61105 }, { "epoch": 0.744762531534496, "grad_norm": 2.2767977714538574, "learning_rate": 1.3434252726106482e-06, "loss": 0.8133, "step": 61110 }, { "epoch": 0.7448234677586438, "grad_norm": 1.699639081954956, "learning_rate": 1.3431045542014112e-06, "loss": 0.8311, "step": 61115 }, { "epoch": 0.7448844039827917, "grad_norm": 1.8541921377182007, "learning_rate": 1.3427838357921746e-06, "loss": 0.8998, "step": 61120 }, { "epoch": 0.7449453402069394, "grad_norm": 1.9394534826278687, "learning_rate": 1.3424631173829379e-06, "loss": 0.8334, "step": 61125 }, { "epoch": 0.7450062764310872, "grad_norm": 1.924345850944519, "learning_rate": 1.342142398973701e-06, "loss": 0.7702, "step": 61130 }, { "epoch": 0.745067212655235, "grad_norm": 2.4276981353759766, "learning_rate": 1.3418216805644645e-06, "loss": 0.8111, "step": 61135 }, { "epoch": 0.7451281488793828, "grad_norm": 1.9454196691513062, "learning_rate": 1.3415009621552278e-06, "loss": 0.7598, "step": 61140 }, { "epoch": 0.7451890851035307, "grad_norm": 2.131795644760132, "learning_rate": 1.3411802437459912e-06, "loss": 0.83, "step": 61145 }, { "epoch": 0.7452500213276785, "grad_norm": 2.171393632888794, "learning_rate": 1.3408595253367547e-06, "loss": 0.8518, "step": 61150 }, { "epoch": 0.7453109575518263, "grad_norm": 1.9505702257156372, "learning_rate": 1.3405388069275177e-06, "loss": 0.8379, "step": 61155 }, { "epoch": 0.745371893775974, "grad_norm": 1.8809400796890259, "learning_rate": 1.3402180885182811e-06, "loss": 0.834, "step": 61160 }, { "epoch": 0.7454328300001218, "grad_norm": 2.1000776290893555, "learning_rate": 1.3398973701090443e-06, "loss": 0.7737, "step": 61165 }, { "epoch": 0.7454937662242697, "grad_norm": 1.8332387208938599, "learning_rate": 1.3395766516998076e-06, "loss": 0.8588, "step": 61170 }, { "epoch": 0.7455547024484175, "grad_norm": 2.8059771060943604, "learning_rate": 1.339255933290571e-06, "loss": 0.874, "step": 61175 }, { "epoch": 0.7456156386725653, "grad_norm": 2.2103769779205322, "learning_rate": 1.3389352148813342e-06, "loss": 0.8128, "step": 61180 }, { "epoch": 0.7456765748967131, "grad_norm": 1.9307140111923218, "learning_rate": 1.3386144964720977e-06, "loss": 0.8694, "step": 61185 }, { "epoch": 0.745737511120861, "grad_norm": 1.9561611413955688, "learning_rate": 1.3382937780628611e-06, "loss": 0.7959, "step": 61190 }, { "epoch": 0.7457984473450087, "grad_norm": 1.8357939720153809, "learning_rate": 1.3379730596536241e-06, "loss": 0.7342, "step": 61195 }, { "epoch": 0.7458593835691565, "grad_norm": 1.926099181175232, "learning_rate": 1.3376523412443876e-06, "loss": 0.8016, "step": 61200 }, { "epoch": 0.7459203197933043, "grad_norm": 2.062563419342041, "learning_rate": 1.3373316228351508e-06, "loss": 0.8456, "step": 61205 }, { "epoch": 0.7459812560174521, "grad_norm": 2.154473304748535, "learning_rate": 1.3370109044259142e-06, "loss": 0.8235, "step": 61210 }, { "epoch": 0.7460421922416, "grad_norm": 2.3371877670288086, "learning_rate": 1.3366901860166775e-06, "loss": 0.9047, "step": 61215 }, { "epoch": 0.7461031284657478, "grad_norm": 1.7325736284255981, "learning_rate": 1.3363694676074407e-06, "loss": 0.734, "step": 61220 }, { "epoch": 0.7461640646898956, "grad_norm": 2.1396408081054688, "learning_rate": 1.3360487491982041e-06, "loss": 0.8798, "step": 61225 }, { "epoch": 0.7462250009140433, "grad_norm": 1.7938145399093628, "learning_rate": 1.3357280307889676e-06, "loss": 0.8367, "step": 61230 }, { "epoch": 0.7462859371381911, "grad_norm": 1.9822911024093628, "learning_rate": 1.3354073123797306e-06, "loss": 0.8197, "step": 61235 }, { "epoch": 0.746346873362339, "grad_norm": 2.1676008701324463, "learning_rate": 1.335086593970494e-06, "loss": 0.8561, "step": 61240 }, { "epoch": 0.7464078095864868, "grad_norm": 2.2527272701263428, "learning_rate": 1.3347658755612573e-06, "loss": 0.8263, "step": 61245 }, { "epoch": 0.7464687458106346, "grad_norm": 2.315279483795166, "learning_rate": 1.3344451571520207e-06, "loss": 0.8267, "step": 61250 }, { "epoch": 0.7465296820347824, "grad_norm": 1.9234389066696167, "learning_rate": 1.334124438742784e-06, "loss": 0.8076, "step": 61255 }, { "epoch": 0.7465906182589302, "grad_norm": 1.6944398880004883, "learning_rate": 1.3338037203335472e-06, "loss": 0.8046, "step": 61260 }, { "epoch": 0.746651554483078, "grad_norm": 1.9615540504455566, "learning_rate": 1.3334830019243106e-06, "loss": 0.8369, "step": 61265 }, { "epoch": 0.7467124907072258, "grad_norm": 3.405982255935669, "learning_rate": 1.333162283515074e-06, "loss": 0.7943, "step": 61270 }, { "epoch": 0.7467734269313736, "grad_norm": 2.3637139797210693, "learning_rate": 1.332841565105837e-06, "loss": 0.7939, "step": 61275 }, { "epoch": 0.7468343631555214, "grad_norm": 1.9455939531326294, "learning_rate": 1.3325208466966005e-06, "loss": 0.8728, "step": 61280 }, { "epoch": 0.7468952993796693, "grad_norm": 1.6781718730926514, "learning_rate": 1.3322001282873637e-06, "loss": 0.8097, "step": 61285 }, { "epoch": 0.7469562356038171, "grad_norm": 1.8795744180679321, "learning_rate": 1.3318794098781272e-06, "loss": 0.7489, "step": 61290 }, { "epoch": 0.7470171718279649, "grad_norm": 1.8349130153656006, "learning_rate": 1.3315586914688904e-06, "loss": 0.7882, "step": 61295 }, { "epoch": 0.7470781080521126, "grad_norm": 2.0075371265411377, "learning_rate": 1.3312379730596536e-06, "loss": 0.7805, "step": 61300 }, { "epoch": 0.7471390442762604, "grad_norm": 2.0843279361724854, "learning_rate": 1.330917254650417e-06, "loss": 0.7847, "step": 61305 }, { "epoch": 0.7471999805004083, "grad_norm": 1.7348918914794922, "learning_rate": 1.3305965362411805e-06, "loss": 0.8591, "step": 61310 }, { "epoch": 0.7472609167245561, "grad_norm": 2.388806104660034, "learning_rate": 1.3302758178319435e-06, "loss": 0.7556, "step": 61315 }, { "epoch": 0.7473218529487039, "grad_norm": 1.7771834135055542, "learning_rate": 1.329955099422707e-06, "loss": 0.7607, "step": 61320 }, { "epoch": 0.7473827891728517, "grad_norm": 1.7995651960372925, "learning_rate": 1.3296343810134702e-06, "loss": 0.8364, "step": 61325 }, { "epoch": 0.7474437253969995, "grad_norm": 1.6838880777359009, "learning_rate": 1.3293136626042336e-06, "loss": 0.8185, "step": 61330 }, { "epoch": 0.7475046616211473, "grad_norm": 1.9313960075378418, "learning_rate": 1.328992944194997e-06, "loss": 0.893, "step": 61335 }, { "epoch": 0.7475655978452951, "grad_norm": 1.8657175302505493, "learning_rate": 1.32867222578576e-06, "loss": 0.7962, "step": 61340 }, { "epoch": 0.7476265340694429, "grad_norm": 2.0177719593048096, "learning_rate": 1.3283515073765235e-06, "loss": 0.7791, "step": 61345 }, { "epoch": 0.7476874702935907, "grad_norm": 2.017685890197754, "learning_rate": 1.328030788967287e-06, "loss": 0.8414, "step": 61350 }, { "epoch": 0.7477484065177386, "grad_norm": 1.9203286170959473, "learning_rate": 1.32771007055805e-06, "loss": 0.8148, "step": 61355 }, { "epoch": 0.7478093427418864, "grad_norm": 2.30593204498291, "learning_rate": 1.3273893521488134e-06, "loss": 0.842, "step": 61360 }, { "epoch": 0.7478702789660342, "grad_norm": 1.694484829902649, "learning_rate": 1.3270686337395769e-06, "loss": 0.7937, "step": 61365 }, { "epoch": 0.7479312151901819, "grad_norm": 1.9213858842849731, "learning_rate": 1.32674791533034e-06, "loss": 0.8775, "step": 61370 }, { "epoch": 0.7479921514143297, "grad_norm": 1.9239790439605713, "learning_rate": 1.3264271969211035e-06, "loss": 0.799, "step": 61375 }, { "epoch": 0.7480530876384776, "grad_norm": 1.4777308702468872, "learning_rate": 1.3261064785118666e-06, "loss": 0.745, "step": 61380 }, { "epoch": 0.7481140238626254, "grad_norm": 1.8301647901535034, "learning_rate": 1.32578576010263e-06, "loss": 0.8014, "step": 61385 }, { "epoch": 0.7481749600867732, "grad_norm": 1.7888689041137695, "learning_rate": 1.3254650416933934e-06, "loss": 0.7473, "step": 61390 }, { "epoch": 0.748235896310921, "grad_norm": 1.890734076499939, "learning_rate": 1.3251443232841565e-06, "loss": 0.7746, "step": 61395 }, { "epoch": 0.7482968325350688, "grad_norm": 2.074004888534546, "learning_rate": 1.3248236048749199e-06, "loss": 0.8623, "step": 61400 }, { "epoch": 0.7483577687592166, "grad_norm": 2.009535551071167, "learning_rate": 1.3245028864656833e-06, "loss": 0.8085, "step": 61405 }, { "epoch": 0.7484187049833644, "grad_norm": 1.7055331468582153, "learning_rate": 1.3241821680564466e-06, "loss": 0.8091, "step": 61410 }, { "epoch": 0.7484796412075122, "grad_norm": 1.817762017250061, "learning_rate": 1.32386144964721e-06, "loss": 0.7891, "step": 61415 }, { "epoch": 0.74854057743166, "grad_norm": 1.9287854433059692, "learning_rate": 1.323540731237973e-06, "loss": 0.8002, "step": 61420 }, { "epoch": 0.7486015136558078, "grad_norm": 1.644722819328308, "learning_rate": 1.3232200128287365e-06, "loss": 0.7651, "step": 61425 }, { "epoch": 0.7486624498799557, "grad_norm": 1.8383010625839233, "learning_rate": 1.3228992944195e-06, "loss": 0.8589, "step": 61430 }, { "epoch": 0.7487233861041035, "grad_norm": 2.0041210651397705, "learning_rate": 1.322578576010263e-06, "loss": 0.8591, "step": 61435 }, { "epoch": 0.7487843223282512, "grad_norm": 1.8930821418762207, "learning_rate": 1.3222578576010264e-06, "loss": 0.8579, "step": 61440 }, { "epoch": 0.748845258552399, "grad_norm": 2.410187244415283, "learning_rate": 1.3219371391917898e-06, "loss": 0.8567, "step": 61445 }, { "epoch": 0.7489061947765469, "grad_norm": 2.077029228210449, "learning_rate": 1.321616420782553e-06, "loss": 0.8477, "step": 61450 }, { "epoch": 0.7489671310006947, "grad_norm": 2.054358720779419, "learning_rate": 1.3212957023733165e-06, "loss": 0.8805, "step": 61455 }, { "epoch": 0.7490280672248425, "grad_norm": 1.8211551904678345, "learning_rate": 1.3209749839640795e-06, "loss": 0.7709, "step": 61460 }, { "epoch": 0.7490890034489903, "grad_norm": 2.1775383949279785, "learning_rate": 1.320654265554843e-06, "loss": 0.8475, "step": 61465 }, { "epoch": 0.7491499396731381, "grad_norm": 2.0337061882019043, "learning_rate": 1.3203335471456064e-06, "loss": 0.7487, "step": 61470 }, { "epoch": 0.7492108758972859, "grad_norm": 2.081373929977417, "learning_rate": 1.3200128287363696e-06, "loss": 0.845, "step": 61475 }, { "epoch": 0.7492718121214337, "grad_norm": 2.041383743286133, "learning_rate": 1.3196921103271328e-06, "loss": 0.7669, "step": 61480 }, { "epoch": 0.7493327483455815, "grad_norm": 1.9162812232971191, "learning_rate": 1.3193713919178963e-06, "loss": 0.8692, "step": 61485 }, { "epoch": 0.7493936845697293, "grad_norm": 1.94984769821167, "learning_rate": 1.3190506735086595e-06, "loss": 0.8066, "step": 61490 }, { "epoch": 0.7494546207938771, "grad_norm": 2.0345046520233154, "learning_rate": 1.318729955099423e-06, "loss": 0.7946, "step": 61495 }, { "epoch": 0.749515557018025, "grad_norm": 1.6866304874420166, "learning_rate": 1.318409236690186e-06, "loss": 0.8649, "step": 61500 }, { "epoch": 0.7495764932421728, "grad_norm": 2.135105609893799, "learning_rate": 1.3180885182809494e-06, "loss": 0.8738, "step": 61505 }, { "epoch": 0.7496374294663205, "grad_norm": 1.9880049228668213, "learning_rate": 1.3177677998717128e-06, "loss": 0.8324, "step": 61510 }, { "epoch": 0.7496983656904683, "grad_norm": 1.7711620330810547, "learning_rate": 1.317447081462476e-06, "loss": 0.7934, "step": 61515 }, { "epoch": 0.7497593019146161, "grad_norm": 1.9418003559112549, "learning_rate": 1.3171263630532393e-06, "loss": 0.7943, "step": 61520 }, { "epoch": 0.749820238138764, "grad_norm": 2.164428472518921, "learning_rate": 1.3168056446440027e-06, "loss": 0.8175, "step": 61525 }, { "epoch": 0.7498811743629118, "grad_norm": 1.9685472249984741, "learning_rate": 1.316484926234766e-06, "loss": 0.8028, "step": 61530 }, { "epoch": 0.7499421105870596, "grad_norm": 2.024712085723877, "learning_rate": 1.3161642078255294e-06, "loss": 0.8709, "step": 61535 }, { "epoch": 0.7500030468112074, "grad_norm": 2.7803094387054443, "learning_rate": 1.3158434894162924e-06, "loss": 0.8699, "step": 61540 }, { "epoch": 0.7500639830353552, "grad_norm": 2.321070432662964, "learning_rate": 1.3155227710070558e-06, "loss": 0.8457, "step": 61545 }, { "epoch": 0.750124919259503, "grad_norm": 2.2356033325195312, "learning_rate": 1.3152020525978193e-06, "loss": 0.8328, "step": 61550 }, { "epoch": 0.7501858554836508, "grad_norm": 2.315829038619995, "learning_rate": 1.3148813341885825e-06, "loss": 0.8401, "step": 61555 }, { "epoch": 0.7502467917077986, "grad_norm": 1.9910595417022705, "learning_rate": 1.3145606157793457e-06, "loss": 0.78, "step": 61560 }, { "epoch": 0.7503077279319464, "grad_norm": 1.7012914419174194, "learning_rate": 1.3142398973701092e-06, "loss": 0.7891, "step": 61565 }, { "epoch": 0.7503686641560943, "grad_norm": 2.0307347774505615, "learning_rate": 1.3139191789608724e-06, "loss": 0.8347, "step": 61570 }, { "epoch": 0.7504296003802421, "grad_norm": 1.7249146699905396, "learning_rate": 1.3135984605516359e-06, "loss": 0.8421, "step": 61575 }, { "epoch": 0.7504905366043898, "grad_norm": 2.119168758392334, "learning_rate": 1.3132777421423989e-06, "loss": 0.8699, "step": 61580 }, { "epoch": 0.7505514728285376, "grad_norm": 1.7400764226913452, "learning_rate": 1.3129570237331623e-06, "loss": 0.7752, "step": 61585 }, { "epoch": 0.7506124090526854, "grad_norm": 2.5770680904388428, "learning_rate": 1.3126363053239258e-06, "loss": 0.8785, "step": 61590 }, { "epoch": 0.7506733452768333, "grad_norm": 1.811745047569275, "learning_rate": 1.312315586914689e-06, "loss": 0.8417, "step": 61595 }, { "epoch": 0.7507342815009811, "grad_norm": 2.1895384788513184, "learning_rate": 1.3119948685054524e-06, "loss": 0.8248, "step": 61600 }, { "epoch": 0.7507952177251289, "grad_norm": 2.0652294158935547, "learning_rate": 1.3116741500962156e-06, "loss": 0.7997, "step": 61605 }, { "epoch": 0.7508561539492767, "grad_norm": 1.912471890449524, "learning_rate": 1.3113534316869789e-06, "loss": 0.8018, "step": 61610 }, { "epoch": 0.7509170901734245, "grad_norm": 1.8867807388305664, "learning_rate": 1.3110327132777423e-06, "loss": 0.7596, "step": 61615 }, { "epoch": 0.7509780263975723, "grad_norm": 1.7324472665786743, "learning_rate": 1.3107119948685053e-06, "loss": 0.7961, "step": 61620 }, { "epoch": 0.7510389626217201, "grad_norm": 2.115161180496216, "learning_rate": 1.3103912764592688e-06, "loss": 0.8651, "step": 61625 }, { "epoch": 0.7510998988458679, "grad_norm": 1.9337167739868164, "learning_rate": 1.3100705580500322e-06, "loss": 0.8255, "step": 61630 }, { "epoch": 0.7511608350700157, "grad_norm": 2.046262741088867, "learning_rate": 1.3097498396407954e-06, "loss": 0.8402, "step": 61635 }, { "epoch": 0.7512217712941636, "grad_norm": 2.065485954284668, "learning_rate": 1.3094291212315589e-06, "loss": 0.8513, "step": 61640 }, { "epoch": 0.7512827075183114, "grad_norm": 2.0719544887542725, "learning_rate": 1.3091084028223221e-06, "loss": 0.8231, "step": 61645 }, { "epoch": 0.7513436437424591, "grad_norm": 2.1338343620300293, "learning_rate": 1.3087876844130853e-06, "loss": 0.7942, "step": 61650 }, { "epoch": 0.7514045799666069, "grad_norm": 1.8319538831710815, "learning_rate": 1.3084669660038488e-06, "loss": 0.7888, "step": 61655 }, { "epoch": 0.7514655161907547, "grad_norm": 1.879075527191162, "learning_rate": 1.3081462475946122e-06, "loss": 0.7826, "step": 61660 }, { "epoch": 0.7515264524149026, "grad_norm": 1.94846773147583, "learning_rate": 1.3078255291853752e-06, "loss": 0.8461, "step": 61665 }, { "epoch": 0.7515873886390504, "grad_norm": 2.642906427383423, "learning_rate": 1.3075048107761387e-06, "loss": 0.8302, "step": 61670 }, { "epoch": 0.7516483248631982, "grad_norm": 2.3118176460266113, "learning_rate": 1.307184092366902e-06, "loss": 0.8616, "step": 61675 }, { "epoch": 0.751709261087346, "grad_norm": 2.1768271923065186, "learning_rate": 1.3068633739576653e-06, "loss": 0.7277, "step": 61680 }, { "epoch": 0.7517701973114937, "grad_norm": 2.801893711090088, "learning_rate": 1.3065426555484288e-06, "loss": 0.8162, "step": 61685 }, { "epoch": 0.7518311335356416, "grad_norm": 1.9969724416732788, "learning_rate": 1.3062219371391918e-06, "loss": 0.8186, "step": 61690 }, { "epoch": 0.7518920697597894, "grad_norm": 2.234372138977051, "learning_rate": 1.3059012187299552e-06, "loss": 0.7583, "step": 61695 }, { "epoch": 0.7519530059839372, "grad_norm": 1.876022219657898, "learning_rate": 1.3055805003207187e-06, "loss": 0.7837, "step": 61700 }, { "epoch": 0.752013942208085, "grad_norm": 1.8175119161605835, "learning_rate": 1.3052597819114817e-06, "loss": 0.7334, "step": 61705 }, { "epoch": 0.7520748784322329, "grad_norm": 1.8011505603790283, "learning_rate": 1.3049390635022451e-06, "loss": 0.8017, "step": 61710 }, { "epoch": 0.7521358146563807, "grad_norm": 2.295969247817993, "learning_rate": 1.3046183450930084e-06, "loss": 0.7952, "step": 61715 }, { "epoch": 0.7521967508805284, "grad_norm": 1.9330968856811523, "learning_rate": 1.3042976266837718e-06, "loss": 0.8204, "step": 61720 }, { "epoch": 0.7522576871046762, "grad_norm": 2.003614902496338, "learning_rate": 1.3039769082745353e-06, "loss": 0.8349, "step": 61725 }, { "epoch": 0.752318623328824, "grad_norm": 1.8695745468139648, "learning_rate": 1.3036561898652983e-06, "loss": 0.8099, "step": 61730 }, { "epoch": 0.7523795595529719, "grad_norm": 2.134432554244995, "learning_rate": 1.3033354714560617e-06, "loss": 0.8275, "step": 61735 }, { "epoch": 0.7524404957771197, "grad_norm": 2.005610227584839, "learning_rate": 1.3030147530468251e-06, "loss": 0.8848, "step": 61740 }, { "epoch": 0.7525014320012675, "grad_norm": 2.105254650115967, "learning_rate": 1.3026940346375882e-06, "loss": 0.7893, "step": 61745 }, { "epoch": 0.7525623682254153, "grad_norm": 2.0133345127105713, "learning_rate": 1.3023733162283516e-06, "loss": 0.7651, "step": 61750 }, { "epoch": 0.752623304449563, "grad_norm": 2.1157948970794678, "learning_rate": 1.3020525978191148e-06, "loss": 0.8087, "step": 61755 }, { "epoch": 0.7526842406737109, "grad_norm": 1.8541821241378784, "learning_rate": 1.3017318794098783e-06, "loss": 0.7845, "step": 61760 }, { "epoch": 0.7527451768978587, "grad_norm": 1.9056501388549805, "learning_rate": 1.3014111610006417e-06, "loss": 0.8405, "step": 61765 }, { "epoch": 0.7528061131220065, "grad_norm": 2.18485951423645, "learning_rate": 1.3010904425914047e-06, "loss": 0.8039, "step": 61770 }, { "epoch": 0.7528670493461543, "grad_norm": 1.716173529624939, "learning_rate": 1.3007697241821682e-06, "loss": 0.7879, "step": 61775 }, { "epoch": 0.7529279855703022, "grad_norm": 2.278062105178833, "learning_rate": 1.3004490057729316e-06, "loss": 0.8916, "step": 61780 }, { "epoch": 0.75298892179445, "grad_norm": 1.6688281297683716, "learning_rate": 1.3001282873636946e-06, "loss": 0.7659, "step": 61785 }, { "epoch": 0.7530498580185977, "grad_norm": 1.982269048690796, "learning_rate": 1.299807568954458e-06, "loss": 0.7709, "step": 61790 }, { "epoch": 0.7531107942427455, "grad_norm": 1.8400323390960693, "learning_rate": 1.2994868505452213e-06, "loss": 0.8212, "step": 61795 }, { "epoch": 0.7531717304668933, "grad_norm": 1.9149222373962402, "learning_rate": 1.2991661321359847e-06, "loss": 0.8251, "step": 61800 }, { "epoch": 0.7532326666910412, "grad_norm": 1.898220181465149, "learning_rate": 1.2988454137267482e-06, "loss": 0.8201, "step": 61805 }, { "epoch": 0.753293602915189, "grad_norm": 1.727949857711792, "learning_rate": 1.2985246953175112e-06, "loss": 0.7975, "step": 61810 }, { "epoch": 0.7533545391393368, "grad_norm": 1.809380292892456, "learning_rate": 1.2982039769082746e-06, "loss": 0.7879, "step": 61815 }, { "epoch": 0.7534154753634846, "grad_norm": 1.6177589893341064, "learning_rate": 1.297883258499038e-06, "loss": 0.804, "step": 61820 }, { "epoch": 0.7534764115876323, "grad_norm": 2.232550621032715, "learning_rate": 1.2975625400898013e-06, "loss": 0.8181, "step": 61825 }, { "epoch": 0.7535373478117802, "grad_norm": 1.9807438850402832, "learning_rate": 1.2972418216805645e-06, "loss": 0.7849, "step": 61830 }, { "epoch": 0.753598284035928, "grad_norm": 1.8749531507492065, "learning_rate": 1.2969211032713278e-06, "loss": 0.781, "step": 61835 }, { "epoch": 0.7536592202600758, "grad_norm": 2.0971806049346924, "learning_rate": 1.2966003848620912e-06, "loss": 0.8223, "step": 61840 }, { "epoch": 0.7537201564842236, "grad_norm": 1.952202558517456, "learning_rate": 1.2962796664528546e-06, "loss": 0.8369, "step": 61845 }, { "epoch": 0.7537810927083715, "grad_norm": 1.9483195543289185, "learning_rate": 1.2959589480436177e-06, "loss": 0.8776, "step": 61850 }, { "epoch": 0.7538420289325192, "grad_norm": 2.030747890472412, "learning_rate": 1.295638229634381e-06, "loss": 0.8072, "step": 61855 }, { "epoch": 0.753902965156667, "grad_norm": 2.207760810852051, "learning_rate": 1.2953175112251445e-06, "loss": 0.7677, "step": 61860 }, { "epoch": 0.7539639013808148, "grad_norm": 1.8666877746582031, "learning_rate": 1.2949967928159078e-06, "loss": 0.7702, "step": 61865 }, { "epoch": 0.7540248376049626, "grad_norm": 1.9701290130615234, "learning_rate": 1.294676074406671e-06, "loss": 0.7071, "step": 61870 }, { "epoch": 0.7540857738291105, "grad_norm": 1.8986353874206543, "learning_rate": 1.2943553559974342e-06, "loss": 0.819, "step": 61875 }, { "epoch": 0.7541467100532583, "grad_norm": 2.4703640937805176, "learning_rate": 1.2940346375881977e-06, "loss": 0.7911, "step": 61880 }, { "epoch": 0.7542076462774061, "grad_norm": 1.8782848119735718, "learning_rate": 1.293713919178961e-06, "loss": 0.848, "step": 61885 }, { "epoch": 0.7542685825015538, "grad_norm": 1.6821856498718262, "learning_rate": 1.2933932007697241e-06, "loss": 0.8734, "step": 61890 }, { "epoch": 0.7543295187257016, "grad_norm": 1.9057447910308838, "learning_rate": 1.2930724823604876e-06, "loss": 0.8371, "step": 61895 }, { "epoch": 0.7543904549498495, "grad_norm": 1.7416785955429077, "learning_rate": 1.292751763951251e-06, "loss": 0.8709, "step": 61900 }, { "epoch": 0.7544513911739973, "grad_norm": 1.6440541744232178, "learning_rate": 1.2924310455420142e-06, "loss": 0.7677, "step": 61905 }, { "epoch": 0.7545123273981451, "grad_norm": 2.0395028591156006, "learning_rate": 1.2921103271327775e-06, "loss": 0.821, "step": 61910 }, { "epoch": 0.7545732636222929, "grad_norm": 2.001146078109741, "learning_rate": 1.2917896087235407e-06, "loss": 0.8, "step": 61915 }, { "epoch": 0.7546341998464408, "grad_norm": 1.9410879611968994, "learning_rate": 1.2914688903143041e-06, "loss": 0.7855, "step": 61920 }, { "epoch": 0.7546951360705885, "grad_norm": 1.857512354850769, "learning_rate": 1.2911481719050676e-06, "loss": 0.7876, "step": 61925 }, { "epoch": 0.7547560722947363, "grad_norm": 2.2427353858947754, "learning_rate": 1.2908274534958306e-06, "loss": 0.7367, "step": 61930 }, { "epoch": 0.7548170085188841, "grad_norm": 2.3193359375, "learning_rate": 1.290506735086594e-06, "loss": 0.826, "step": 61935 }, { "epoch": 0.7548779447430319, "grad_norm": 1.8235834836959839, "learning_rate": 1.2901860166773575e-06, "loss": 0.8317, "step": 61940 }, { "epoch": 0.7549388809671798, "grad_norm": 1.7518242597579956, "learning_rate": 1.2898652982681207e-06, "loss": 0.7614, "step": 61945 }, { "epoch": 0.7549998171913276, "grad_norm": 1.8538719415664673, "learning_rate": 1.2895445798588841e-06, "loss": 0.83, "step": 61950 }, { "epoch": 0.7550607534154754, "grad_norm": 1.971862554550171, "learning_rate": 1.2892238614496474e-06, "loss": 0.7443, "step": 61955 }, { "epoch": 0.7551216896396231, "grad_norm": 2.0526723861694336, "learning_rate": 1.2889031430404106e-06, "loss": 0.8866, "step": 61960 }, { "epoch": 0.7551826258637709, "grad_norm": 2.3950352668762207, "learning_rate": 1.288582424631174e-06, "loss": 0.813, "step": 61965 }, { "epoch": 0.7552435620879188, "grad_norm": 1.9306966066360474, "learning_rate": 1.288261706221937e-06, "loss": 0.8369, "step": 61970 }, { "epoch": 0.7553044983120666, "grad_norm": 1.9645198583602905, "learning_rate": 1.2879409878127005e-06, "loss": 0.8276, "step": 61975 }, { "epoch": 0.7553654345362144, "grad_norm": 1.7137739658355713, "learning_rate": 1.287620269403464e-06, "loss": 0.8215, "step": 61980 }, { "epoch": 0.7554263707603622, "grad_norm": 1.8743928670883179, "learning_rate": 1.2872995509942272e-06, "loss": 0.8689, "step": 61985 }, { "epoch": 0.75548730698451, "grad_norm": 1.8262789249420166, "learning_rate": 1.2869788325849906e-06, "loss": 0.8023, "step": 61990 }, { "epoch": 0.7555482432086578, "grad_norm": 2.0385239124298096, "learning_rate": 1.2866581141757538e-06, "loss": 0.8508, "step": 61995 }, { "epoch": 0.7556091794328056, "grad_norm": 1.7653263807296753, "learning_rate": 1.286337395766517e-06, "loss": 0.7075, "step": 62000 }, { "epoch": 0.7556701156569534, "grad_norm": 1.8126859664916992, "learning_rate": 1.2860166773572805e-06, "loss": 0.8305, "step": 62005 }, { "epoch": 0.7557310518811012, "grad_norm": 2.072636365890503, "learning_rate": 1.2856959589480435e-06, "loss": 0.713, "step": 62010 }, { "epoch": 0.7557919881052491, "grad_norm": 1.7577738761901855, "learning_rate": 1.285375240538807e-06, "loss": 0.8213, "step": 62015 }, { "epoch": 0.7558529243293969, "grad_norm": 1.9643341302871704, "learning_rate": 1.2850545221295704e-06, "loss": 0.7994, "step": 62020 }, { "epoch": 0.7559138605535447, "grad_norm": 1.912498950958252, "learning_rate": 1.2847338037203336e-06, "loss": 0.7963, "step": 62025 }, { "epoch": 0.7559747967776924, "grad_norm": 2.073051691055298, "learning_rate": 1.284413085311097e-06, "loss": 0.8683, "step": 62030 }, { "epoch": 0.7560357330018402, "grad_norm": 3.1658706665039062, "learning_rate": 1.2840923669018605e-06, "loss": 0.7738, "step": 62035 }, { "epoch": 0.7560966692259881, "grad_norm": 1.7913601398468018, "learning_rate": 1.2837716484926235e-06, "loss": 0.7829, "step": 62040 }, { "epoch": 0.7561576054501359, "grad_norm": 2.0592293739318848, "learning_rate": 1.283450930083387e-06, "loss": 0.7672, "step": 62045 }, { "epoch": 0.7562185416742837, "grad_norm": 1.8294930458068848, "learning_rate": 1.2831302116741502e-06, "loss": 0.893, "step": 62050 }, { "epoch": 0.7562794778984315, "grad_norm": 1.9152289628982544, "learning_rate": 1.2828094932649134e-06, "loss": 0.848, "step": 62055 }, { "epoch": 0.7563404141225794, "grad_norm": 2.1276092529296875, "learning_rate": 1.2824887748556769e-06, "loss": 0.8676, "step": 62060 }, { "epoch": 0.7564013503467271, "grad_norm": 2.0327670574188232, "learning_rate": 1.28216805644644e-06, "loss": 0.7379, "step": 62065 }, { "epoch": 0.7564622865708749, "grad_norm": 1.872633934020996, "learning_rate": 1.2818473380372035e-06, "loss": 0.8711, "step": 62070 }, { "epoch": 0.7565232227950227, "grad_norm": 2.153998374938965, "learning_rate": 1.281526619627967e-06, "loss": 0.831, "step": 62075 }, { "epoch": 0.7565841590191705, "grad_norm": 2.0686757564544678, "learning_rate": 1.28120590121873e-06, "loss": 0.8173, "step": 62080 }, { "epoch": 0.7566450952433184, "grad_norm": 1.9128626585006714, "learning_rate": 1.2808851828094934e-06, "loss": 0.8291, "step": 62085 }, { "epoch": 0.7567060314674662, "grad_norm": 1.7517626285552979, "learning_rate": 1.2805644644002567e-06, "loss": 0.8284, "step": 62090 }, { "epoch": 0.756766967691614, "grad_norm": 1.997151494026184, "learning_rate": 1.2802437459910199e-06, "loss": 0.8382, "step": 62095 }, { "epoch": 0.7568279039157617, "grad_norm": 2.0225324630737305, "learning_rate": 1.2799230275817833e-06, "loss": 0.7933, "step": 62100 }, { "epoch": 0.7568888401399095, "grad_norm": 2.007950782775879, "learning_rate": 1.2796023091725465e-06, "loss": 0.7361, "step": 62105 }, { "epoch": 0.7569497763640574, "grad_norm": 1.834535837173462, "learning_rate": 1.27928159076331e-06, "loss": 0.826, "step": 62110 }, { "epoch": 0.7570107125882052, "grad_norm": 1.674114465713501, "learning_rate": 1.2789608723540734e-06, "loss": 0.7912, "step": 62115 }, { "epoch": 0.757071648812353, "grad_norm": 1.8381586074829102, "learning_rate": 1.2786401539448364e-06, "loss": 0.7744, "step": 62120 }, { "epoch": 0.7571325850365008, "grad_norm": 2.3099453449249268, "learning_rate": 1.2783194355355999e-06, "loss": 0.8635, "step": 62125 }, { "epoch": 0.7571935212606486, "grad_norm": 2.064493417739868, "learning_rate": 1.2779987171263631e-06, "loss": 0.8333, "step": 62130 }, { "epoch": 0.7572544574847964, "grad_norm": 1.9319795370101929, "learning_rate": 1.2776779987171263e-06, "loss": 0.8275, "step": 62135 }, { "epoch": 0.7573153937089442, "grad_norm": 2.4258923530578613, "learning_rate": 1.2773572803078898e-06, "loss": 0.8918, "step": 62140 }, { "epoch": 0.757376329933092, "grad_norm": 2.0396337509155273, "learning_rate": 1.277036561898653e-06, "loss": 0.7586, "step": 62145 }, { "epoch": 0.7574372661572398, "grad_norm": 1.892444372177124, "learning_rate": 1.2767158434894165e-06, "loss": 0.7804, "step": 62150 }, { "epoch": 0.7574982023813877, "grad_norm": 1.790359377861023, "learning_rate": 1.2763951250801799e-06, "loss": 0.8163, "step": 62155 }, { "epoch": 0.7575591386055355, "grad_norm": 2.0172674655914307, "learning_rate": 1.276074406670943e-06, "loss": 0.8107, "step": 62160 }, { "epoch": 0.7576200748296833, "grad_norm": 2.085298776626587, "learning_rate": 1.2757536882617063e-06, "loss": 0.8296, "step": 62165 }, { "epoch": 0.757681011053831, "grad_norm": 2.3243942260742188, "learning_rate": 1.2754329698524696e-06, "loss": 0.7971, "step": 62170 }, { "epoch": 0.7577419472779788, "grad_norm": 2.1458401679992676, "learning_rate": 1.275112251443233e-06, "loss": 0.8673, "step": 62175 }, { "epoch": 0.7578028835021267, "grad_norm": 1.9177857637405396, "learning_rate": 1.2747915330339962e-06, "loss": 0.9021, "step": 62180 }, { "epoch": 0.7578638197262745, "grad_norm": 1.716651439666748, "learning_rate": 1.2744708146247595e-06, "loss": 0.767, "step": 62185 }, { "epoch": 0.7579247559504223, "grad_norm": 1.879143238067627, "learning_rate": 1.274150096215523e-06, "loss": 0.7949, "step": 62190 }, { "epoch": 0.7579856921745701, "grad_norm": 1.7170406579971313, "learning_rate": 1.2738293778062864e-06, "loss": 0.7746, "step": 62195 }, { "epoch": 0.758046628398718, "grad_norm": 1.812424898147583, "learning_rate": 1.2735086593970494e-06, "loss": 0.8138, "step": 62200 }, { "epoch": 0.7581075646228657, "grad_norm": 1.711393117904663, "learning_rate": 1.2731879409878128e-06, "loss": 0.8108, "step": 62205 }, { "epoch": 0.7581685008470135, "grad_norm": 1.976287841796875, "learning_rate": 1.272867222578576e-06, "loss": 0.7974, "step": 62210 }, { "epoch": 0.7582294370711613, "grad_norm": 2.0674264430999756, "learning_rate": 1.2725465041693395e-06, "loss": 0.8757, "step": 62215 }, { "epoch": 0.7582903732953091, "grad_norm": 1.6705114841461182, "learning_rate": 1.2722257857601027e-06, "loss": 0.7774, "step": 62220 }, { "epoch": 0.758351309519457, "grad_norm": 1.9413471221923828, "learning_rate": 1.271905067350866e-06, "loss": 0.8197, "step": 62225 }, { "epoch": 0.7584122457436048, "grad_norm": 1.7123419046401978, "learning_rate": 1.2715843489416294e-06, "loss": 0.8142, "step": 62230 }, { "epoch": 0.7584731819677526, "grad_norm": 2.3045685291290283, "learning_rate": 1.2712636305323928e-06, "loss": 0.8388, "step": 62235 }, { "epoch": 0.7585341181919003, "grad_norm": 1.954019546508789, "learning_rate": 1.2709429121231558e-06, "loss": 0.8768, "step": 62240 }, { "epoch": 0.7585950544160481, "grad_norm": 2.046321153640747, "learning_rate": 1.2706221937139193e-06, "loss": 0.8427, "step": 62245 }, { "epoch": 0.758655990640196, "grad_norm": 1.7440942525863647, "learning_rate": 1.2703014753046827e-06, "loss": 0.8316, "step": 62250 }, { "epoch": 0.7587169268643438, "grad_norm": 1.7465704679489136, "learning_rate": 1.269980756895446e-06, "loss": 0.75, "step": 62255 }, { "epoch": 0.7587778630884916, "grad_norm": 1.8858438730239868, "learning_rate": 1.2696600384862092e-06, "loss": 0.8265, "step": 62260 }, { "epoch": 0.7588387993126394, "grad_norm": 2.1954476833343506, "learning_rate": 1.2693393200769724e-06, "loss": 0.7692, "step": 62265 }, { "epoch": 0.7588997355367872, "grad_norm": 1.9825489521026611, "learning_rate": 1.2690186016677358e-06, "loss": 0.7949, "step": 62270 }, { "epoch": 0.758960671760935, "grad_norm": 1.7999216318130493, "learning_rate": 1.2686978832584993e-06, "loss": 0.8313, "step": 62275 }, { "epoch": 0.7590216079850828, "grad_norm": 1.8086916208267212, "learning_rate": 1.2683771648492623e-06, "loss": 0.7871, "step": 62280 }, { "epoch": 0.7590825442092306, "grad_norm": 1.538988709449768, "learning_rate": 1.2680564464400257e-06, "loss": 0.7242, "step": 62285 }, { "epoch": 0.7591434804333784, "grad_norm": 2.0650668144226074, "learning_rate": 1.2677357280307892e-06, "loss": 0.8639, "step": 62290 }, { "epoch": 0.7592044166575262, "grad_norm": 2.4218690395355225, "learning_rate": 1.2674150096215524e-06, "loss": 0.794, "step": 62295 }, { "epoch": 0.7592653528816741, "grad_norm": 1.941197395324707, "learning_rate": 1.2670942912123158e-06, "loss": 0.8153, "step": 62300 }, { "epoch": 0.7593262891058219, "grad_norm": 1.7996039390563965, "learning_rate": 1.2667735728030789e-06, "loss": 0.7897, "step": 62305 }, { "epoch": 0.7593872253299696, "grad_norm": 2.429030656814575, "learning_rate": 1.2664528543938423e-06, "loss": 0.8645, "step": 62310 }, { "epoch": 0.7594481615541174, "grad_norm": 2.0040950775146484, "learning_rate": 1.2661321359846057e-06, "loss": 0.8549, "step": 62315 }, { "epoch": 0.7595090977782653, "grad_norm": 2.143477439880371, "learning_rate": 1.2658114175753688e-06, "loss": 0.8367, "step": 62320 }, { "epoch": 0.7595700340024131, "grad_norm": 1.8695317506790161, "learning_rate": 1.2654906991661322e-06, "loss": 0.7639, "step": 62325 }, { "epoch": 0.7596309702265609, "grad_norm": 2.2380497455596924, "learning_rate": 1.2651699807568956e-06, "loss": 0.7576, "step": 62330 }, { "epoch": 0.7596919064507087, "grad_norm": 2.414461135864258, "learning_rate": 1.2648492623476589e-06, "loss": 0.8742, "step": 62335 }, { "epoch": 0.7597528426748565, "grad_norm": 2.131288766860962, "learning_rate": 1.2645285439384223e-06, "loss": 0.8563, "step": 62340 }, { "epoch": 0.7598137788990043, "grad_norm": 2.155925989151001, "learning_rate": 1.2642078255291853e-06, "loss": 0.7913, "step": 62345 }, { "epoch": 0.7598747151231521, "grad_norm": 1.7456538677215576, "learning_rate": 1.2638871071199488e-06, "loss": 0.8025, "step": 62350 }, { "epoch": 0.7599356513472999, "grad_norm": 2.3022875785827637, "learning_rate": 1.2635663887107122e-06, "loss": 0.8762, "step": 62355 }, { "epoch": 0.7599965875714477, "grad_norm": 1.8449510335922241, "learning_rate": 1.2632456703014752e-06, "loss": 0.7694, "step": 62360 }, { "epoch": 0.7600575237955955, "grad_norm": 1.7326340675354004, "learning_rate": 1.2629249518922387e-06, "loss": 0.8866, "step": 62365 }, { "epoch": 0.7601184600197434, "grad_norm": 1.8015635013580322, "learning_rate": 1.2626042334830021e-06, "loss": 0.8004, "step": 62370 }, { "epoch": 0.7601793962438912, "grad_norm": 1.890458583831787, "learning_rate": 1.2622835150737653e-06, "loss": 0.8166, "step": 62375 }, { "epoch": 0.7602403324680389, "grad_norm": 1.6186983585357666, "learning_rate": 1.2619627966645288e-06, "loss": 0.8517, "step": 62380 }, { "epoch": 0.7603012686921867, "grad_norm": 1.8561758995056152, "learning_rate": 1.2616420782552918e-06, "loss": 0.8602, "step": 62385 }, { "epoch": 0.7603622049163345, "grad_norm": 1.9428633451461792, "learning_rate": 1.2613213598460552e-06, "loss": 0.8581, "step": 62390 }, { "epoch": 0.7604231411404824, "grad_norm": 1.6179438829421997, "learning_rate": 1.2610006414368187e-06, "loss": 0.7995, "step": 62395 }, { "epoch": 0.7604840773646302, "grad_norm": 1.8962610960006714, "learning_rate": 1.260679923027582e-06, "loss": 0.8308, "step": 62400 }, { "epoch": 0.760545013588778, "grad_norm": 1.807225227355957, "learning_rate": 1.2603592046183451e-06, "loss": 0.7971, "step": 62405 }, { "epoch": 0.7606059498129258, "grad_norm": 2.3389999866485596, "learning_rate": 1.2600384862091086e-06, "loss": 0.8338, "step": 62410 }, { "epoch": 0.7606668860370736, "grad_norm": 2.3960721492767334, "learning_rate": 1.2597177677998718e-06, "loss": 0.8409, "step": 62415 }, { "epoch": 0.7607278222612214, "grad_norm": 2.1456973552703857, "learning_rate": 1.2593970493906352e-06, "loss": 0.9038, "step": 62420 }, { "epoch": 0.7607887584853692, "grad_norm": 2.0898241996765137, "learning_rate": 1.2590763309813983e-06, "loss": 0.7454, "step": 62425 }, { "epoch": 0.760849694709517, "grad_norm": 2.382521629333496, "learning_rate": 1.2587556125721617e-06, "loss": 0.7304, "step": 62430 }, { "epoch": 0.7609106309336648, "grad_norm": 1.9521253108978271, "learning_rate": 1.2584348941629251e-06, "loss": 0.824, "step": 62435 }, { "epoch": 0.7609715671578127, "grad_norm": 2.1795666217803955, "learning_rate": 1.2581141757536884e-06, "loss": 0.818, "step": 62440 }, { "epoch": 0.7610325033819605, "grad_norm": 1.9139156341552734, "learning_rate": 1.2577934573444516e-06, "loss": 0.8158, "step": 62445 }, { "epoch": 0.7610934396061082, "grad_norm": 1.6595041751861572, "learning_rate": 1.257472738935215e-06, "loss": 0.7561, "step": 62450 }, { "epoch": 0.761154375830256, "grad_norm": 2.387251377105713, "learning_rate": 1.2571520205259783e-06, "loss": 0.8191, "step": 62455 }, { "epoch": 0.7612153120544038, "grad_norm": 1.7027453184127808, "learning_rate": 1.2568313021167417e-06, "loss": 0.7614, "step": 62460 }, { "epoch": 0.7612762482785517, "grad_norm": 2.051309108734131, "learning_rate": 1.2565105837075047e-06, "loss": 0.7109, "step": 62465 }, { "epoch": 0.7613371845026995, "grad_norm": 2.0781965255737305, "learning_rate": 1.2561898652982682e-06, "loss": 0.7641, "step": 62470 }, { "epoch": 0.7613981207268473, "grad_norm": 2.357745885848999, "learning_rate": 1.2558691468890316e-06, "loss": 0.7701, "step": 62475 }, { "epoch": 0.7614590569509951, "grad_norm": 1.995831847190857, "learning_rate": 1.2555484284797948e-06, "loss": 0.8642, "step": 62480 }, { "epoch": 0.7615199931751429, "grad_norm": 2.188800096511841, "learning_rate": 1.255227710070558e-06, "loss": 0.7856, "step": 62485 }, { "epoch": 0.7615809293992907, "grad_norm": 2.0146570205688477, "learning_rate": 1.2549069916613215e-06, "loss": 0.8251, "step": 62490 }, { "epoch": 0.7616418656234385, "grad_norm": 1.9727457761764526, "learning_rate": 1.2545862732520847e-06, "loss": 0.8651, "step": 62495 }, { "epoch": 0.7617028018475863, "grad_norm": 1.7226834297180176, "learning_rate": 1.2542655548428482e-06, "loss": 0.807, "step": 62500 }, { "epoch": 0.7617637380717341, "grad_norm": 2.636587619781494, "learning_rate": 1.2539448364336112e-06, "loss": 0.838, "step": 62505 }, { "epoch": 0.761824674295882, "grad_norm": 1.6751716136932373, "learning_rate": 1.2536241180243746e-06, "loss": 0.812, "step": 62510 }, { "epoch": 0.7618856105200298, "grad_norm": 2.0550403594970703, "learning_rate": 1.253303399615138e-06, "loss": 0.8303, "step": 62515 }, { "epoch": 0.7619465467441775, "grad_norm": 2.291471481323242, "learning_rate": 1.2529826812059013e-06, "loss": 0.8693, "step": 62520 }, { "epoch": 0.7620074829683253, "grad_norm": 1.818028211593628, "learning_rate": 1.2526619627966647e-06, "loss": 0.8643, "step": 62525 }, { "epoch": 0.7620684191924731, "grad_norm": 2.1051111221313477, "learning_rate": 1.252341244387428e-06, "loss": 0.8323, "step": 62530 }, { "epoch": 0.762129355416621, "grad_norm": 1.8068064451217651, "learning_rate": 1.2520205259781912e-06, "loss": 0.8759, "step": 62535 }, { "epoch": 0.7621902916407688, "grad_norm": 1.8713263273239136, "learning_rate": 1.2516998075689546e-06, "loss": 0.7759, "step": 62540 }, { "epoch": 0.7622512278649166, "grad_norm": 1.9321340322494507, "learning_rate": 1.251379089159718e-06, "loss": 0.8294, "step": 62545 }, { "epoch": 0.7623121640890644, "grad_norm": 2.0466089248657227, "learning_rate": 1.251058370750481e-06, "loss": 0.8317, "step": 62550 }, { "epoch": 0.7623731003132121, "grad_norm": 1.7203117609024048, "learning_rate": 1.2507376523412445e-06, "loss": 0.7714, "step": 62555 }, { "epoch": 0.76243403653736, "grad_norm": 2.1956920623779297, "learning_rate": 1.2504169339320078e-06, "loss": 0.8357, "step": 62560 }, { "epoch": 0.7624949727615078, "grad_norm": 2.4771149158477783, "learning_rate": 1.2500962155227712e-06, "loss": 0.9135, "step": 62565 }, { "epoch": 0.7625559089856556, "grad_norm": 1.7907638549804688, "learning_rate": 1.2497754971135344e-06, "loss": 0.8181, "step": 62570 }, { "epoch": 0.7626168452098034, "grad_norm": 1.8485901355743408, "learning_rate": 1.2494547787042979e-06, "loss": 0.7978, "step": 62575 }, { "epoch": 0.7626777814339513, "grad_norm": 2.043637990951538, "learning_rate": 1.249134060295061e-06, "loss": 0.8642, "step": 62580 }, { "epoch": 0.7627387176580991, "grad_norm": 1.8614776134490967, "learning_rate": 1.2488133418858243e-06, "loss": 0.7729, "step": 62585 }, { "epoch": 0.7627996538822468, "grad_norm": 2.031440019607544, "learning_rate": 1.2484926234765876e-06, "loss": 0.8376, "step": 62590 }, { "epoch": 0.7628605901063946, "grad_norm": 1.9978288412094116, "learning_rate": 1.248171905067351e-06, "loss": 0.7915, "step": 62595 }, { "epoch": 0.7629215263305424, "grad_norm": 1.9123117923736572, "learning_rate": 1.2478511866581142e-06, "loss": 0.8355, "step": 62600 }, { "epoch": 0.7629824625546903, "grad_norm": 2.2860076427459717, "learning_rate": 1.2475304682488777e-06, "loss": 0.8199, "step": 62605 }, { "epoch": 0.7630433987788381, "grad_norm": 1.7904959917068481, "learning_rate": 1.2472097498396409e-06, "loss": 0.7926, "step": 62610 }, { "epoch": 0.7631043350029859, "grad_norm": 1.8070483207702637, "learning_rate": 1.2468890314304043e-06, "loss": 0.8333, "step": 62615 }, { "epoch": 0.7631652712271337, "grad_norm": 2.2412195205688477, "learning_rate": 1.2465683130211676e-06, "loss": 0.7537, "step": 62620 }, { "epoch": 0.7632262074512814, "grad_norm": 1.841907024383545, "learning_rate": 1.2462475946119308e-06, "loss": 0.7328, "step": 62625 }, { "epoch": 0.7632871436754293, "grad_norm": 2.1775007247924805, "learning_rate": 1.245926876202694e-06, "loss": 0.8073, "step": 62630 }, { "epoch": 0.7633480798995771, "grad_norm": 1.9051806926727295, "learning_rate": 1.2456061577934575e-06, "loss": 0.8326, "step": 62635 }, { "epoch": 0.7634090161237249, "grad_norm": 1.7164887189865112, "learning_rate": 1.2452854393842207e-06, "loss": 0.8252, "step": 62640 }, { "epoch": 0.7634699523478727, "grad_norm": 2.412235736846924, "learning_rate": 1.2449647209749841e-06, "loss": 0.7527, "step": 62645 }, { "epoch": 0.7635308885720206, "grad_norm": 2.3959906101226807, "learning_rate": 1.2446440025657474e-06, "loss": 0.801, "step": 62650 }, { "epoch": 0.7635918247961684, "grad_norm": 2.1958954334259033, "learning_rate": 1.2443232841565108e-06, "loss": 0.7796, "step": 62655 }, { "epoch": 0.7636527610203161, "grad_norm": 2.0264053344726562, "learning_rate": 1.244002565747274e-06, "loss": 0.9077, "step": 62660 }, { "epoch": 0.7637136972444639, "grad_norm": 1.7750908136367798, "learning_rate": 1.2436818473380373e-06, "loss": 0.7882, "step": 62665 }, { "epoch": 0.7637746334686117, "grad_norm": 2.241528272628784, "learning_rate": 1.2433611289288005e-06, "loss": 0.8364, "step": 62670 }, { "epoch": 0.7638355696927596, "grad_norm": 2.1212353706359863, "learning_rate": 1.243040410519564e-06, "loss": 0.8252, "step": 62675 }, { "epoch": 0.7638965059169074, "grad_norm": 2.162877082824707, "learning_rate": 1.2427196921103271e-06, "loss": 0.8555, "step": 62680 }, { "epoch": 0.7639574421410552, "grad_norm": 2.045081377029419, "learning_rate": 1.2423989737010906e-06, "loss": 0.8153, "step": 62685 }, { "epoch": 0.764018378365203, "grad_norm": 1.9717985391616821, "learning_rate": 1.2420782552918538e-06, "loss": 0.7573, "step": 62690 }, { "epoch": 0.7640793145893507, "grad_norm": 1.8549699783325195, "learning_rate": 1.2417575368826173e-06, "loss": 0.8081, "step": 62695 }, { "epoch": 0.7641402508134986, "grad_norm": 2.3663246631622314, "learning_rate": 1.2414368184733805e-06, "loss": 0.8581, "step": 62700 }, { "epoch": 0.7642011870376464, "grad_norm": 1.837059736251831, "learning_rate": 1.2411161000641437e-06, "loss": 0.7682, "step": 62705 }, { "epoch": 0.7642621232617942, "grad_norm": 2.032576322555542, "learning_rate": 1.240795381654907e-06, "loss": 0.6998, "step": 62710 }, { "epoch": 0.764323059485942, "grad_norm": 1.9355090856552124, "learning_rate": 1.2404746632456704e-06, "loss": 0.7607, "step": 62715 }, { "epoch": 0.7643839957100899, "grad_norm": 1.9850060939788818, "learning_rate": 1.2401539448364338e-06, "loss": 0.7743, "step": 62720 }, { "epoch": 0.7644449319342377, "grad_norm": 1.8159130811691284, "learning_rate": 1.239833226427197e-06, "loss": 0.7783, "step": 62725 }, { "epoch": 0.7645058681583854, "grad_norm": 2.623986005783081, "learning_rate": 1.2395125080179603e-06, "loss": 0.7858, "step": 62730 }, { "epoch": 0.7645668043825332, "grad_norm": 1.8646724224090576, "learning_rate": 1.2391917896087237e-06, "loss": 0.8289, "step": 62735 }, { "epoch": 0.764627740606681, "grad_norm": 2.0496437549591064, "learning_rate": 1.238871071199487e-06, "loss": 0.8272, "step": 62740 }, { "epoch": 0.7646886768308289, "grad_norm": 2.066988468170166, "learning_rate": 1.2385503527902502e-06, "loss": 0.8908, "step": 62745 }, { "epoch": 0.7647496130549767, "grad_norm": 1.7880613803863525, "learning_rate": 1.2382296343810136e-06, "loss": 0.7618, "step": 62750 }, { "epoch": 0.7648105492791245, "grad_norm": 1.7740637063980103, "learning_rate": 1.2379089159717768e-06, "loss": 0.841, "step": 62755 }, { "epoch": 0.7648714855032723, "grad_norm": 1.9651083946228027, "learning_rate": 1.2375881975625403e-06, "loss": 0.7955, "step": 62760 }, { "epoch": 0.76493242172742, "grad_norm": 2.350132703781128, "learning_rate": 1.2372674791533035e-06, "loss": 0.7948, "step": 62765 }, { "epoch": 0.7649933579515679, "grad_norm": 1.7532012462615967, "learning_rate": 1.2369467607440667e-06, "loss": 0.7409, "step": 62770 }, { "epoch": 0.7650542941757157, "grad_norm": 1.601028323173523, "learning_rate": 1.2366260423348302e-06, "loss": 0.801, "step": 62775 }, { "epoch": 0.7651152303998635, "grad_norm": 2.198509454727173, "learning_rate": 1.2363053239255934e-06, "loss": 0.7767, "step": 62780 }, { "epoch": 0.7651761666240113, "grad_norm": 1.9880049228668213, "learning_rate": 1.2359846055163566e-06, "loss": 0.7455, "step": 62785 }, { "epoch": 0.7652371028481592, "grad_norm": 1.8604973554611206, "learning_rate": 1.23566388710712e-06, "loss": 0.818, "step": 62790 }, { "epoch": 0.7652980390723069, "grad_norm": 2.1229348182678223, "learning_rate": 1.2353431686978833e-06, "loss": 0.7551, "step": 62795 }, { "epoch": 0.7653589752964547, "grad_norm": 1.8088641166687012, "learning_rate": 1.2350224502886467e-06, "loss": 0.897, "step": 62800 }, { "epoch": 0.7654199115206025, "grad_norm": 1.9515503644943237, "learning_rate": 1.23470173187941e-06, "loss": 0.8477, "step": 62805 }, { "epoch": 0.7654808477447503, "grad_norm": 2.1128108501434326, "learning_rate": 1.2343810134701732e-06, "loss": 0.834, "step": 62810 }, { "epoch": 0.7655417839688982, "grad_norm": 2.146874189376831, "learning_rate": 1.2340602950609366e-06, "loss": 0.849, "step": 62815 }, { "epoch": 0.765602720193046, "grad_norm": 2.9337844848632812, "learning_rate": 1.2337395766516999e-06, "loss": 0.8753, "step": 62820 }, { "epoch": 0.7656636564171938, "grad_norm": 1.8392119407653809, "learning_rate": 1.233418858242463e-06, "loss": 0.767, "step": 62825 }, { "epoch": 0.7657245926413415, "grad_norm": 1.9603404998779297, "learning_rate": 1.2330981398332265e-06, "loss": 0.8215, "step": 62830 }, { "epoch": 0.7657855288654893, "grad_norm": 1.8007458448410034, "learning_rate": 1.2327774214239898e-06, "loss": 0.8252, "step": 62835 }, { "epoch": 0.7658464650896372, "grad_norm": 1.9358710050582886, "learning_rate": 1.2324567030147532e-06, "loss": 0.7248, "step": 62840 }, { "epoch": 0.765907401313785, "grad_norm": 2.381135940551758, "learning_rate": 1.2321359846055164e-06, "loss": 0.8007, "step": 62845 }, { "epoch": 0.7659683375379328, "grad_norm": 2.0579185485839844, "learning_rate": 1.2318152661962799e-06, "loss": 0.7908, "step": 62850 }, { "epoch": 0.7660292737620806, "grad_norm": 1.9707008600234985, "learning_rate": 1.2314945477870431e-06, "loss": 0.8374, "step": 62855 }, { "epoch": 0.7660902099862285, "grad_norm": 1.9433602094650269, "learning_rate": 1.2311738293778063e-06, "loss": 0.8468, "step": 62860 }, { "epoch": 0.7661511462103762, "grad_norm": 1.9858007431030273, "learning_rate": 1.2308531109685696e-06, "loss": 0.8079, "step": 62865 }, { "epoch": 0.766212082434524, "grad_norm": 1.9215357303619385, "learning_rate": 1.230532392559333e-06, "loss": 0.7348, "step": 62870 }, { "epoch": 0.7662730186586718, "grad_norm": 1.690611720085144, "learning_rate": 1.2302116741500964e-06, "loss": 0.8303, "step": 62875 }, { "epoch": 0.7663339548828196, "grad_norm": 2.146005392074585, "learning_rate": 1.2298909557408597e-06, "loss": 0.8307, "step": 62880 }, { "epoch": 0.7663948911069675, "grad_norm": 1.6191012859344482, "learning_rate": 1.229570237331623e-06, "loss": 0.8244, "step": 62885 }, { "epoch": 0.7664558273311153, "grad_norm": 2.0182480812072754, "learning_rate": 1.2292495189223863e-06, "loss": 0.7736, "step": 62890 }, { "epoch": 0.7665167635552631, "grad_norm": 1.9016907215118408, "learning_rate": 1.2289288005131496e-06, "loss": 0.8498, "step": 62895 }, { "epoch": 0.7665776997794108, "grad_norm": 2.1511762142181396, "learning_rate": 1.2286080821039128e-06, "loss": 0.8029, "step": 62900 }, { "epoch": 0.7666386360035586, "grad_norm": 1.8965522050857544, "learning_rate": 1.228287363694676e-06, "loss": 0.8529, "step": 62905 }, { "epoch": 0.7666995722277065, "grad_norm": 2.358980655670166, "learning_rate": 1.2279666452854395e-06, "loss": 0.8523, "step": 62910 }, { "epoch": 0.7667605084518543, "grad_norm": 1.8304779529571533, "learning_rate": 1.227645926876203e-06, "loss": 0.8215, "step": 62915 }, { "epoch": 0.7668214446760021, "grad_norm": 2.252469301223755, "learning_rate": 1.2273252084669661e-06, "loss": 0.768, "step": 62920 }, { "epoch": 0.7668823809001499, "grad_norm": 1.817500352859497, "learning_rate": 1.2270044900577294e-06, "loss": 0.834, "step": 62925 }, { "epoch": 0.7669433171242978, "grad_norm": 1.8270634412765503, "learning_rate": 1.2266837716484928e-06, "loss": 0.8159, "step": 62930 }, { "epoch": 0.7670042533484455, "grad_norm": 2.2577593326568604, "learning_rate": 1.226363053239256e-06, "loss": 0.81, "step": 62935 }, { "epoch": 0.7670651895725933, "grad_norm": 1.9883372783660889, "learning_rate": 1.2260423348300193e-06, "loss": 0.7974, "step": 62940 }, { "epoch": 0.7671261257967411, "grad_norm": 1.946876049041748, "learning_rate": 1.2257216164207827e-06, "loss": 0.8094, "step": 62945 }, { "epoch": 0.7671870620208889, "grad_norm": 1.7715306282043457, "learning_rate": 1.225400898011546e-06, "loss": 0.8116, "step": 62950 }, { "epoch": 0.7672479982450368, "grad_norm": 1.654343843460083, "learning_rate": 1.2250801796023094e-06, "loss": 0.853, "step": 62955 }, { "epoch": 0.7673089344691846, "grad_norm": 1.8244035243988037, "learning_rate": 1.2247594611930726e-06, "loss": 0.7886, "step": 62960 }, { "epoch": 0.7673698706933324, "grad_norm": 1.906187653541565, "learning_rate": 1.2244387427838358e-06, "loss": 0.8912, "step": 62965 }, { "epoch": 0.7674308069174801, "grad_norm": 1.6593284606933594, "learning_rate": 1.2241180243745993e-06, "loss": 0.8076, "step": 62970 }, { "epoch": 0.7674917431416279, "grad_norm": 1.962896704673767, "learning_rate": 1.2237973059653625e-06, "loss": 0.7875, "step": 62975 }, { "epoch": 0.7675526793657758, "grad_norm": 1.7175978422164917, "learning_rate": 1.2234765875561257e-06, "loss": 0.8171, "step": 62980 }, { "epoch": 0.7676136155899236, "grad_norm": 2.092385768890381, "learning_rate": 1.2231558691468892e-06, "loss": 0.8026, "step": 62985 }, { "epoch": 0.7676745518140714, "grad_norm": 1.8867584466934204, "learning_rate": 1.2228351507376524e-06, "loss": 0.831, "step": 62990 }, { "epoch": 0.7677354880382192, "grad_norm": 2.968634605407715, "learning_rate": 1.2225144323284158e-06, "loss": 0.8327, "step": 62995 }, { "epoch": 0.767796424262367, "grad_norm": 1.825283408164978, "learning_rate": 1.222193713919179e-06, "loss": 0.7985, "step": 63000 }, { "epoch": 0.7678573604865148, "grad_norm": 1.9542553424835205, "learning_rate": 1.2218729955099423e-06, "loss": 0.7737, "step": 63005 }, { "epoch": 0.7679182967106626, "grad_norm": 1.9372678995132446, "learning_rate": 1.2215522771007057e-06, "loss": 0.7056, "step": 63010 }, { "epoch": 0.7679792329348104, "grad_norm": 2.1262638568878174, "learning_rate": 1.221231558691469e-06, "loss": 0.8053, "step": 63015 }, { "epoch": 0.7680401691589582, "grad_norm": 1.9613265991210938, "learning_rate": 1.2209108402822322e-06, "loss": 0.7194, "step": 63020 }, { "epoch": 0.768101105383106, "grad_norm": 2.339931011199951, "learning_rate": 1.2205901218729956e-06, "loss": 0.9383, "step": 63025 }, { "epoch": 0.7681620416072539, "grad_norm": 2.0744566917419434, "learning_rate": 1.2202694034637589e-06, "loss": 0.9309, "step": 63030 }, { "epoch": 0.7682229778314017, "grad_norm": 2.102482318878174, "learning_rate": 1.2199486850545223e-06, "loss": 0.8203, "step": 63035 }, { "epoch": 0.7682839140555494, "grad_norm": 2.061511754989624, "learning_rate": 1.2196279666452855e-06, "loss": 0.8611, "step": 63040 }, { "epoch": 0.7683448502796972, "grad_norm": 1.8619343042373657, "learning_rate": 1.2193072482360488e-06, "loss": 0.8774, "step": 63045 }, { "epoch": 0.7684057865038451, "grad_norm": 2.108844041824341, "learning_rate": 1.2189865298268122e-06, "loss": 0.856, "step": 63050 }, { "epoch": 0.7684667227279929, "grad_norm": 1.951623558998108, "learning_rate": 1.2186658114175754e-06, "loss": 0.8317, "step": 63055 }, { "epoch": 0.7685276589521407, "grad_norm": 1.779855489730835, "learning_rate": 1.2183450930083387e-06, "loss": 0.8089, "step": 63060 }, { "epoch": 0.7685885951762885, "grad_norm": 2.5559849739074707, "learning_rate": 1.218024374599102e-06, "loss": 0.8647, "step": 63065 }, { "epoch": 0.7686495314004363, "grad_norm": 1.7111848592758179, "learning_rate": 1.2177036561898655e-06, "loss": 0.8495, "step": 63070 }, { "epoch": 0.7687104676245841, "grad_norm": 1.6274054050445557, "learning_rate": 1.2173829377806288e-06, "loss": 0.8292, "step": 63075 }, { "epoch": 0.7687714038487319, "grad_norm": 1.923490047454834, "learning_rate": 1.217062219371392e-06, "loss": 0.8405, "step": 63080 }, { "epoch": 0.7688323400728797, "grad_norm": 2.2272751331329346, "learning_rate": 1.2167415009621552e-06, "loss": 0.7793, "step": 63085 }, { "epoch": 0.7688932762970275, "grad_norm": 1.7196346521377563, "learning_rate": 1.2164207825529187e-06, "loss": 0.7862, "step": 63090 }, { "epoch": 0.7689542125211754, "grad_norm": 1.9165618419647217, "learning_rate": 1.2161000641436819e-06, "loss": 0.8532, "step": 63095 }, { "epoch": 0.7690151487453232, "grad_norm": 2.0324912071228027, "learning_rate": 1.2157793457344451e-06, "loss": 0.8405, "step": 63100 }, { "epoch": 0.769076084969471, "grad_norm": 1.8775538206100464, "learning_rate": 1.2154586273252086e-06, "loss": 0.7768, "step": 63105 }, { "epoch": 0.7691370211936187, "grad_norm": 1.9145432710647583, "learning_rate": 1.215137908915972e-06, "loss": 0.7995, "step": 63110 }, { "epoch": 0.7691979574177665, "grad_norm": 1.7786909341812134, "learning_rate": 1.2148171905067352e-06, "loss": 0.8146, "step": 63115 }, { "epoch": 0.7692588936419144, "grad_norm": 2.143585681915283, "learning_rate": 1.2144964720974985e-06, "loss": 0.809, "step": 63120 }, { "epoch": 0.7693198298660622, "grad_norm": 1.9380801916122437, "learning_rate": 1.2141757536882617e-06, "loss": 0.8187, "step": 63125 }, { "epoch": 0.76938076609021, "grad_norm": 2.098536729812622, "learning_rate": 1.2138550352790251e-06, "loss": 0.7482, "step": 63130 }, { "epoch": 0.7694417023143578, "grad_norm": 1.943881869316101, "learning_rate": 1.2135343168697884e-06, "loss": 0.8139, "step": 63135 }, { "epoch": 0.7695026385385056, "grad_norm": 1.5547101497650146, "learning_rate": 1.2132135984605518e-06, "loss": 0.8306, "step": 63140 }, { "epoch": 0.7695635747626534, "grad_norm": 2.3628227710723877, "learning_rate": 1.212892880051315e-06, "loss": 0.8254, "step": 63145 }, { "epoch": 0.7696245109868012, "grad_norm": 1.7691673040390015, "learning_rate": 1.2125721616420785e-06, "loss": 0.8284, "step": 63150 }, { "epoch": 0.769685447210949, "grad_norm": 2.194289207458496, "learning_rate": 1.2122514432328417e-06, "loss": 0.7613, "step": 63155 }, { "epoch": 0.7697463834350968, "grad_norm": 2.2708568572998047, "learning_rate": 1.211930724823605e-06, "loss": 0.8514, "step": 63160 }, { "epoch": 0.7698073196592446, "grad_norm": 2.2058396339416504, "learning_rate": 1.2116100064143684e-06, "loss": 0.8017, "step": 63165 }, { "epoch": 0.7698682558833925, "grad_norm": 2.0880634784698486, "learning_rate": 1.2112892880051316e-06, "loss": 0.8233, "step": 63170 }, { "epoch": 0.7699291921075403, "grad_norm": 1.9387646913528442, "learning_rate": 1.2109685695958948e-06, "loss": 0.8269, "step": 63175 }, { "epoch": 0.769990128331688, "grad_norm": 1.7521476745605469, "learning_rate": 1.2106478511866583e-06, "loss": 0.8667, "step": 63180 }, { "epoch": 0.7700510645558358, "grad_norm": 2.555830478668213, "learning_rate": 1.2103271327774215e-06, "loss": 0.8254, "step": 63185 }, { "epoch": 0.7701120007799837, "grad_norm": 1.9615836143493652, "learning_rate": 1.210006414368185e-06, "loss": 0.8691, "step": 63190 }, { "epoch": 0.7701729370041315, "grad_norm": 1.832857608795166, "learning_rate": 1.2096856959589482e-06, "loss": 0.7612, "step": 63195 }, { "epoch": 0.7702338732282793, "grad_norm": 1.7278205156326294, "learning_rate": 1.2093649775497114e-06, "loss": 0.8113, "step": 63200 }, { "epoch": 0.7702948094524271, "grad_norm": 2.100116729736328, "learning_rate": 1.2090442591404748e-06, "loss": 0.8581, "step": 63205 }, { "epoch": 0.7703557456765749, "grad_norm": 2.0057313442230225, "learning_rate": 1.208723540731238e-06, "loss": 0.8466, "step": 63210 }, { "epoch": 0.7704166819007227, "grad_norm": 1.7449159622192383, "learning_rate": 1.2084028223220013e-06, "loss": 0.8035, "step": 63215 }, { "epoch": 0.7704776181248705, "grad_norm": 2.3140296936035156, "learning_rate": 1.2080821039127647e-06, "loss": 0.8066, "step": 63220 }, { "epoch": 0.7705385543490183, "grad_norm": 1.8049583435058594, "learning_rate": 1.2077613855035282e-06, "loss": 0.8221, "step": 63225 }, { "epoch": 0.7705994905731661, "grad_norm": 2.1539084911346436, "learning_rate": 1.2074406670942914e-06, "loss": 0.8719, "step": 63230 }, { "epoch": 0.770660426797314, "grad_norm": 2.168482542037964, "learning_rate": 1.2071199486850546e-06, "loss": 0.8104, "step": 63235 }, { "epoch": 0.7707213630214618, "grad_norm": 2.039461135864258, "learning_rate": 1.2067992302758178e-06, "loss": 0.7934, "step": 63240 }, { "epoch": 0.7707822992456096, "grad_norm": 1.9194315671920776, "learning_rate": 1.2064785118665813e-06, "loss": 0.8324, "step": 63245 }, { "epoch": 0.7708432354697573, "grad_norm": 1.757789969444275, "learning_rate": 1.2061577934573445e-06, "loss": 0.8137, "step": 63250 }, { "epoch": 0.7709041716939051, "grad_norm": 2.0393338203430176, "learning_rate": 1.2058370750481077e-06, "loss": 0.874, "step": 63255 }, { "epoch": 0.770965107918053, "grad_norm": 1.7718794345855713, "learning_rate": 1.2055163566388712e-06, "loss": 0.8418, "step": 63260 }, { "epoch": 0.7710260441422008, "grad_norm": 2.270209789276123, "learning_rate": 1.2051956382296346e-06, "loss": 0.8519, "step": 63265 }, { "epoch": 0.7710869803663486, "grad_norm": 1.73362398147583, "learning_rate": 1.2048749198203979e-06, "loss": 0.853, "step": 63270 }, { "epoch": 0.7711479165904964, "grad_norm": 1.8478176593780518, "learning_rate": 1.204554201411161e-06, "loss": 0.8034, "step": 63275 }, { "epoch": 0.7712088528146442, "grad_norm": 2.2498457431793213, "learning_rate": 1.2042334830019243e-06, "loss": 0.7553, "step": 63280 }, { "epoch": 0.771269789038792, "grad_norm": 2.005073308944702, "learning_rate": 1.2039127645926878e-06, "loss": 0.7747, "step": 63285 }, { "epoch": 0.7713307252629398, "grad_norm": 1.9334508180618286, "learning_rate": 1.203592046183451e-06, "loss": 0.8147, "step": 63290 }, { "epoch": 0.7713916614870876, "grad_norm": 2.237710952758789, "learning_rate": 1.2032713277742144e-06, "loss": 0.8614, "step": 63295 }, { "epoch": 0.7714525977112354, "grad_norm": 1.637028694152832, "learning_rate": 1.2029506093649777e-06, "loss": 0.8368, "step": 63300 }, { "epoch": 0.7715135339353832, "grad_norm": 2.0153069496154785, "learning_rate": 1.202629890955741e-06, "loss": 0.7712, "step": 63305 }, { "epoch": 0.7715744701595311, "grad_norm": 1.7341444492340088, "learning_rate": 1.2023091725465043e-06, "loss": 0.7481, "step": 63310 }, { "epoch": 0.7716354063836789, "grad_norm": 1.9826854467391968, "learning_rate": 1.2019884541372675e-06, "loss": 0.8099, "step": 63315 }, { "epoch": 0.7716963426078266, "grad_norm": 2.008669137954712, "learning_rate": 1.2016677357280308e-06, "loss": 0.8227, "step": 63320 }, { "epoch": 0.7717572788319744, "grad_norm": 1.5231012105941772, "learning_rate": 1.2013470173187942e-06, "loss": 0.8271, "step": 63325 }, { "epoch": 0.7718182150561222, "grad_norm": 2.092792272567749, "learning_rate": 1.2010262989095574e-06, "loss": 0.7751, "step": 63330 }, { "epoch": 0.7718791512802701, "grad_norm": 3.1727614402770996, "learning_rate": 1.2007055805003209e-06, "loss": 0.7727, "step": 63335 }, { "epoch": 0.7719400875044179, "grad_norm": 2.0190176963806152, "learning_rate": 1.2003848620910841e-06, "loss": 0.8207, "step": 63340 }, { "epoch": 0.7720010237285657, "grad_norm": 2.4180679321289062, "learning_rate": 1.2000641436818476e-06, "loss": 0.8616, "step": 63345 }, { "epoch": 0.7720619599527135, "grad_norm": 1.9691649675369263, "learning_rate": 1.1997434252726108e-06, "loss": 0.783, "step": 63350 }, { "epoch": 0.7721228961768613, "grad_norm": 2.181273937225342, "learning_rate": 1.199422706863374e-06, "loss": 0.7703, "step": 63355 }, { "epoch": 0.7721838324010091, "grad_norm": 1.7158845663070679, "learning_rate": 1.1991019884541372e-06, "loss": 0.7894, "step": 63360 }, { "epoch": 0.7722447686251569, "grad_norm": 2.0382180213928223, "learning_rate": 1.1987812700449007e-06, "loss": 0.754, "step": 63365 }, { "epoch": 0.7723057048493047, "grad_norm": 1.7180461883544922, "learning_rate": 1.198460551635664e-06, "loss": 0.7984, "step": 63370 }, { "epoch": 0.7723666410734525, "grad_norm": 1.9572110176086426, "learning_rate": 1.1981398332264273e-06, "loss": 0.8464, "step": 63375 }, { "epoch": 0.7724275772976004, "grad_norm": 1.8245844841003418, "learning_rate": 1.1978191148171906e-06, "loss": 0.8272, "step": 63380 }, { "epoch": 0.7724885135217482, "grad_norm": 1.762891411781311, "learning_rate": 1.197498396407954e-06, "loss": 0.7978, "step": 63385 }, { "epoch": 0.7725494497458959, "grad_norm": 1.97169828414917, "learning_rate": 1.1971776779987172e-06, "loss": 0.7912, "step": 63390 }, { "epoch": 0.7726103859700437, "grad_norm": 2.2409708499908447, "learning_rate": 1.1968569595894805e-06, "loss": 0.779, "step": 63395 }, { "epoch": 0.7726713221941915, "grad_norm": 1.8122508525848389, "learning_rate": 1.1965362411802437e-06, "loss": 0.7744, "step": 63400 }, { "epoch": 0.7727322584183394, "grad_norm": 2.28170108795166, "learning_rate": 1.1962155227710071e-06, "loss": 0.8296, "step": 63405 }, { "epoch": 0.7727931946424872, "grad_norm": 1.920878529548645, "learning_rate": 1.1958948043617704e-06, "loss": 0.8536, "step": 63410 }, { "epoch": 0.772854130866635, "grad_norm": 3.965242385864258, "learning_rate": 1.1955740859525338e-06, "loss": 0.8115, "step": 63415 }, { "epoch": 0.7729150670907828, "grad_norm": 1.9380648136138916, "learning_rate": 1.195253367543297e-06, "loss": 0.8143, "step": 63420 }, { "epoch": 0.7729760033149305, "grad_norm": 1.6970300674438477, "learning_rate": 1.1949326491340605e-06, "loss": 0.8412, "step": 63425 }, { "epoch": 0.7730369395390784, "grad_norm": 1.8541605472564697, "learning_rate": 1.1946119307248237e-06, "loss": 0.7793, "step": 63430 }, { "epoch": 0.7730978757632262, "grad_norm": 1.9568908214569092, "learning_rate": 1.194291212315587e-06, "loss": 0.798, "step": 63435 }, { "epoch": 0.773158811987374, "grad_norm": 1.9610427618026733, "learning_rate": 1.1939704939063502e-06, "loss": 0.7925, "step": 63440 }, { "epoch": 0.7732197482115218, "grad_norm": 1.8591550588607788, "learning_rate": 1.1936497754971136e-06, "loss": 0.8434, "step": 63445 }, { "epoch": 0.7732806844356697, "grad_norm": 1.8532192707061768, "learning_rate": 1.1933290570878768e-06, "loss": 0.803, "step": 63450 }, { "epoch": 0.7733416206598175, "grad_norm": 1.987360954284668, "learning_rate": 1.1930083386786403e-06, "loss": 0.7734, "step": 63455 }, { "epoch": 0.7734025568839652, "grad_norm": 1.883995532989502, "learning_rate": 1.1926876202694037e-06, "loss": 0.8004, "step": 63460 }, { "epoch": 0.773463493108113, "grad_norm": 2.1291263103485107, "learning_rate": 1.192366901860167e-06, "loss": 0.8802, "step": 63465 }, { "epoch": 0.7735244293322608, "grad_norm": 1.7735439538955688, "learning_rate": 1.1920461834509302e-06, "loss": 0.8883, "step": 63470 }, { "epoch": 0.7735853655564087, "grad_norm": 2.0401790142059326, "learning_rate": 1.1917254650416934e-06, "loss": 0.8595, "step": 63475 }, { "epoch": 0.7736463017805565, "grad_norm": 1.9800420999526978, "learning_rate": 1.1914047466324568e-06, "loss": 0.7671, "step": 63480 }, { "epoch": 0.7737072380047043, "grad_norm": 2.305917739868164, "learning_rate": 1.19108402822322e-06, "loss": 0.773, "step": 63485 }, { "epoch": 0.7737681742288521, "grad_norm": 2.2227654457092285, "learning_rate": 1.1907633098139835e-06, "loss": 0.8419, "step": 63490 }, { "epoch": 0.7738291104529998, "grad_norm": 1.7549351453781128, "learning_rate": 1.1904425914047467e-06, "loss": 0.8232, "step": 63495 }, { "epoch": 0.7738900466771477, "grad_norm": 2.298276901245117, "learning_rate": 1.1901218729955102e-06, "loss": 0.828, "step": 63500 }, { "epoch": 0.7739509829012955, "grad_norm": 2.0169496536254883, "learning_rate": 1.1898011545862734e-06, "loss": 0.8295, "step": 63505 }, { "epoch": 0.7740119191254433, "grad_norm": 2.468008279800415, "learning_rate": 1.1894804361770366e-06, "loss": 0.8617, "step": 63510 }, { "epoch": 0.7740728553495911, "grad_norm": 1.9944871664047241, "learning_rate": 1.1891597177677999e-06, "loss": 0.7661, "step": 63515 }, { "epoch": 0.774133791573739, "grad_norm": 2.3683102130889893, "learning_rate": 1.1888389993585633e-06, "loss": 0.8574, "step": 63520 }, { "epoch": 0.7741947277978868, "grad_norm": 2.0056042671203613, "learning_rate": 1.1885182809493265e-06, "loss": 0.8887, "step": 63525 }, { "epoch": 0.7742556640220345, "grad_norm": 2.5444254875183105, "learning_rate": 1.18819756254009e-06, "loss": 0.8154, "step": 63530 }, { "epoch": 0.7743166002461823, "grad_norm": 2.0258374214172363, "learning_rate": 1.1878768441308532e-06, "loss": 0.852, "step": 63535 }, { "epoch": 0.7743775364703301, "grad_norm": 1.7683883905410767, "learning_rate": 1.1875561257216166e-06, "loss": 0.8606, "step": 63540 }, { "epoch": 0.774438472694478, "grad_norm": 2.1383655071258545, "learning_rate": 1.1872354073123799e-06, "loss": 0.7786, "step": 63545 }, { "epoch": 0.7744994089186258, "grad_norm": 2.0073463916778564, "learning_rate": 1.186914688903143e-06, "loss": 0.8319, "step": 63550 }, { "epoch": 0.7745603451427736, "grad_norm": 2.074549674987793, "learning_rate": 1.1865939704939063e-06, "loss": 0.8395, "step": 63555 }, { "epoch": 0.7746212813669214, "grad_norm": 1.9571495056152344, "learning_rate": 1.1862732520846698e-06, "loss": 0.8672, "step": 63560 }, { "epoch": 0.7746822175910691, "grad_norm": 1.9590622186660767, "learning_rate": 1.185952533675433e-06, "loss": 0.8584, "step": 63565 }, { "epoch": 0.774743153815217, "grad_norm": 1.969184398651123, "learning_rate": 1.1856318152661964e-06, "loss": 0.8411, "step": 63570 }, { "epoch": 0.7748040900393648, "grad_norm": 2.148684501647949, "learning_rate": 1.1853110968569597e-06, "loss": 0.8243, "step": 63575 }, { "epoch": 0.7748650262635126, "grad_norm": 1.7558062076568604, "learning_rate": 1.1849903784477231e-06, "loss": 0.8066, "step": 63580 }, { "epoch": 0.7749259624876604, "grad_norm": 2.1678524017333984, "learning_rate": 1.1846696600384863e-06, "loss": 0.8479, "step": 63585 }, { "epoch": 0.7749868987118083, "grad_norm": 1.7322731018066406, "learning_rate": 1.1843489416292496e-06, "loss": 0.8495, "step": 63590 }, { "epoch": 0.7750478349359561, "grad_norm": 1.669014573097229, "learning_rate": 1.1840282232200128e-06, "loss": 0.7775, "step": 63595 }, { "epoch": 0.7751087711601038, "grad_norm": 1.8031566143035889, "learning_rate": 1.1837075048107762e-06, "loss": 0.8239, "step": 63600 }, { "epoch": 0.7751697073842516, "grad_norm": 1.8365577459335327, "learning_rate": 1.1833867864015395e-06, "loss": 0.789, "step": 63605 }, { "epoch": 0.7752306436083994, "grad_norm": 2.358477830886841, "learning_rate": 1.183066067992303e-06, "loss": 0.8305, "step": 63610 }, { "epoch": 0.7752915798325473, "grad_norm": 2.265667676925659, "learning_rate": 1.1827453495830661e-06, "loss": 0.8391, "step": 63615 }, { "epoch": 0.7753525160566951, "grad_norm": 2.2829272747039795, "learning_rate": 1.1824246311738296e-06, "loss": 0.8112, "step": 63620 }, { "epoch": 0.7754134522808429, "grad_norm": 1.8174833059310913, "learning_rate": 1.1821039127645928e-06, "loss": 0.8202, "step": 63625 }, { "epoch": 0.7754743885049907, "grad_norm": 1.7898942232131958, "learning_rate": 1.181783194355356e-06, "loss": 0.8449, "step": 63630 }, { "epoch": 0.7755353247291384, "grad_norm": 2.512967586517334, "learning_rate": 1.1814624759461193e-06, "loss": 0.7854, "step": 63635 }, { "epoch": 0.7755962609532863, "grad_norm": 1.6873390674591064, "learning_rate": 1.1811417575368827e-06, "loss": 0.8405, "step": 63640 }, { "epoch": 0.7756571971774341, "grad_norm": 1.7909654378890991, "learning_rate": 1.1808210391276461e-06, "loss": 0.905, "step": 63645 }, { "epoch": 0.7757181334015819, "grad_norm": 2.1615638732910156, "learning_rate": 1.1805003207184094e-06, "loss": 0.8564, "step": 63650 }, { "epoch": 0.7757790696257297, "grad_norm": 1.7533913850784302, "learning_rate": 1.1801796023091726e-06, "loss": 0.7605, "step": 63655 }, { "epoch": 0.7758400058498776, "grad_norm": 1.9045579433441162, "learning_rate": 1.179858883899936e-06, "loss": 0.7912, "step": 63660 }, { "epoch": 0.7759009420740254, "grad_norm": 1.579532265663147, "learning_rate": 1.1795381654906993e-06, "loss": 0.7885, "step": 63665 }, { "epoch": 0.7759618782981731, "grad_norm": 1.917103886604309, "learning_rate": 1.1792174470814625e-06, "loss": 0.6846, "step": 63670 }, { "epoch": 0.7760228145223209, "grad_norm": 1.909380555152893, "learning_rate": 1.1788967286722257e-06, "loss": 0.8512, "step": 63675 }, { "epoch": 0.7760837507464687, "grad_norm": 1.9701837301254272, "learning_rate": 1.1785760102629892e-06, "loss": 0.82, "step": 63680 }, { "epoch": 0.7761446869706166, "grad_norm": 1.8617230653762817, "learning_rate": 1.1782552918537526e-06, "loss": 0.8028, "step": 63685 }, { "epoch": 0.7762056231947644, "grad_norm": 1.8840430974960327, "learning_rate": 1.1779345734445158e-06, "loss": 0.7811, "step": 63690 }, { "epoch": 0.7762665594189122, "grad_norm": 1.9402638673782349, "learning_rate": 1.177613855035279e-06, "loss": 0.7471, "step": 63695 }, { "epoch": 0.77632749564306, "grad_norm": 1.9971619844436646, "learning_rate": 1.1772931366260425e-06, "loss": 0.763, "step": 63700 }, { "epoch": 0.7763884318672077, "grad_norm": 1.910717487335205, "learning_rate": 1.1769724182168057e-06, "loss": 0.7662, "step": 63705 }, { "epoch": 0.7764493680913556, "grad_norm": 1.7530813217163086, "learning_rate": 1.176651699807569e-06, "loss": 0.7927, "step": 63710 }, { "epoch": 0.7765103043155034, "grad_norm": 2.397965431213379, "learning_rate": 1.1763309813983324e-06, "loss": 0.8086, "step": 63715 }, { "epoch": 0.7765712405396512, "grad_norm": 1.8856899738311768, "learning_rate": 1.1760102629890956e-06, "loss": 0.9133, "step": 63720 }, { "epoch": 0.776632176763799, "grad_norm": 1.7195206880569458, "learning_rate": 1.175689544579859e-06, "loss": 0.8547, "step": 63725 }, { "epoch": 0.7766931129879469, "grad_norm": 1.97159743309021, "learning_rate": 1.1753688261706223e-06, "loss": 0.8451, "step": 63730 }, { "epoch": 0.7767540492120947, "grad_norm": 2.4144654273986816, "learning_rate": 1.1750481077613855e-06, "loss": 0.8043, "step": 63735 }, { "epoch": 0.7768149854362424, "grad_norm": 1.9978941679000854, "learning_rate": 1.174727389352149e-06, "loss": 0.8356, "step": 63740 }, { "epoch": 0.7768759216603902, "grad_norm": 2.1203432083129883, "learning_rate": 1.1744066709429122e-06, "loss": 0.7729, "step": 63745 }, { "epoch": 0.776936857884538, "grad_norm": 2.2523066997528076, "learning_rate": 1.1740859525336754e-06, "loss": 0.823, "step": 63750 }, { "epoch": 0.7769977941086859, "grad_norm": 1.8934342861175537, "learning_rate": 1.1737652341244389e-06, "loss": 0.8009, "step": 63755 }, { "epoch": 0.7770587303328337, "grad_norm": 2.1704261302948, "learning_rate": 1.173444515715202e-06, "loss": 0.8264, "step": 63760 }, { "epoch": 0.7771196665569815, "grad_norm": 1.8167294263839722, "learning_rate": 1.1731237973059655e-06, "loss": 0.8518, "step": 63765 }, { "epoch": 0.7771806027811292, "grad_norm": 1.7142364978790283, "learning_rate": 1.1728030788967288e-06, "loss": 0.8232, "step": 63770 }, { "epoch": 0.777241539005277, "grad_norm": 1.9285380840301514, "learning_rate": 1.1724823604874922e-06, "loss": 0.8697, "step": 63775 }, { "epoch": 0.7773024752294249, "grad_norm": 1.7061901092529297, "learning_rate": 1.1721616420782554e-06, "loss": 0.8241, "step": 63780 }, { "epoch": 0.7773634114535727, "grad_norm": 1.796431064605713, "learning_rate": 1.1718409236690187e-06, "loss": 0.8231, "step": 63785 }, { "epoch": 0.7774243476777205, "grad_norm": 2.058990001678467, "learning_rate": 1.1715202052597819e-06, "loss": 0.8758, "step": 63790 }, { "epoch": 0.7774852839018683, "grad_norm": 2.2870500087738037, "learning_rate": 1.1711994868505453e-06, "loss": 0.828, "step": 63795 }, { "epoch": 0.7775462201260162, "grad_norm": 1.6444171667099, "learning_rate": 1.1708787684413086e-06, "loss": 0.7965, "step": 63800 }, { "epoch": 0.7776071563501639, "grad_norm": 2.100637435913086, "learning_rate": 1.170558050032072e-06, "loss": 0.7518, "step": 63805 }, { "epoch": 0.7776680925743117, "grad_norm": 1.797539472579956, "learning_rate": 1.1702373316228352e-06, "loss": 0.8028, "step": 63810 }, { "epoch": 0.7777290287984595, "grad_norm": 1.809799313545227, "learning_rate": 1.1699166132135987e-06, "loss": 0.8218, "step": 63815 }, { "epoch": 0.7777899650226073, "grad_norm": 1.8464829921722412, "learning_rate": 1.1695958948043619e-06, "loss": 0.7629, "step": 63820 }, { "epoch": 0.7778509012467552, "grad_norm": 2.4307188987731934, "learning_rate": 1.1692751763951251e-06, "loss": 0.8549, "step": 63825 }, { "epoch": 0.777911837470903, "grad_norm": 2.211909294128418, "learning_rate": 1.1689544579858883e-06, "loss": 0.8366, "step": 63830 }, { "epoch": 0.7779727736950508, "grad_norm": 1.8830960988998413, "learning_rate": 1.1686337395766518e-06, "loss": 0.7953, "step": 63835 }, { "epoch": 0.7780337099191985, "grad_norm": 2.1669065952301025, "learning_rate": 1.1683130211674152e-06, "loss": 0.8453, "step": 63840 }, { "epoch": 0.7780946461433463, "grad_norm": 2.016920804977417, "learning_rate": 1.1679923027581785e-06, "loss": 0.8313, "step": 63845 }, { "epoch": 0.7781555823674942, "grad_norm": 2.138890027999878, "learning_rate": 1.1676715843489417e-06, "loss": 0.7769, "step": 63850 }, { "epoch": 0.778216518591642, "grad_norm": 1.9746698141098022, "learning_rate": 1.1673508659397051e-06, "loss": 0.8026, "step": 63855 }, { "epoch": 0.7782774548157898, "grad_norm": 2.026801586151123, "learning_rate": 1.1670301475304684e-06, "loss": 0.8865, "step": 63860 }, { "epoch": 0.7783383910399376, "grad_norm": 2.0411410331726074, "learning_rate": 1.1667094291212316e-06, "loss": 0.8448, "step": 63865 }, { "epoch": 0.7783993272640854, "grad_norm": 1.831388235092163, "learning_rate": 1.166388710711995e-06, "loss": 0.8176, "step": 63870 }, { "epoch": 0.7784602634882332, "grad_norm": 2.1050379276275635, "learning_rate": 1.1660679923027582e-06, "loss": 0.8297, "step": 63875 }, { "epoch": 0.778521199712381, "grad_norm": 1.8443430662155151, "learning_rate": 1.1657472738935217e-06, "loss": 0.8244, "step": 63880 }, { "epoch": 0.7785821359365288, "grad_norm": 1.7172048091888428, "learning_rate": 1.165426555484285e-06, "loss": 0.7895, "step": 63885 }, { "epoch": 0.7786430721606766, "grad_norm": 2.0469255447387695, "learning_rate": 1.1651058370750481e-06, "loss": 0.8496, "step": 63890 }, { "epoch": 0.7787040083848245, "grad_norm": 2.352360248565674, "learning_rate": 1.1647851186658116e-06, "loss": 0.8792, "step": 63895 }, { "epoch": 0.7787649446089723, "grad_norm": 2.0397093296051025, "learning_rate": 1.1644644002565748e-06, "loss": 0.7806, "step": 63900 }, { "epoch": 0.7788258808331201, "grad_norm": 1.876085638999939, "learning_rate": 1.164143681847338e-06, "loss": 0.7638, "step": 63905 }, { "epoch": 0.7788868170572678, "grad_norm": 1.7135697603225708, "learning_rate": 1.1638229634381015e-06, "loss": 0.7923, "step": 63910 }, { "epoch": 0.7789477532814156, "grad_norm": 1.7023321390151978, "learning_rate": 1.1635022450288647e-06, "loss": 0.8883, "step": 63915 }, { "epoch": 0.7790086895055635, "grad_norm": 1.8371245861053467, "learning_rate": 1.1631815266196282e-06, "loss": 0.8167, "step": 63920 }, { "epoch": 0.7790696257297113, "grad_norm": 1.8697419166564941, "learning_rate": 1.1628608082103914e-06, "loss": 0.8265, "step": 63925 }, { "epoch": 0.7791305619538591, "grad_norm": 1.7819571495056152, "learning_rate": 1.1625400898011546e-06, "loss": 0.7382, "step": 63930 }, { "epoch": 0.7791914981780069, "grad_norm": 1.986760139465332, "learning_rate": 1.162219371391918e-06, "loss": 0.7937, "step": 63935 }, { "epoch": 0.7792524344021547, "grad_norm": 2.2185921669006348, "learning_rate": 1.1618986529826813e-06, "loss": 0.8707, "step": 63940 }, { "epoch": 0.7793133706263025, "grad_norm": 1.958683967590332, "learning_rate": 1.1615779345734445e-06, "loss": 0.7703, "step": 63945 }, { "epoch": 0.7793743068504503, "grad_norm": 1.7782678604125977, "learning_rate": 1.161257216164208e-06, "loss": 0.8321, "step": 63950 }, { "epoch": 0.7794352430745981, "grad_norm": 1.6192020177841187, "learning_rate": 1.1609364977549712e-06, "loss": 0.7208, "step": 63955 }, { "epoch": 0.7794961792987459, "grad_norm": 1.8734581470489502, "learning_rate": 1.1606157793457346e-06, "loss": 0.7827, "step": 63960 }, { "epoch": 0.7795571155228938, "grad_norm": 1.9375934600830078, "learning_rate": 1.1602950609364978e-06, "loss": 0.8673, "step": 63965 }, { "epoch": 0.7796180517470416, "grad_norm": 1.7686821222305298, "learning_rate": 1.159974342527261e-06, "loss": 0.7859, "step": 63970 }, { "epoch": 0.7796789879711894, "grad_norm": 1.8236018419265747, "learning_rate": 1.1596536241180245e-06, "loss": 0.8091, "step": 63975 }, { "epoch": 0.7797399241953371, "grad_norm": 1.6720346212387085, "learning_rate": 1.1593329057087877e-06, "loss": 0.7989, "step": 63980 }, { "epoch": 0.7798008604194849, "grad_norm": 1.904428243637085, "learning_rate": 1.159012187299551e-06, "loss": 0.7846, "step": 63985 }, { "epoch": 0.7798617966436328, "grad_norm": 1.595602035522461, "learning_rate": 1.1586914688903144e-06, "loss": 0.7054, "step": 63990 }, { "epoch": 0.7799227328677806, "grad_norm": 1.784741997718811, "learning_rate": 1.1583707504810779e-06, "loss": 0.7843, "step": 63995 }, { "epoch": 0.7799836690919284, "grad_norm": 1.9631154537200928, "learning_rate": 1.158050032071841e-06, "loss": 0.8641, "step": 64000 }, { "epoch": 0.7800446053160762, "grad_norm": 1.744702935218811, "learning_rate": 1.1577293136626043e-06, "loss": 0.6533, "step": 64005 }, { "epoch": 0.780105541540224, "grad_norm": 1.8415002822875977, "learning_rate": 1.1574085952533675e-06, "loss": 0.7702, "step": 64010 }, { "epoch": 0.7801664777643718, "grad_norm": 2.042424440383911, "learning_rate": 1.157087876844131e-06, "loss": 0.8369, "step": 64015 }, { "epoch": 0.7802274139885196, "grad_norm": 2.486304759979248, "learning_rate": 1.1567671584348942e-06, "loss": 0.7747, "step": 64020 }, { "epoch": 0.7802883502126674, "grad_norm": 1.8450812101364136, "learning_rate": 1.1564464400256574e-06, "loss": 0.8377, "step": 64025 }, { "epoch": 0.7803492864368152, "grad_norm": 1.805174469947815, "learning_rate": 1.1561257216164209e-06, "loss": 0.7747, "step": 64030 }, { "epoch": 0.780410222660963, "grad_norm": 1.9855592250823975, "learning_rate": 1.1558050032071843e-06, "loss": 0.8018, "step": 64035 }, { "epoch": 0.7804711588851109, "grad_norm": 1.9540209770202637, "learning_rate": 1.1554842847979475e-06, "loss": 0.7717, "step": 64040 }, { "epoch": 0.7805320951092587, "grad_norm": 2.018733024597168, "learning_rate": 1.1551635663887108e-06, "loss": 0.8413, "step": 64045 }, { "epoch": 0.7805930313334064, "grad_norm": 2.252300500869751, "learning_rate": 1.1548428479794742e-06, "loss": 0.867, "step": 64050 }, { "epoch": 0.7806539675575542, "grad_norm": 1.7428785562515259, "learning_rate": 1.1545221295702374e-06, "loss": 0.7939, "step": 64055 }, { "epoch": 0.780714903781702, "grad_norm": 2.062790632247925, "learning_rate": 1.1542014111610007e-06, "loss": 0.7817, "step": 64060 }, { "epoch": 0.7807758400058499, "grad_norm": 1.80805504322052, "learning_rate": 1.1538806927517641e-06, "loss": 0.8031, "step": 64065 }, { "epoch": 0.7808367762299977, "grad_norm": 2.125290870666504, "learning_rate": 1.1535599743425273e-06, "loss": 0.7473, "step": 64070 }, { "epoch": 0.7808977124541455, "grad_norm": 1.810316562652588, "learning_rate": 1.1532392559332908e-06, "loss": 0.7806, "step": 64075 }, { "epoch": 0.7809586486782933, "grad_norm": 1.964257001876831, "learning_rate": 1.152918537524054e-06, "loss": 0.8438, "step": 64080 }, { "epoch": 0.781019584902441, "grad_norm": 1.852169394493103, "learning_rate": 1.1525978191148172e-06, "loss": 0.8286, "step": 64085 }, { "epoch": 0.7810805211265889, "grad_norm": 2.098783493041992, "learning_rate": 1.1522771007055807e-06, "loss": 0.7945, "step": 64090 }, { "epoch": 0.7811414573507367, "grad_norm": 1.4621789455413818, "learning_rate": 1.151956382296344e-06, "loss": 0.7137, "step": 64095 }, { "epoch": 0.7812023935748845, "grad_norm": 2.0102391242980957, "learning_rate": 1.1516356638871071e-06, "loss": 0.8157, "step": 64100 }, { "epoch": 0.7812633297990323, "grad_norm": 2.028517484664917, "learning_rate": 1.1513149454778706e-06, "loss": 0.816, "step": 64105 }, { "epoch": 0.7813242660231802, "grad_norm": 1.827303409576416, "learning_rate": 1.1509942270686338e-06, "loss": 0.8249, "step": 64110 }, { "epoch": 0.781385202247328, "grad_norm": 2.0589253902435303, "learning_rate": 1.1506735086593972e-06, "loss": 0.851, "step": 64115 }, { "epoch": 0.7814461384714757, "grad_norm": 2.074913263320923, "learning_rate": 1.1503527902501605e-06, "loss": 0.9094, "step": 64120 }, { "epoch": 0.7815070746956235, "grad_norm": 1.9514554738998413, "learning_rate": 1.1500320718409237e-06, "loss": 0.8114, "step": 64125 }, { "epoch": 0.7815680109197713, "grad_norm": 1.8801240921020508, "learning_rate": 1.1497113534316871e-06, "loss": 0.7704, "step": 64130 }, { "epoch": 0.7816289471439192, "grad_norm": 2.065112590789795, "learning_rate": 1.1493906350224504e-06, "loss": 0.83, "step": 64135 }, { "epoch": 0.781689883368067, "grad_norm": 1.9536107778549194, "learning_rate": 1.1490699166132136e-06, "loss": 0.8007, "step": 64140 }, { "epoch": 0.7817508195922148, "grad_norm": 1.9790276288986206, "learning_rate": 1.148749198203977e-06, "loss": 0.8063, "step": 64145 }, { "epoch": 0.7818117558163626, "grad_norm": 1.7636991739273071, "learning_rate": 1.1484284797947403e-06, "loss": 0.7819, "step": 64150 }, { "epoch": 0.7818726920405104, "grad_norm": 1.7798806428909302, "learning_rate": 1.1481077613855037e-06, "loss": 0.8198, "step": 64155 }, { "epoch": 0.7819336282646582, "grad_norm": 1.92875337600708, "learning_rate": 1.147787042976267e-06, "loss": 0.75, "step": 64160 }, { "epoch": 0.781994564488806, "grad_norm": 2.635375738143921, "learning_rate": 1.1474663245670302e-06, "loss": 0.8102, "step": 64165 }, { "epoch": 0.7820555007129538, "grad_norm": 1.7964849472045898, "learning_rate": 1.1471456061577936e-06, "loss": 0.8136, "step": 64170 }, { "epoch": 0.7821164369371016, "grad_norm": 1.933982014656067, "learning_rate": 1.1468248877485568e-06, "loss": 0.8205, "step": 64175 }, { "epoch": 0.7821773731612495, "grad_norm": 1.811476707458496, "learning_rate": 1.14650416933932e-06, "loss": 0.7938, "step": 64180 }, { "epoch": 0.7822383093853973, "grad_norm": 1.9007821083068848, "learning_rate": 1.1461834509300835e-06, "loss": 0.7208, "step": 64185 }, { "epoch": 0.782299245609545, "grad_norm": 2.063345432281494, "learning_rate": 1.145862732520847e-06, "loss": 0.8284, "step": 64190 }, { "epoch": 0.7823601818336928, "grad_norm": 1.971948504447937, "learning_rate": 1.1455420141116102e-06, "loss": 0.8018, "step": 64195 }, { "epoch": 0.7824211180578406, "grad_norm": 2.246854543685913, "learning_rate": 1.1452212957023734e-06, "loss": 0.866, "step": 64200 }, { "epoch": 0.7824820542819885, "grad_norm": 2.1135313510894775, "learning_rate": 1.1449005772931366e-06, "loss": 0.8419, "step": 64205 }, { "epoch": 0.7825429905061363, "grad_norm": 2.224088430404663, "learning_rate": 1.1445798588839e-06, "loss": 0.8123, "step": 64210 }, { "epoch": 0.7826039267302841, "grad_norm": 2.304441213607788, "learning_rate": 1.1442591404746633e-06, "loss": 0.899, "step": 64215 }, { "epoch": 0.7826648629544319, "grad_norm": 1.9908645153045654, "learning_rate": 1.1439384220654265e-06, "loss": 0.8185, "step": 64220 }, { "epoch": 0.7827257991785797, "grad_norm": 1.9466272592544556, "learning_rate": 1.14361770365619e-06, "loss": 0.7901, "step": 64225 }, { "epoch": 0.7827867354027275, "grad_norm": 1.8000564575195312, "learning_rate": 1.1432969852469534e-06, "loss": 0.8149, "step": 64230 }, { "epoch": 0.7828476716268753, "grad_norm": 2.0536177158355713, "learning_rate": 1.1429762668377166e-06, "loss": 0.7484, "step": 64235 }, { "epoch": 0.7829086078510231, "grad_norm": 1.9754657745361328, "learning_rate": 1.1426555484284799e-06, "loss": 0.8086, "step": 64240 }, { "epoch": 0.7829695440751709, "grad_norm": 2.0228447914123535, "learning_rate": 1.142334830019243e-06, "loss": 0.8374, "step": 64245 }, { "epoch": 0.7830304802993188, "grad_norm": 2.1678383350372314, "learning_rate": 1.1420141116100065e-06, "loss": 0.8322, "step": 64250 }, { "epoch": 0.7830914165234666, "grad_norm": 1.7901332378387451, "learning_rate": 1.1416933932007698e-06, "loss": 0.8667, "step": 64255 }, { "epoch": 0.7831523527476143, "grad_norm": 1.8328328132629395, "learning_rate": 1.1413726747915332e-06, "loss": 0.8029, "step": 64260 }, { "epoch": 0.7832132889717621, "grad_norm": 2.3514344692230225, "learning_rate": 1.1410519563822964e-06, "loss": 0.773, "step": 64265 }, { "epoch": 0.78327422519591, "grad_norm": 2.709078550338745, "learning_rate": 1.1407312379730599e-06, "loss": 0.8781, "step": 64270 }, { "epoch": 0.7833351614200578, "grad_norm": 2.390637159347534, "learning_rate": 1.140410519563823e-06, "loss": 0.766, "step": 64275 }, { "epoch": 0.7833960976442056, "grad_norm": 2.358039617538452, "learning_rate": 1.1400898011545863e-06, "loss": 0.8109, "step": 64280 }, { "epoch": 0.7834570338683534, "grad_norm": 1.8938839435577393, "learning_rate": 1.1397690827453496e-06, "loss": 0.8133, "step": 64285 }, { "epoch": 0.7835179700925012, "grad_norm": 2.209468126296997, "learning_rate": 1.139448364336113e-06, "loss": 0.7816, "step": 64290 }, { "epoch": 0.783578906316649, "grad_norm": 1.734086513519287, "learning_rate": 1.1391276459268762e-06, "loss": 0.7626, "step": 64295 }, { "epoch": 0.7836398425407968, "grad_norm": 2.069701671600342, "learning_rate": 1.1388069275176397e-06, "loss": 0.8113, "step": 64300 }, { "epoch": 0.7837007787649446, "grad_norm": 2.193920135498047, "learning_rate": 1.1384862091084029e-06, "loss": 0.7933, "step": 64305 }, { "epoch": 0.7837617149890924, "grad_norm": 2.0803725719451904, "learning_rate": 1.1381654906991663e-06, "loss": 0.8212, "step": 64310 }, { "epoch": 0.7838226512132402, "grad_norm": 2.1857540607452393, "learning_rate": 1.1378447722899296e-06, "loss": 0.784, "step": 64315 }, { "epoch": 0.7838835874373881, "grad_norm": 2.0629053115844727, "learning_rate": 1.1375240538806928e-06, "loss": 0.8916, "step": 64320 }, { "epoch": 0.7839445236615359, "grad_norm": 1.7437827587127686, "learning_rate": 1.137203335471456e-06, "loss": 0.8518, "step": 64325 }, { "epoch": 0.7840054598856836, "grad_norm": 2.0752851963043213, "learning_rate": 1.1368826170622195e-06, "loss": 0.7864, "step": 64330 }, { "epoch": 0.7840663961098314, "grad_norm": 1.7405292987823486, "learning_rate": 1.1365618986529827e-06, "loss": 0.8422, "step": 64335 }, { "epoch": 0.7841273323339792, "grad_norm": 2.0148026943206787, "learning_rate": 1.1362411802437461e-06, "loss": 0.8436, "step": 64340 }, { "epoch": 0.7841882685581271, "grad_norm": 1.9856040477752686, "learning_rate": 1.1359204618345094e-06, "loss": 0.8533, "step": 64345 }, { "epoch": 0.7842492047822749, "grad_norm": 1.9251527786254883, "learning_rate": 1.1355997434252728e-06, "loss": 0.8537, "step": 64350 }, { "epoch": 0.7843101410064227, "grad_norm": 1.7881262302398682, "learning_rate": 1.135279025016036e-06, "loss": 0.7775, "step": 64355 }, { "epoch": 0.7843710772305705, "grad_norm": 1.8944432735443115, "learning_rate": 1.1349583066067993e-06, "loss": 0.7895, "step": 64360 }, { "epoch": 0.7844320134547182, "grad_norm": 2.0224666595458984, "learning_rate": 1.1346375881975627e-06, "loss": 0.8119, "step": 64365 }, { "epoch": 0.7844929496788661, "grad_norm": 1.7933343648910522, "learning_rate": 1.134316869788326e-06, "loss": 0.8116, "step": 64370 }, { "epoch": 0.7845538859030139, "grad_norm": 1.7318106889724731, "learning_rate": 1.1339961513790891e-06, "loss": 0.7449, "step": 64375 }, { "epoch": 0.7846148221271617, "grad_norm": 1.8776966333389282, "learning_rate": 1.1336754329698526e-06, "loss": 0.8051, "step": 64380 }, { "epoch": 0.7846757583513095, "grad_norm": 2.0626368522644043, "learning_rate": 1.133354714560616e-06, "loss": 0.7849, "step": 64385 }, { "epoch": 0.7847366945754574, "grad_norm": 2.3055102825164795, "learning_rate": 1.1330339961513793e-06, "loss": 0.8021, "step": 64390 }, { "epoch": 0.7847976307996052, "grad_norm": 2.1383936405181885, "learning_rate": 1.1327132777421425e-06, "loss": 0.8354, "step": 64395 }, { "epoch": 0.7848585670237529, "grad_norm": 1.7998405694961548, "learning_rate": 1.1323925593329057e-06, "loss": 0.8141, "step": 64400 }, { "epoch": 0.7849195032479007, "grad_norm": 2.181178569793701, "learning_rate": 1.1320718409236692e-06, "loss": 0.7567, "step": 64405 }, { "epoch": 0.7849804394720485, "grad_norm": 1.7673676013946533, "learning_rate": 1.1317511225144324e-06, "loss": 0.8151, "step": 64410 }, { "epoch": 0.7850413756961964, "grad_norm": 1.9526492357254028, "learning_rate": 1.1314304041051958e-06, "loss": 0.7982, "step": 64415 }, { "epoch": 0.7851023119203442, "grad_norm": 1.808546781539917, "learning_rate": 1.131109685695959e-06, "loss": 0.7971, "step": 64420 }, { "epoch": 0.785163248144492, "grad_norm": 1.929230809211731, "learning_rate": 1.1307889672867225e-06, "loss": 0.7932, "step": 64425 }, { "epoch": 0.7852241843686398, "grad_norm": 2.2014665603637695, "learning_rate": 1.1304682488774857e-06, "loss": 0.8177, "step": 64430 }, { "epoch": 0.7852851205927875, "grad_norm": 2.0537354946136475, "learning_rate": 1.130147530468249e-06, "loss": 0.7783, "step": 64435 }, { "epoch": 0.7853460568169354, "grad_norm": 1.9387601613998413, "learning_rate": 1.1298268120590122e-06, "loss": 0.7789, "step": 64440 }, { "epoch": 0.7854069930410832, "grad_norm": 1.8339030742645264, "learning_rate": 1.1295060936497756e-06, "loss": 0.7331, "step": 64445 }, { "epoch": 0.785467929265231, "grad_norm": 2.0419232845306396, "learning_rate": 1.1291853752405388e-06, "loss": 0.7789, "step": 64450 }, { "epoch": 0.7855288654893788, "grad_norm": 1.9604988098144531, "learning_rate": 1.1288646568313023e-06, "loss": 0.8363, "step": 64455 }, { "epoch": 0.7855898017135267, "grad_norm": 1.7115477323532104, "learning_rate": 1.1285439384220655e-06, "loss": 0.8598, "step": 64460 }, { "epoch": 0.7856507379376745, "grad_norm": 2.042687177658081, "learning_rate": 1.128223220012829e-06, "loss": 0.7981, "step": 64465 }, { "epoch": 0.7857116741618222, "grad_norm": 2.0795350074768066, "learning_rate": 1.1279025016035922e-06, "loss": 0.8419, "step": 64470 }, { "epoch": 0.78577261038597, "grad_norm": 1.953253149986267, "learning_rate": 1.1275817831943554e-06, "loss": 0.8126, "step": 64475 }, { "epoch": 0.7858335466101178, "grad_norm": 2.119396209716797, "learning_rate": 1.1272610647851186e-06, "loss": 0.8336, "step": 64480 }, { "epoch": 0.7858944828342657, "grad_norm": 2.0167930126190186, "learning_rate": 1.126940346375882e-06, "loss": 0.8674, "step": 64485 }, { "epoch": 0.7859554190584135, "grad_norm": 1.7672724723815918, "learning_rate": 1.1266196279666453e-06, "loss": 0.8671, "step": 64490 }, { "epoch": 0.7860163552825613, "grad_norm": 2.135049343109131, "learning_rate": 1.1262989095574088e-06, "loss": 0.8616, "step": 64495 }, { "epoch": 0.7860772915067091, "grad_norm": 1.8195302486419678, "learning_rate": 1.125978191148172e-06, "loss": 0.8647, "step": 64500 }, { "epoch": 0.7861382277308568, "grad_norm": 1.969638466835022, "learning_rate": 1.1256574727389354e-06, "loss": 0.7254, "step": 64505 }, { "epoch": 0.7861991639550047, "grad_norm": 1.8717790842056274, "learning_rate": 1.1253367543296986e-06, "loss": 0.8127, "step": 64510 }, { "epoch": 0.7862601001791525, "grad_norm": 1.793777585029602, "learning_rate": 1.1250160359204619e-06, "loss": 0.8032, "step": 64515 }, { "epoch": 0.7863210364033003, "grad_norm": 1.9911237955093384, "learning_rate": 1.1246953175112251e-06, "loss": 0.835, "step": 64520 }, { "epoch": 0.7863819726274481, "grad_norm": 2.0409815311431885, "learning_rate": 1.1243745991019885e-06, "loss": 0.7931, "step": 64525 }, { "epoch": 0.786442908851596, "grad_norm": 1.7944306135177612, "learning_rate": 1.1240538806927518e-06, "loss": 0.7921, "step": 64530 }, { "epoch": 0.7865038450757438, "grad_norm": 2.334639072418213, "learning_rate": 1.1237331622835152e-06, "loss": 0.7484, "step": 64535 }, { "epoch": 0.7865647812998915, "grad_norm": 1.9031181335449219, "learning_rate": 1.1234124438742784e-06, "loss": 0.8042, "step": 64540 }, { "epoch": 0.7866257175240393, "grad_norm": 2.0671498775482178, "learning_rate": 1.1230917254650419e-06, "loss": 0.8021, "step": 64545 }, { "epoch": 0.7866866537481871, "grad_norm": 1.8834351301193237, "learning_rate": 1.1227710070558051e-06, "loss": 0.7819, "step": 64550 }, { "epoch": 0.786747589972335, "grad_norm": 2.1924350261688232, "learning_rate": 1.1224502886465683e-06, "loss": 0.7655, "step": 64555 }, { "epoch": 0.7868085261964828, "grad_norm": 2.114090919494629, "learning_rate": 1.1221295702373316e-06, "loss": 0.8077, "step": 64560 }, { "epoch": 0.7868694624206306, "grad_norm": 1.6768834590911865, "learning_rate": 1.121808851828095e-06, "loss": 0.7373, "step": 64565 }, { "epoch": 0.7869303986447784, "grad_norm": 2.0082716941833496, "learning_rate": 1.1214881334188582e-06, "loss": 0.8139, "step": 64570 }, { "epoch": 0.7869913348689261, "grad_norm": 2.2692525386810303, "learning_rate": 1.1211674150096217e-06, "loss": 0.8124, "step": 64575 }, { "epoch": 0.787052271093074, "grad_norm": 2.031851291656494, "learning_rate": 1.120846696600385e-06, "loss": 0.8195, "step": 64580 }, { "epoch": 0.7871132073172218, "grad_norm": 2.187554359436035, "learning_rate": 1.1205259781911483e-06, "loss": 0.802, "step": 64585 }, { "epoch": 0.7871741435413696, "grad_norm": 2.0553829669952393, "learning_rate": 1.1202052597819116e-06, "loss": 0.8725, "step": 64590 }, { "epoch": 0.7872350797655174, "grad_norm": 1.7835313081741333, "learning_rate": 1.1198845413726748e-06, "loss": 0.8185, "step": 64595 }, { "epoch": 0.7872960159896653, "grad_norm": 2.3306374549865723, "learning_rate": 1.119563822963438e-06, "loss": 0.8017, "step": 64600 }, { "epoch": 0.7873569522138131, "grad_norm": 1.8889764547348022, "learning_rate": 1.1192431045542015e-06, "loss": 0.7985, "step": 64605 }, { "epoch": 0.7874178884379608, "grad_norm": 1.9671169519424438, "learning_rate": 1.118922386144965e-06, "loss": 0.7902, "step": 64610 }, { "epoch": 0.7874788246621086, "grad_norm": 1.8079339265823364, "learning_rate": 1.1186016677357281e-06, "loss": 0.8476, "step": 64615 }, { "epoch": 0.7875397608862564, "grad_norm": 2.7626547813415527, "learning_rate": 1.1182809493264914e-06, "loss": 0.8471, "step": 64620 }, { "epoch": 0.7876006971104043, "grad_norm": 2.414998769760132, "learning_rate": 1.1179602309172548e-06, "loss": 0.8014, "step": 64625 }, { "epoch": 0.7876616333345521, "grad_norm": 1.8035287857055664, "learning_rate": 1.117639512508018e-06, "loss": 0.8315, "step": 64630 }, { "epoch": 0.7877225695586999, "grad_norm": 1.950028419494629, "learning_rate": 1.1173187940987813e-06, "loss": 0.844, "step": 64635 }, { "epoch": 0.7877835057828477, "grad_norm": 2.1361870765686035, "learning_rate": 1.1169980756895447e-06, "loss": 0.8275, "step": 64640 }, { "epoch": 0.7878444420069954, "grad_norm": 1.9958122968673706, "learning_rate": 1.116677357280308e-06, "loss": 0.8101, "step": 64645 }, { "epoch": 0.7879053782311433, "grad_norm": 2.0928211212158203, "learning_rate": 1.1163566388710714e-06, "loss": 0.7351, "step": 64650 }, { "epoch": 0.7879663144552911, "grad_norm": 1.7214937210083008, "learning_rate": 1.1160359204618346e-06, "loss": 0.784, "step": 64655 }, { "epoch": 0.7880272506794389, "grad_norm": 2.4821033477783203, "learning_rate": 1.115715202052598e-06, "loss": 0.7715, "step": 64660 }, { "epoch": 0.7880881869035867, "grad_norm": 2.076625108718872, "learning_rate": 1.1153944836433613e-06, "loss": 0.784, "step": 64665 }, { "epoch": 0.7881491231277346, "grad_norm": 1.9785959720611572, "learning_rate": 1.1150737652341245e-06, "loss": 0.7523, "step": 64670 }, { "epoch": 0.7882100593518824, "grad_norm": 1.7198541164398193, "learning_rate": 1.1147530468248877e-06, "loss": 0.7835, "step": 64675 }, { "epoch": 0.7882709955760301, "grad_norm": 1.7742596864700317, "learning_rate": 1.1144323284156512e-06, "loss": 0.8343, "step": 64680 }, { "epoch": 0.7883319318001779, "grad_norm": 2.246781587600708, "learning_rate": 1.1141116100064144e-06, "loss": 0.8171, "step": 64685 }, { "epoch": 0.7883928680243257, "grad_norm": 1.8817371129989624, "learning_rate": 1.1137908915971778e-06, "loss": 0.7836, "step": 64690 }, { "epoch": 0.7884538042484736, "grad_norm": 2.1429460048675537, "learning_rate": 1.113470173187941e-06, "loss": 0.7594, "step": 64695 }, { "epoch": 0.7885147404726214, "grad_norm": 1.773041844367981, "learning_rate": 1.1131494547787045e-06, "loss": 0.8025, "step": 64700 }, { "epoch": 0.7885756766967692, "grad_norm": 1.7463423013687134, "learning_rate": 1.1128287363694677e-06, "loss": 0.7843, "step": 64705 }, { "epoch": 0.7886366129209169, "grad_norm": 1.8952531814575195, "learning_rate": 1.112508017960231e-06, "loss": 0.8282, "step": 64710 }, { "epoch": 0.7886975491450647, "grad_norm": 2.008429765701294, "learning_rate": 1.1121872995509942e-06, "loss": 0.8615, "step": 64715 }, { "epoch": 0.7887584853692126, "grad_norm": 2.279069185256958, "learning_rate": 1.1118665811417576e-06, "loss": 0.8213, "step": 64720 }, { "epoch": 0.7888194215933604, "grad_norm": 1.7173404693603516, "learning_rate": 1.1115458627325209e-06, "loss": 0.7945, "step": 64725 }, { "epoch": 0.7888803578175082, "grad_norm": 1.8891953229904175, "learning_rate": 1.1112251443232843e-06, "loss": 0.7937, "step": 64730 }, { "epoch": 0.788941294041656, "grad_norm": 2.0112972259521484, "learning_rate": 1.1109044259140475e-06, "loss": 0.8109, "step": 64735 }, { "epoch": 0.7890022302658038, "grad_norm": 1.9127211570739746, "learning_rate": 1.110583707504811e-06, "loss": 0.7617, "step": 64740 }, { "epoch": 0.7890631664899516, "grad_norm": 1.869222640991211, "learning_rate": 1.1102629890955742e-06, "loss": 0.8341, "step": 64745 }, { "epoch": 0.7891241027140994, "grad_norm": 1.7758108377456665, "learning_rate": 1.1099422706863374e-06, "loss": 0.7725, "step": 64750 }, { "epoch": 0.7891850389382472, "grad_norm": 1.9138859510421753, "learning_rate": 1.1096215522771007e-06, "loss": 0.8347, "step": 64755 }, { "epoch": 0.789245975162395, "grad_norm": 1.8838924169540405, "learning_rate": 1.109300833867864e-06, "loss": 0.8555, "step": 64760 }, { "epoch": 0.7893069113865429, "grad_norm": 1.7273024320602417, "learning_rate": 1.1089801154586275e-06, "loss": 0.8757, "step": 64765 }, { "epoch": 0.7893678476106907, "grad_norm": 2.0225462913513184, "learning_rate": 1.1086593970493908e-06, "loss": 0.8062, "step": 64770 }, { "epoch": 0.7894287838348385, "grad_norm": 2.0392203330993652, "learning_rate": 1.108338678640154e-06, "loss": 0.739, "step": 64775 }, { "epoch": 0.7894897200589862, "grad_norm": 2.330967903137207, "learning_rate": 1.1080179602309174e-06, "loss": 0.8156, "step": 64780 }, { "epoch": 0.789550656283134, "grad_norm": 1.9353790283203125, "learning_rate": 1.1076972418216807e-06, "loss": 0.7503, "step": 64785 }, { "epoch": 0.7896115925072819, "grad_norm": 1.9214130640029907, "learning_rate": 1.1073765234124439e-06, "loss": 0.7718, "step": 64790 }, { "epoch": 0.7896725287314297, "grad_norm": 1.824367642402649, "learning_rate": 1.1070558050032071e-06, "loss": 0.9031, "step": 64795 }, { "epoch": 0.7897334649555775, "grad_norm": 1.7754361629486084, "learning_rate": 1.1067350865939706e-06, "loss": 0.8023, "step": 64800 }, { "epoch": 0.7897944011797253, "grad_norm": 2.0698835849761963, "learning_rate": 1.106414368184734e-06, "loss": 0.8277, "step": 64805 }, { "epoch": 0.7898553374038731, "grad_norm": 2.1529417037963867, "learning_rate": 1.1060936497754972e-06, "loss": 0.8272, "step": 64810 }, { "epoch": 0.7899162736280209, "grad_norm": 1.8847382068634033, "learning_rate": 1.1057729313662605e-06, "loss": 0.8054, "step": 64815 }, { "epoch": 0.7899772098521687, "grad_norm": 1.7279013395309448, "learning_rate": 1.105452212957024e-06, "loss": 0.954, "step": 64820 }, { "epoch": 0.7900381460763165, "grad_norm": 1.727512240409851, "learning_rate": 1.1051314945477871e-06, "loss": 0.8729, "step": 64825 }, { "epoch": 0.7900990823004643, "grad_norm": 2.175436496734619, "learning_rate": 1.1048107761385504e-06, "loss": 0.7984, "step": 64830 }, { "epoch": 0.7901600185246122, "grad_norm": 2.0162386894226074, "learning_rate": 1.1044900577293138e-06, "loss": 0.8978, "step": 64835 }, { "epoch": 0.79022095474876, "grad_norm": 1.8072127103805542, "learning_rate": 1.104169339320077e-06, "loss": 0.7566, "step": 64840 }, { "epoch": 0.7902818909729078, "grad_norm": 2.0308711528778076, "learning_rate": 1.1038486209108405e-06, "loss": 0.7947, "step": 64845 }, { "epoch": 0.7903428271970555, "grad_norm": 1.9273508787155151, "learning_rate": 1.1035279025016037e-06, "loss": 0.8334, "step": 64850 }, { "epoch": 0.7904037634212033, "grad_norm": 1.9689323902130127, "learning_rate": 1.103207184092367e-06, "loss": 0.8364, "step": 64855 }, { "epoch": 0.7904646996453512, "grad_norm": 2.627608060836792, "learning_rate": 1.1028864656831304e-06, "loss": 0.7681, "step": 64860 }, { "epoch": 0.790525635869499, "grad_norm": 1.8864123821258545, "learning_rate": 1.1025657472738936e-06, "loss": 0.8292, "step": 64865 }, { "epoch": 0.7905865720936468, "grad_norm": 1.6925554275512695, "learning_rate": 1.1022450288646568e-06, "loss": 0.8246, "step": 64870 }, { "epoch": 0.7906475083177946, "grad_norm": 2.342939615249634, "learning_rate": 1.1019243104554203e-06, "loss": 0.8475, "step": 64875 }, { "epoch": 0.7907084445419424, "grad_norm": 2.1140875816345215, "learning_rate": 1.1016035920461835e-06, "loss": 0.7768, "step": 64880 }, { "epoch": 0.7907693807660902, "grad_norm": 1.9043391942977905, "learning_rate": 1.101282873636947e-06, "loss": 0.8143, "step": 64885 }, { "epoch": 0.790830316990238, "grad_norm": 1.8781228065490723, "learning_rate": 1.1009621552277102e-06, "loss": 0.9009, "step": 64890 }, { "epoch": 0.7908912532143858, "grad_norm": 2.2804453372955322, "learning_rate": 1.1006414368184734e-06, "loss": 0.8376, "step": 64895 }, { "epoch": 0.7909521894385336, "grad_norm": 2.006505250930786, "learning_rate": 1.1003207184092368e-06, "loss": 0.8003, "step": 64900 }, { "epoch": 0.7910131256626814, "grad_norm": 2.014758825302124, "learning_rate": 1.1e-06, "loss": 0.8011, "step": 64905 }, { "epoch": 0.7910740618868293, "grad_norm": 1.8827743530273438, "learning_rate": 1.0996792815907633e-06, "loss": 0.7458, "step": 64910 }, { "epoch": 0.7911349981109771, "grad_norm": 2.0330593585968018, "learning_rate": 1.0993585631815267e-06, "loss": 0.8439, "step": 64915 }, { "epoch": 0.7911959343351248, "grad_norm": 2.0285112857818604, "learning_rate": 1.09903784477229e-06, "loss": 0.8647, "step": 64920 }, { "epoch": 0.7912568705592726, "grad_norm": 1.8988670110702515, "learning_rate": 1.0987171263630534e-06, "loss": 0.7788, "step": 64925 }, { "epoch": 0.7913178067834205, "grad_norm": 2.0451765060424805, "learning_rate": 1.0983964079538166e-06, "loss": 0.7368, "step": 64930 }, { "epoch": 0.7913787430075683, "grad_norm": 1.7353111505508423, "learning_rate": 1.0980756895445799e-06, "loss": 0.774, "step": 64935 }, { "epoch": 0.7914396792317161, "grad_norm": 2.300098180770874, "learning_rate": 1.0977549711353433e-06, "loss": 0.8003, "step": 64940 }, { "epoch": 0.7915006154558639, "grad_norm": 2.0686750411987305, "learning_rate": 1.0974342527261065e-06, "loss": 0.8595, "step": 64945 }, { "epoch": 0.7915615516800117, "grad_norm": 1.9729772806167603, "learning_rate": 1.0971135343168697e-06, "loss": 0.8603, "step": 64950 }, { "epoch": 0.7916224879041595, "grad_norm": 2.027702808380127, "learning_rate": 1.0967928159076332e-06, "loss": 0.7583, "step": 64955 }, { "epoch": 0.7916834241283073, "grad_norm": 2.116379499435425, "learning_rate": 1.0964720974983966e-06, "loss": 0.7592, "step": 64960 }, { "epoch": 0.7917443603524551, "grad_norm": 1.984281063079834, "learning_rate": 1.0961513790891599e-06, "loss": 0.8434, "step": 64965 }, { "epoch": 0.7918052965766029, "grad_norm": 1.8868650197982788, "learning_rate": 1.095830660679923e-06, "loss": 0.8259, "step": 64970 }, { "epoch": 0.7918662328007507, "grad_norm": 1.9305245876312256, "learning_rate": 1.0955099422706865e-06, "loss": 0.7872, "step": 64975 }, { "epoch": 0.7919271690248986, "grad_norm": 1.712606430053711, "learning_rate": 1.0951892238614498e-06, "loss": 0.8399, "step": 64980 }, { "epoch": 0.7919881052490464, "grad_norm": 1.8490138053894043, "learning_rate": 1.094868505452213e-06, "loss": 0.8354, "step": 64985 }, { "epoch": 0.7920490414731941, "grad_norm": 2.2635657787323, "learning_rate": 1.0945477870429762e-06, "loss": 0.8348, "step": 64990 }, { "epoch": 0.7921099776973419, "grad_norm": 1.9795173406600952, "learning_rate": 1.0942270686337397e-06, "loss": 0.76, "step": 64995 }, { "epoch": 0.7921709139214897, "grad_norm": 2.0715975761413574, "learning_rate": 1.093906350224503e-06, "loss": 0.8683, "step": 65000 }, { "epoch": 0.7922318501456376, "grad_norm": 2.0722837448120117, "learning_rate": 1.0935856318152663e-06, "loss": 0.7699, "step": 65005 }, { "epoch": 0.7922927863697854, "grad_norm": 1.8703737258911133, "learning_rate": 1.0932649134060295e-06, "loss": 0.8004, "step": 65010 }, { "epoch": 0.7923537225939332, "grad_norm": 1.9582124948501587, "learning_rate": 1.092944194996793e-06, "loss": 0.724, "step": 65015 }, { "epoch": 0.792414658818081, "grad_norm": 1.959810733795166, "learning_rate": 1.0926234765875562e-06, "loss": 0.8024, "step": 65020 }, { "epoch": 0.7924755950422288, "grad_norm": 2.2777793407440186, "learning_rate": 1.0923027581783194e-06, "loss": 0.8535, "step": 65025 }, { "epoch": 0.7925365312663766, "grad_norm": 1.7284085750579834, "learning_rate": 1.0919820397690829e-06, "loss": 0.7315, "step": 65030 }, { "epoch": 0.7925974674905244, "grad_norm": 2.057180404663086, "learning_rate": 1.0916613213598461e-06, "loss": 0.8514, "step": 65035 }, { "epoch": 0.7926584037146722, "grad_norm": 1.7513835430145264, "learning_rate": 1.0913406029506096e-06, "loss": 0.8743, "step": 65040 }, { "epoch": 0.79271933993882, "grad_norm": 2.0612313747406006, "learning_rate": 1.0910198845413728e-06, "loss": 0.8009, "step": 65045 }, { "epoch": 0.7927802761629679, "grad_norm": 1.6902143955230713, "learning_rate": 1.090699166132136e-06, "loss": 0.8395, "step": 65050 }, { "epoch": 0.7928412123871157, "grad_norm": 1.8072062730789185, "learning_rate": 1.0903784477228995e-06, "loss": 0.7932, "step": 65055 }, { "epoch": 0.7929021486112634, "grad_norm": 1.7819671630859375, "learning_rate": 1.0900577293136627e-06, "loss": 0.7817, "step": 65060 }, { "epoch": 0.7929630848354112, "grad_norm": 1.7399888038635254, "learning_rate": 1.089737010904426e-06, "loss": 0.8522, "step": 65065 }, { "epoch": 0.793024021059559, "grad_norm": 1.7700105905532837, "learning_rate": 1.0894162924951894e-06, "loss": 0.7979, "step": 65070 }, { "epoch": 0.7930849572837069, "grad_norm": 1.9808056354522705, "learning_rate": 1.0890955740859526e-06, "loss": 0.8414, "step": 65075 }, { "epoch": 0.7931458935078547, "grad_norm": 1.8517719507217407, "learning_rate": 1.088774855676716e-06, "loss": 0.8166, "step": 65080 }, { "epoch": 0.7932068297320025, "grad_norm": 1.8744105100631714, "learning_rate": 1.0884541372674792e-06, "loss": 0.8159, "step": 65085 }, { "epoch": 0.7932677659561503, "grad_norm": 2.1683177947998047, "learning_rate": 1.0881334188582425e-06, "loss": 0.7952, "step": 65090 }, { "epoch": 0.793328702180298, "grad_norm": 1.7833552360534668, "learning_rate": 1.087812700449006e-06, "loss": 0.7446, "step": 65095 }, { "epoch": 0.7933896384044459, "grad_norm": 2.062366247177124, "learning_rate": 1.0874919820397691e-06, "loss": 0.8367, "step": 65100 }, { "epoch": 0.7934505746285937, "grad_norm": 1.7426385879516602, "learning_rate": 1.0871712636305324e-06, "loss": 0.83, "step": 65105 }, { "epoch": 0.7935115108527415, "grad_norm": 2.043785333633423, "learning_rate": 1.0868505452212958e-06, "loss": 0.8918, "step": 65110 }, { "epoch": 0.7935724470768893, "grad_norm": 1.8981666564941406, "learning_rate": 1.0865298268120593e-06, "loss": 0.7845, "step": 65115 }, { "epoch": 0.7936333833010372, "grad_norm": 1.8357024192810059, "learning_rate": 1.0862091084028225e-06, "loss": 0.8182, "step": 65120 }, { "epoch": 0.793694319525185, "grad_norm": 2.0004658699035645, "learning_rate": 1.0858883899935857e-06, "loss": 0.7562, "step": 65125 }, { "epoch": 0.7937552557493327, "grad_norm": 2.2438082695007324, "learning_rate": 1.085567671584349e-06, "loss": 0.8274, "step": 65130 }, { "epoch": 0.7938161919734805, "grad_norm": 2.3553214073181152, "learning_rate": 1.0852469531751124e-06, "loss": 0.8549, "step": 65135 }, { "epoch": 0.7938771281976283, "grad_norm": 2.939389944076538, "learning_rate": 1.0849262347658756e-06, "loss": 0.8203, "step": 65140 }, { "epoch": 0.7939380644217762, "grad_norm": 2.004476547241211, "learning_rate": 1.0846055163566388e-06, "loss": 0.7586, "step": 65145 }, { "epoch": 0.793999000645924, "grad_norm": 1.9818038940429688, "learning_rate": 1.0842847979474023e-06, "loss": 0.8582, "step": 65150 }, { "epoch": 0.7940599368700718, "grad_norm": 1.8924537897109985, "learning_rate": 1.0839640795381657e-06, "loss": 0.8038, "step": 65155 }, { "epoch": 0.7941208730942196, "grad_norm": 2.0462417602539062, "learning_rate": 1.083643361128929e-06, "loss": 0.811, "step": 65160 }, { "epoch": 0.7941818093183673, "grad_norm": 1.5937449932098389, "learning_rate": 1.0833226427196922e-06, "loss": 0.7863, "step": 65165 }, { "epoch": 0.7942427455425152, "grad_norm": 1.8645683526992798, "learning_rate": 1.0830019243104554e-06, "loss": 0.7871, "step": 65170 }, { "epoch": 0.794303681766663, "grad_norm": 2.3515231609344482, "learning_rate": 1.0826812059012188e-06, "loss": 0.8249, "step": 65175 }, { "epoch": 0.7943646179908108, "grad_norm": 2.132406711578369, "learning_rate": 1.082360487491982e-06, "loss": 0.8218, "step": 65180 }, { "epoch": 0.7944255542149586, "grad_norm": 1.9180598258972168, "learning_rate": 1.0820397690827455e-06, "loss": 0.8877, "step": 65185 }, { "epoch": 0.7944864904391065, "grad_norm": 2.1258862018585205, "learning_rate": 1.0817190506735087e-06, "loss": 0.7309, "step": 65190 }, { "epoch": 0.7945474266632543, "grad_norm": 1.8268405199050903, "learning_rate": 1.0813983322642722e-06, "loss": 0.8739, "step": 65195 }, { "epoch": 0.794608362887402, "grad_norm": 1.4980974197387695, "learning_rate": 1.0810776138550354e-06, "loss": 0.8289, "step": 65200 }, { "epoch": 0.7946692991115498, "grad_norm": 2.0933122634887695, "learning_rate": 1.0807568954457986e-06, "loss": 0.7779, "step": 65205 }, { "epoch": 0.7947302353356976, "grad_norm": 2.0738823413848877, "learning_rate": 1.0804361770365619e-06, "loss": 0.8352, "step": 65210 }, { "epoch": 0.7947911715598455, "grad_norm": 2.148320436477661, "learning_rate": 1.0801154586273253e-06, "loss": 0.8405, "step": 65215 }, { "epoch": 0.7948521077839933, "grad_norm": 2.083409309387207, "learning_rate": 1.0797947402180885e-06, "loss": 0.7997, "step": 65220 }, { "epoch": 0.7949130440081411, "grad_norm": 2.089888095855713, "learning_rate": 1.079474021808852e-06, "loss": 0.8247, "step": 65225 }, { "epoch": 0.7949739802322889, "grad_norm": 1.870793342590332, "learning_rate": 1.0791533033996152e-06, "loss": 0.8653, "step": 65230 }, { "epoch": 0.7950349164564366, "grad_norm": 1.8766974210739136, "learning_rate": 1.0788325849903786e-06, "loss": 0.8097, "step": 65235 }, { "epoch": 0.7950958526805845, "grad_norm": 1.9676709175109863, "learning_rate": 1.0785118665811419e-06, "loss": 0.7948, "step": 65240 }, { "epoch": 0.7951567889047323, "grad_norm": 2.0350492000579834, "learning_rate": 1.078191148171905e-06, "loss": 0.8201, "step": 65245 }, { "epoch": 0.7952177251288801, "grad_norm": 1.8125983476638794, "learning_rate": 1.0778704297626683e-06, "loss": 0.8182, "step": 65250 }, { "epoch": 0.7952786613530279, "grad_norm": 1.9992954730987549, "learning_rate": 1.0775497113534318e-06, "loss": 0.8518, "step": 65255 }, { "epoch": 0.7953395975771758, "grad_norm": 1.7973103523254395, "learning_rate": 1.077228992944195e-06, "loss": 0.8074, "step": 65260 }, { "epoch": 0.7954005338013236, "grad_norm": 1.785598635673523, "learning_rate": 1.0769082745349584e-06, "loss": 0.8082, "step": 65265 }, { "epoch": 0.7954614700254713, "grad_norm": 2.0387158393859863, "learning_rate": 1.0765875561257217e-06, "loss": 0.7651, "step": 65270 }, { "epoch": 0.7955224062496191, "grad_norm": 2.025310754776001, "learning_rate": 1.0762668377164851e-06, "loss": 0.7878, "step": 65275 }, { "epoch": 0.7955833424737669, "grad_norm": 1.8773399591445923, "learning_rate": 1.0759461193072483e-06, "loss": 0.8254, "step": 65280 }, { "epoch": 0.7956442786979148, "grad_norm": 1.8411585092544556, "learning_rate": 1.0756254008980116e-06, "loss": 0.7847, "step": 65285 }, { "epoch": 0.7957052149220626, "grad_norm": 2.032916784286499, "learning_rate": 1.075304682488775e-06, "loss": 0.8425, "step": 65290 }, { "epoch": 0.7957661511462104, "grad_norm": 1.9665281772613525, "learning_rate": 1.0749839640795382e-06, "loss": 0.7784, "step": 65295 }, { "epoch": 0.7958270873703582, "grad_norm": 1.9536484479904175, "learning_rate": 1.0746632456703015e-06, "loss": 0.8321, "step": 65300 }, { "epoch": 0.7958880235945059, "grad_norm": 1.970605731010437, "learning_rate": 1.074342527261065e-06, "loss": 0.8449, "step": 65305 }, { "epoch": 0.7959489598186538, "grad_norm": 2.3303582668304443, "learning_rate": 1.0740218088518283e-06, "loss": 0.8351, "step": 65310 }, { "epoch": 0.7960098960428016, "grad_norm": 1.8769944906234741, "learning_rate": 1.0737010904425916e-06, "loss": 0.7456, "step": 65315 }, { "epoch": 0.7960708322669494, "grad_norm": 2.1958603858947754, "learning_rate": 1.0733803720333548e-06, "loss": 0.7951, "step": 65320 }, { "epoch": 0.7961317684910972, "grad_norm": 1.7681434154510498, "learning_rate": 1.073059653624118e-06, "loss": 0.8478, "step": 65325 }, { "epoch": 0.7961927047152451, "grad_norm": 1.9890930652618408, "learning_rate": 1.0727389352148815e-06, "loss": 0.7838, "step": 65330 }, { "epoch": 0.7962536409393929, "grad_norm": 2.181631565093994, "learning_rate": 1.0724182168056447e-06, "loss": 0.8029, "step": 65335 }, { "epoch": 0.7963145771635406, "grad_norm": 1.8519928455352783, "learning_rate": 1.072097498396408e-06, "loss": 0.8467, "step": 65340 }, { "epoch": 0.7963755133876884, "grad_norm": 2.1780452728271484, "learning_rate": 1.0717767799871714e-06, "loss": 0.7899, "step": 65345 }, { "epoch": 0.7964364496118362, "grad_norm": 2.0286202430725098, "learning_rate": 1.0714560615779348e-06, "loss": 0.8482, "step": 65350 }, { "epoch": 0.7964973858359841, "grad_norm": 2.1867904663085938, "learning_rate": 1.071135343168698e-06, "loss": 0.8246, "step": 65355 }, { "epoch": 0.7965583220601319, "grad_norm": 2.3486838340759277, "learning_rate": 1.0708146247594613e-06, "loss": 0.7941, "step": 65360 }, { "epoch": 0.7966192582842797, "grad_norm": 1.9235988855361938, "learning_rate": 1.0704939063502245e-06, "loss": 0.7846, "step": 65365 }, { "epoch": 0.7966801945084275, "grad_norm": 1.8719327449798584, "learning_rate": 1.070173187940988e-06, "loss": 0.8082, "step": 65370 }, { "epoch": 0.7967411307325752, "grad_norm": 1.737565517425537, "learning_rate": 1.0698524695317512e-06, "loss": 0.8645, "step": 65375 }, { "epoch": 0.7968020669567231, "grad_norm": 1.9017829895019531, "learning_rate": 1.0695317511225146e-06, "loss": 0.8172, "step": 65380 }, { "epoch": 0.7968630031808709, "grad_norm": 2.009699821472168, "learning_rate": 1.0692110327132778e-06, "loss": 0.7373, "step": 65385 }, { "epoch": 0.7969239394050187, "grad_norm": 2.398710012435913, "learning_rate": 1.0688903143040413e-06, "loss": 0.8628, "step": 65390 }, { "epoch": 0.7969848756291665, "grad_norm": 1.839179515838623, "learning_rate": 1.0685695958948045e-06, "loss": 0.8193, "step": 65395 }, { "epoch": 0.7970458118533144, "grad_norm": 2.9680869579315186, "learning_rate": 1.0682488774855677e-06, "loss": 0.812, "step": 65400 }, { "epoch": 0.7971067480774622, "grad_norm": 2.050097703933716, "learning_rate": 1.067928159076331e-06, "loss": 0.845, "step": 65405 }, { "epoch": 0.7971676843016099, "grad_norm": 2.0210440158843994, "learning_rate": 1.0676074406670944e-06, "loss": 0.7474, "step": 65410 }, { "epoch": 0.7972286205257577, "grad_norm": 2.339386463165283, "learning_rate": 1.0672867222578576e-06, "loss": 0.7477, "step": 65415 }, { "epoch": 0.7972895567499055, "grad_norm": 2.3157145977020264, "learning_rate": 1.066966003848621e-06, "loss": 0.8472, "step": 65420 }, { "epoch": 0.7973504929740534, "grad_norm": 1.9467811584472656, "learning_rate": 1.0666452854393843e-06, "loss": 0.8258, "step": 65425 }, { "epoch": 0.7974114291982012, "grad_norm": 2.3510282039642334, "learning_rate": 1.0663245670301477e-06, "loss": 0.781, "step": 65430 }, { "epoch": 0.797472365422349, "grad_norm": 1.8030016422271729, "learning_rate": 1.066003848620911e-06, "loss": 0.8367, "step": 65435 }, { "epoch": 0.7975333016464968, "grad_norm": 2.1410763263702393, "learning_rate": 1.0656831302116742e-06, "loss": 0.8948, "step": 65440 }, { "epoch": 0.7975942378706445, "grad_norm": 1.9408329725265503, "learning_rate": 1.0653624118024374e-06, "loss": 0.8273, "step": 65445 }, { "epoch": 0.7976551740947924, "grad_norm": 1.8344268798828125, "learning_rate": 1.0650416933932009e-06, "loss": 0.815, "step": 65450 }, { "epoch": 0.7977161103189402, "grad_norm": 1.7730823755264282, "learning_rate": 1.064720974983964e-06, "loss": 0.7507, "step": 65455 }, { "epoch": 0.797777046543088, "grad_norm": 2.3154919147491455, "learning_rate": 1.0644002565747275e-06, "loss": 0.8485, "step": 65460 }, { "epoch": 0.7978379827672358, "grad_norm": 2.2306699752807617, "learning_rate": 1.0640795381654908e-06, "loss": 0.8305, "step": 65465 }, { "epoch": 0.7978989189913837, "grad_norm": 2.173001766204834, "learning_rate": 1.0637588197562542e-06, "loss": 0.8749, "step": 65470 }, { "epoch": 0.7979598552155315, "grad_norm": 2.105652332305908, "learning_rate": 1.0634381013470174e-06, "loss": 0.8516, "step": 65475 }, { "epoch": 0.7980207914396792, "grad_norm": 2.0741467475891113, "learning_rate": 1.0631173829377807e-06, "loss": 0.8375, "step": 65480 }, { "epoch": 0.798081727663827, "grad_norm": 1.989606499671936, "learning_rate": 1.0627966645285439e-06, "loss": 0.8253, "step": 65485 }, { "epoch": 0.7981426638879748, "grad_norm": 2.140789747238159, "learning_rate": 1.0624759461193073e-06, "loss": 0.7938, "step": 65490 }, { "epoch": 0.7982036001121227, "grad_norm": 2.2310283184051514, "learning_rate": 1.0621552277100706e-06, "loss": 0.8666, "step": 65495 }, { "epoch": 0.7982645363362705, "grad_norm": 2.10847544670105, "learning_rate": 1.061834509300834e-06, "loss": 0.8563, "step": 65500 }, { "epoch": 0.7983254725604183, "grad_norm": 1.9283902645111084, "learning_rate": 1.0615137908915972e-06, "loss": 0.8076, "step": 65505 }, { "epoch": 0.7983864087845661, "grad_norm": 2.5944159030914307, "learning_rate": 1.0611930724823607e-06, "loss": 0.897, "step": 65510 }, { "epoch": 0.7984473450087138, "grad_norm": 1.8297269344329834, "learning_rate": 1.0608723540731239e-06, "loss": 0.8153, "step": 65515 }, { "epoch": 0.7985082812328617, "grad_norm": 2.228318452835083, "learning_rate": 1.0605516356638871e-06, "loss": 0.7619, "step": 65520 }, { "epoch": 0.7985692174570095, "grad_norm": 2.116946220397949, "learning_rate": 1.0602309172546503e-06, "loss": 0.7704, "step": 65525 }, { "epoch": 0.7986301536811573, "grad_norm": 1.8573323488235474, "learning_rate": 1.0599101988454138e-06, "loss": 0.7926, "step": 65530 }, { "epoch": 0.7986910899053051, "grad_norm": 2.157621383666992, "learning_rate": 1.0595894804361772e-06, "loss": 0.758, "step": 65535 }, { "epoch": 0.798752026129453, "grad_norm": 2.238208770751953, "learning_rate": 1.0592687620269405e-06, "loss": 0.7505, "step": 65540 }, { "epoch": 0.7988129623536008, "grad_norm": 1.9639132022857666, "learning_rate": 1.0589480436177037e-06, "loss": 0.8219, "step": 65545 }, { "epoch": 0.7988738985777485, "grad_norm": 1.8492568731307983, "learning_rate": 1.0586273252084671e-06, "loss": 0.8326, "step": 65550 }, { "epoch": 0.7989348348018963, "grad_norm": 1.7384854555130005, "learning_rate": 1.0583066067992304e-06, "loss": 0.8219, "step": 65555 }, { "epoch": 0.7989957710260441, "grad_norm": 2.0059707164764404, "learning_rate": 1.0579858883899936e-06, "loss": 0.7865, "step": 65560 }, { "epoch": 0.799056707250192, "grad_norm": 1.6855851411819458, "learning_rate": 1.057665169980757e-06, "loss": 0.7809, "step": 65565 }, { "epoch": 0.7991176434743398, "grad_norm": 2.0049870014190674, "learning_rate": 1.0573444515715203e-06, "loss": 0.7587, "step": 65570 }, { "epoch": 0.7991785796984876, "grad_norm": 2.204040765762329, "learning_rate": 1.0570237331622837e-06, "loss": 0.7605, "step": 65575 }, { "epoch": 0.7992395159226354, "grad_norm": 1.8642274141311646, "learning_rate": 1.056703014753047e-06, "loss": 0.7621, "step": 65580 }, { "epoch": 0.7993004521467831, "grad_norm": 1.953426480293274, "learning_rate": 1.0563822963438104e-06, "loss": 0.7825, "step": 65585 }, { "epoch": 0.799361388370931, "grad_norm": 2.3848443031311035, "learning_rate": 1.0560615779345736e-06, "loss": 0.8585, "step": 65590 }, { "epoch": 0.7994223245950788, "grad_norm": 1.8855916261672974, "learning_rate": 1.0557408595253368e-06, "loss": 0.7929, "step": 65595 }, { "epoch": 0.7994832608192266, "grad_norm": 2.0262677669525146, "learning_rate": 1.0554201411161e-06, "loss": 0.7896, "step": 65600 }, { "epoch": 0.7995441970433744, "grad_norm": 2.1470255851745605, "learning_rate": 1.0550994227068635e-06, "loss": 0.8324, "step": 65605 }, { "epoch": 0.7996051332675223, "grad_norm": 2.089081048965454, "learning_rate": 1.0547787042976267e-06, "loss": 0.8038, "step": 65610 }, { "epoch": 0.7996660694916701, "grad_norm": 1.703672170639038, "learning_rate": 1.0544579858883902e-06, "loss": 0.865, "step": 65615 }, { "epoch": 0.7997270057158178, "grad_norm": 1.6201164722442627, "learning_rate": 1.0541372674791534e-06, "loss": 0.853, "step": 65620 }, { "epoch": 0.7997879419399656, "grad_norm": 2.204580783843994, "learning_rate": 1.0538165490699168e-06, "loss": 0.8703, "step": 65625 }, { "epoch": 0.7998488781641134, "grad_norm": 2.0817651748657227, "learning_rate": 1.05349583066068e-06, "loss": 0.7601, "step": 65630 }, { "epoch": 0.7999098143882613, "grad_norm": 2.052399158477783, "learning_rate": 1.0531751122514433e-06, "loss": 0.7768, "step": 65635 }, { "epoch": 0.7999707506124091, "grad_norm": 1.791393756866455, "learning_rate": 1.0528543938422065e-06, "loss": 0.8284, "step": 65640 }, { "epoch": 0.8000316868365569, "grad_norm": 2.1377546787261963, "learning_rate": 1.05253367543297e-06, "loss": 0.8182, "step": 65645 }, { "epoch": 0.8000926230607047, "grad_norm": 2.067713737487793, "learning_rate": 1.0522129570237332e-06, "loss": 0.8523, "step": 65650 }, { "epoch": 0.8001535592848524, "grad_norm": 1.6750707626342773, "learning_rate": 1.0518922386144966e-06, "loss": 0.7582, "step": 65655 }, { "epoch": 0.8002144955090003, "grad_norm": 1.7922353744506836, "learning_rate": 1.0515715202052598e-06, "loss": 0.8145, "step": 65660 }, { "epoch": 0.8002754317331481, "grad_norm": 2.0088415145874023, "learning_rate": 1.0512508017960233e-06, "loss": 0.7913, "step": 65665 }, { "epoch": 0.8003363679572959, "grad_norm": 1.9794703722000122, "learning_rate": 1.0509300833867865e-06, "loss": 0.8061, "step": 65670 }, { "epoch": 0.8003973041814437, "grad_norm": 1.792435646057129, "learning_rate": 1.0506093649775497e-06, "loss": 0.824, "step": 65675 }, { "epoch": 0.8004582404055915, "grad_norm": 2.002610206604004, "learning_rate": 1.050288646568313e-06, "loss": 0.8399, "step": 65680 }, { "epoch": 0.8005191766297393, "grad_norm": 1.9165549278259277, "learning_rate": 1.0499679281590764e-06, "loss": 0.8249, "step": 65685 }, { "epoch": 0.8005801128538871, "grad_norm": 2.0294606685638428, "learning_rate": 1.0496472097498396e-06, "loss": 0.7834, "step": 65690 }, { "epoch": 0.8006410490780349, "grad_norm": 1.6429532766342163, "learning_rate": 1.049326491340603e-06, "loss": 0.8361, "step": 65695 }, { "epoch": 0.8007019853021827, "grad_norm": 1.8623058795928955, "learning_rate": 1.0490057729313663e-06, "loss": 0.7852, "step": 65700 }, { "epoch": 0.8007629215263306, "grad_norm": 1.9992014169692993, "learning_rate": 1.0486850545221298e-06, "loss": 0.8185, "step": 65705 }, { "epoch": 0.8008238577504784, "grad_norm": 2.0006659030914307, "learning_rate": 1.048364336112893e-06, "loss": 0.9184, "step": 65710 }, { "epoch": 0.8008847939746262, "grad_norm": 1.9299418926239014, "learning_rate": 1.0480436177036562e-06, "loss": 0.7821, "step": 65715 }, { "epoch": 0.8009457301987739, "grad_norm": 2.262476921081543, "learning_rate": 1.0477228992944194e-06, "loss": 0.8395, "step": 65720 }, { "epoch": 0.8010066664229217, "grad_norm": 1.9910054206848145, "learning_rate": 1.0474021808851829e-06, "loss": 0.7909, "step": 65725 }, { "epoch": 0.8010676026470696, "grad_norm": 1.9507828950881958, "learning_rate": 1.0470814624759463e-06, "loss": 0.7592, "step": 65730 }, { "epoch": 0.8011285388712174, "grad_norm": 1.9912958145141602, "learning_rate": 1.0467607440667095e-06, "loss": 0.8056, "step": 65735 }, { "epoch": 0.8011894750953652, "grad_norm": 1.7375110387802124, "learning_rate": 1.0464400256574728e-06, "loss": 0.842, "step": 65740 }, { "epoch": 0.801250411319513, "grad_norm": 1.7594594955444336, "learning_rate": 1.0461193072482362e-06, "loss": 0.788, "step": 65745 }, { "epoch": 0.8013113475436608, "grad_norm": 1.889571189880371, "learning_rate": 1.0457985888389994e-06, "loss": 0.8287, "step": 65750 }, { "epoch": 0.8013722837678086, "grad_norm": 2.009955883026123, "learning_rate": 1.0454778704297627e-06, "loss": 0.8328, "step": 65755 }, { "epoch": 0.8014332199919564, "grad_norm": 2.0828332901000977, "learning_rate": 1.0451571520205261e-06, "loss": 0.9019, "step": 65760 }, { "epoch": 0.8014941562161042, "grad_norm": 2.112114191055298, "learning_rate": 1.0448364336112893e-06, "loss": 0.8076, "step": 65765 }, { "epoch": 0.801555092440252, "grad_norm": 2.127445936203003, "learning_rate": 1.0445157152020528e-06, "loss": 0.8564, "step": 65770 }, { "epoch": 0.8016160286643998, "grad_norm": 1.6808503866195679, "learning_rate": 1.044194996792816e-06, "loss": 0.7642, "step": 65775 }, { "epoch": 0.8016769648885477, "grad_norm": 2.4428508281707764, "learning_rate": 1.0438742783835792e-06, "loss": 0.8149, "step": 65780 }, { "epoch": 0.8017379011126955, "grad_norm": 1.88777494430542, "learning_rate": 1.0435535599743427e-06, "loss": 0.7888, "step": 65785 }, { "epoch": 0.8017988373368432, "grad_norm": 2.3814685344696045, "learning_rate": 1.043232841565106e-06, "loss": 0.8305, "step": 65790 }, { "epoch": 0.801859773560991, "grad_norm": 1.9743311405181885, "learning_rate": 1.0429121231558691e-06, "loss": 0.8412, "step": 65795 }, { "epoch": 0.8019207097851389, "grad_norm": 1.7572548389434814, "learning_rate": 1.0425914047466326e-06, "loss": 0.8418, "step": 65800 }, { "epoch": 0.8019816460092867, "grad_norm": 1.698272466659546, "learning_rate": 1.0422706863373958e-06, "loss": 0.7923, "step": 65805 }, { "epoch": 0.8020425822334345, "grad_norm": 1.6646419763565063, "learning_rate": 1.0419499679281592e-06, "loss": 0.7599, "step": 65810 }, { "epoch": 0.8021035184575823, "grad_norm": 2.100647211074829, "learning_rate": 1.0416292495189225e-06, "loss": 0.8673, "step": 65815 }, { "epoch": 0.8021644546817301, "grad_norm": 1.773566722869873, "learning_rate": 1.0413085311096857e-06, "loss": 0.808, "step": 65820 }, { "epoch": 0.8022253909058779, "grad_norm": 2.592865467071533, "learning_rate": 1.0409878127004491e-06, "loss": 0.8125, "step": 65825 }, { "epoch": 0.8022863271300257, "grad_norm": 2.0977964401245117, "learning_rate": 1.0406670942912124e-06, "loss": 0.865, "step": 65830 }, { "epoch": 0.8023472633541735, "grad_norm": 2.2941057682037354, "learning_rate": 1.0403463758819756e-06, "loss": 0.7893, "step": 65835 }, { "epoch": 0.8024081995783213, "grad_norm": 1.969958782196045, "learning_rate": 1.040025657472739e-06, "loss": 0.8181, "step": 65840 }, { "epoch": 0.8024691358024691, "grad_norm": 1.8633543252944946, "learning_rate": 1.0397049390635023e-06, "loss": 0.8161, "step": 65845 }, { "epoch": 0.802530072026617, "grad_norm": 1.87224280834198, "learning_rate": 1.0393842206542657e-06, "loss": 0.8499, "step": 65850 }, { "epoch": 0.8025910082507648, "grad_norm": 1.737154245376587, "learning_rate": 1.039063502245029e-06, "loss": 0.8169, "step": 65855 }, { "epoch": 0.8026519444749125, "grad_norm": 1.9660292863845825, "learning_rate": 1.0387427838357924e-06, "loss": 0.8193, "step": 65860 }, { "epoch": 0.8027128806990603, "grad_norm": 1.9621856212615967, "learning_rate": 1.0384220654265556e-06, "loss": 0.7961, "step": 65865 }, { "epoch": 0.8027738169232081, "grad_norm": 1.9950898885726929, "learning_rate": 1.0381013470173188e-06, "loss": 0.8153, "step": 65870 }, { "epoch": 0.802834753147356, "grad_norm": 1.8696717023849487, "learning_rate": 1.037780628608082e-06, "loss": 0.76, "step": 65875 }, { "epoch": 0.8028956893715038, "grad_norm": 1.9494255781173706, "learning_rate": 1.0374599101988455e-06, "loss": 0.8001, "step": 65880 }, { "epoch": 0.8029566255956516, "grad_norm": 2.156691312789917, "learning_rate": 1.037139191789609e-06, "loss": 0.825, "step": 65885 }, { "epoch": 0.8030175618197994, "grad_norm": 1.9079389572143555, "learning_rate": 1.0368184733803722e-06, "loss": 0.7468, "step": 65890 }, { "epoch": 0.8030784980439472, "grad_norm": 1.9898606538772583, "learning_rate": 1.0364977549711354e-06, "loss": 0.8115, "step": 65895 }, { "epoch": 0.803139434268095, "grad_norm": 2.199742317199707, "learning_rate": 1.0361770365618988e-06, "loss": 0.7092, "step": 65900 }, { "epoch": 0.8032003704922428, "grad_norm": 2.086205244064331, "learning_rate": 1.035856318152662e-06, "loss": 0.6979, "step": 65905 }, { "epoch": 0.8032613067163906, "grad_norm": 1.7886284589767456, "learning_rate": 1.0355355997434253e-06, "loss": 0.8338, "step": 65910 }, { "epoch": 0.8033222429405384, "grad_norm": 1.9776191711425781, "learning_rate": 1.0352148813341885e-06, "loss": 0.8553, "step": 65915 }, { "epoch": 0.8033831791646863, "grad_norm": 1.9841480255126953, "learning_rate": 1.034894162924952e-06, "loss": 0.8015, "step": 65920 }, { "epoch": 0.8034441153888341, "grad_norm": 1.8816567659378052, "learning_rate": 1.0345734445157154e-06, "loss": 0.8439, "step": 65925 }, { "epoch": 0.8035050516129818, "grad_norm": 2.0916614532470703, "learning_rate": 1.0342527261064786e-06, "loss": 0.7536, "step": 65930 }, { "epoch": 0.8035659878371296, "grad_norm": 2.326253652572632, "learning_rate": 1.0339320076972419e-06, "loss": 0.8461, "step": 65935 }, { "epoch": 0.8036269240612774, "grad_norm": 2.0784683227539062, "learning_rate": 1.0336112892880053e-06, "loss": 0.8593, "step": 65940 }, { "epoch": 0.8036878602854253, "grad_norm": 1.9169598817825317, "learning_rate": 1.0332905708787685e-06, "loss": 0.7756, "step": 65945 }, { "epoch": 0.8037487965095731, "grad_norm": 2.037353277206421, "learning_rate": 1.0329698524695318e-06, "loss": 0.8814, "step": 65950 }, { "epoch": 0.8038097327337209, "grad_norm": 1.765710711479187, "learning_rate": 1.0326491340602952e-06, "loss": 0.8249, "step": 65955 }, { "epoch": 0.8038706689578687, "grad_norm": 2.0748095512390137, "learning_rate": 1.0323284156510584e-06, "loss": 0.8916, "step": 65960 }, { "epoch": 0.8039316051820165, "grad_norm": 1.9499540328979492, "learning_rate": 1.0320076972418219e-06, "loss": 0.795, "step": 65965 }, { "epoch": 0.8039925414061643, "grad_norm": 2.244415283203125, "learning_rate": 1.031686978832585e-06, "loss": 0.8329, "step": 65970 }, { "epoch": 0.8040534776303121, "grad_norm": 1.8478494882583618, "learning_rate": 1.0313662604233483e-06, "loss": 0.8026, "step": 65975 }, { "epoch": 0.8041144138544599, "grad_norm": 2.1156930923461914, "learning_rate": 1.0310455420141118e-06, "loss": 0.7104, "step": 65980 }, { "epoch": 0.8041753500786077, "grad_norm": 2.3697588443756104, "learning_rate": 1.030724823604875e-06, "loss": 0.831, "step": 65985 }, { "epoch": 0.8042362863027556, "grad_norm": 1.8937724828720093, "learning_rate": 1.0304041051956382e-06, "loss": 0.8075, "step": 65990 }, { "epoch": 0.8042972225269034, "grad_norm": 2.176880121231079, "learning_rate": 1.0300833867864017e-06, "loss": 0.7793, "step": 65995 }, { "epoch": 0.8043581587510511, "grad_norm": 1.6329301595687866, "learning_rate": 1.0297626683771649e-06, "loss": 0.8097, "step": 66000 }, { "epoch": 0.8044190949751989, "grad_norm": 2.3357791900634766, "learning_rate": 1.0294419499679283e-06, "loss": 0.7486, "step": 66005 }, { "epoch": 0.8044800311993467, "grad_norm": 1.9296514987945557, "learning_rate": 1.0291212315586916e-06, "loss": 0.8401, "step": 66010 }, { "epoch": 0.8045409674234946, "grad_norm": 1.8863773345947266, "learning_rate": 1.0288005131494548e-06, "loss": 0.8138, "step": 66015 }, { "epoch": 0.8046019036476424, "grad_norm": 1.824453353881836, "learning_rate": 1.0284797947402182e-06, "loss": 0.7874, "step": 66020 }, { "epoch": 0.8046628398717902, "grad_norm": 2.1368372440338135, "learning_rate": 1.0281590763309815e-06, "loss": 0.8557, "step": 66025 }, { "epoch": 0.804723776095938, "grad_norm": 2.0194613933563232, "learning_rate": 1.0278383579217447e-06, "loss": 0.8232, "step": 66030 }, { "epoch": 0.8047847123200857, "grad_norm": 1.8744423389434814, "learning_rate": 1.0275176395125081e-06, "loss": 0.8668, "step": 66035 }, { "epoch": 0.8048456485442336, "grad_norm": 1.8217828273773193, "learning_rate": 1.0271969211032714e-06, "loss": 0.8409, "step": 66040 }, { "epoch": 0.8049065847683814, "grad_norm": 1.868202567100525, "learning_rate": 1.0268762026940348e-06, "loss": 0.799, "step": 66045 }, { "epoch": 0.8049675209925292, "grad_norm": 2.0845725536346436, "learning_rate": 1.026555484284798e-06, "loss": 0.8376, "step": 66050 }, { "epoch": 0.805028457216677, "grad_norm": 2.076427459716797, "learning_rate": 1.0262347658755613e-06, "loss": 0.8448, "step": 66055 }, { "epoch": 0.8050893934408249, "grad_norm": 2.045822858810425, "learning_rate": 1.0259140474663247e-06, "loss": 0.7549, "step": 66060 }, { "epoch": 0.8051503296649727, "grad_norm": 1.9947144985198975, "learning_rate": 1.025593329057088e-06, "loss": 0.8099, "step": 66065 }, { "epoch": 0.8052112658891204, "grad_norm": 2.1099517345428467, "learning_rate": 1.0252726106478512e-06, "loss": 0.7793, "step": 66070 }, { "epoch": 0.8052722021132682, "grad_norm": 1.7877156734466553, "learning_rate": 1.0249518922386146e-06, "loss": 0.8092, "step": 66075 }, { "epoch": 0.805333138337416, "grad_norm": 1.7976558208465576, "learning_rate": 1.024631173829378e-06, "loss": 0.8289, "step": 66080 }, { "epoch": 0.8053940745615639, "grad_norm": 2.29728364944458, "learning_rate": 1.0243104554201413e-06, "loss": 0.7942, "step": 66085 }, { "epoch": 0.8054550107857117, "grad_norm": 2.125394105911255, "learning_rate": 1.0239897370109045e-06, "loss": 0.8212, "step": 66090 }, { "epoch": 0.8055159470098595, "grad_norm": 2.2033095359802246, "learning_rate": 1.0236690186016677e-06, "loss": 0.8286, "step": 66095 }, { "epoch": 0.8055768832340073, "grad_norm": 1.7837916612625122, "learning_rate": 1.0233483001924312e-06, "loss": 0.8127, "step": 66100 }, { "epoch": 0.805637819458155, "grad_norm": 1.7482542991638184, "learning_rate": 1.0230275817831944e-06, "loss": 0.8148, "step": 66105 }, { "epoch": 0.8056987556823029, "grad_norm": 2.0334999561309814, "learning_rate": 1.0227068633739576e-06, "loss": 0.7709, "step": 66110 }, { "epoch": 0.8057596919064507, "grad_norm": 2.158609390258789, "learning_rate": 1.022386144964721e-06, "loss": 0.8742, "step": 66115 }, { "epoch": 0.8058206281305985, "grad_norm": 2.1416893005371094, "learning_rate": 1.0220654265554845e-06, "loss": 0.7864, "step": 66120 }, { "epoch": 0.8058815643547463, "grad_norm": 2.4476280212402344, "learning_rate": 1.0217447081462477e-06, "loss": 0.8828, "step": 66125 }, { "epoch": 0.8059425005788942, "grad_norm": 2.0750789642333984, "learning_rate": 1.021423989737011e-06, "loss": 0.83, "step": 66130 }, { "epoch": 0.806003436803042, "grad_norm": 1.6560453176498413, "learning_rate": 1.0211032713277742e-06, "loss": 0.8178, "step": 66135 }, { "epoch": 0.8060643730271897, "grad_norm": 1.7723560333251953, "learning_rate": 1.0207825529185376e-06, "loss": 0.7939, "step": 66140 }, { "epoch": 0.8061253092513375, "grad_norm": 2.4538609981536865, "learning_rate": 1.0204618345093008e-06, "loss": 0.7878, "step": 66145 }, { "epoch": 0.8061862454754853, "grad_norm": 1.7930792570114136, "learning_rate": 1.0201411161000643e-06, "loss": 0.8297, "step": 66150 }, { "epoch": 0.8062471816996332, "grad_norm": 2.021484851837158, "learning_rate": 1.0198203976908275e-06, "loss": 0.7966, "step": 66155 }, { "epoch": 0.806308117923781, "grad_norm": 1.8732423782348633, "learning_rate": 1.019499679281591e-06, "loss": 0.795, "step": 66160 }, { "epoch": 0.8063690541479288, "grad_norm": 2.0649304389953613, "learning_rate": 1.0191789608723542e-06, "loss": 0.8126, "step": 66165 }, { "epoch": 0.8064299903720766, "grad_norm": 1.9220939874649048, "learning_rate": 1.0188582424631174e-06, "loss": 0.8004, "step": 66170 }, { "epoch": 0.8064909265962243, "grad_norm": 2.368198871612549, "learning_rate": 1.0185375240538809e-06, "loss": 0.8185, "step": 66175 }, { "epoch": 0.8065518628203722, "grad_norm": 2.292858839035034, "learning_rate": 1.018216805644644e-06, "loss": 0.7666, "step": 66180 }, { "epoch": 0.80661279904452, "grad_norm": 2.090322971343994, "learning_rate": 1.0178960872354073e-06, "loss": 0.8184, "step": 66185 }, { "epoch": 0.8066737352686678, "grad_norm": 1.741708517074585, "learning_rate": 1.0175753688261708e-06, "loss": 0.7866, "step": 66190 }, { "epoch": 0.8067346714928156, "grad_norm": 2.3952181339263916, "learning_rate": 1.017254650416934e-06, "loss": 0.794, "step": 66195 }, { "epoch": 0.8067956077169635, "grad_norm": 1.6473584175109863, "learning_rate": 1.0169339320076974e-06, "loss": 0.8159, "step": 66200 }, { "epoch": 0.8068565439411113, "grad_norm": 2.3411307334899902, "learning_rate": 1.0166132135984607e-06, "loss": 0.787, "step": 66205 }, { "epoch": 0.806917480165259, "grad_norm": 2.0496091842651367, "learning_rate": 1.0162924951892239e-06, "loss": 0.8263, "step": 66210 }, { "epoch": 0.8069784163894068, "grad_norm": 1.9419046640396118, "learning_rate": 1.0159717767799873e-06, "loss": 0.8184, "step": 66215 }, { "epoch": 0.8070393526135546, "grad_norm": 1.9310277700424194, "learning_rate": 1.0156510583707505e-06, "loss": 0.8337, "step": 66220 }, { "epoch": 0.8071002888377025, "grad_norm": 1.8491935729980469, "learning_rate": 1.0153303399615138e-06, "loss": 0.7735, "step": 66225 }, { "epoch": 0.8071612250618503, "grad_norm": 2.033947467803955, "learning_rate": 1.0150096215522772e-06, "loss": 0.8703, "step": 66230 }, { "epoch": 0.8072221612859981, "grad_norm": 1.884150505065918, "learning_rate": 1.0146889031430407e-06, "loss": 0.9138, "step": 66235 }, { "epoch": 0.8072830975101459, "grad_norm": 1.7835237979888916, "learning_rate": 1.0143681847338039e-06, "loss": 0.7898, "step": 66240 }, { "epoch": 0.8073440337342936, "grad_norm": 2.0337460041046143, "learning_rate": 1.0140474663245671e-06, "loss": 0.8324, "step": 66245 }, { "epoch": 0.8074049699584415, "grad_norm": 1.8233965635299683, "learning_rate": 1.0137267479153303e-06, "loss": 0.8321, "step": 66250 }, { "epoch": 0.8074659061825893, "grad_norm": 1.8095877170562744, "learning_rate": 1.0134060295060938e-06, "loss": 0.8959, "step": 66255 }, { "epoch": 0.8075268424067371, "grad_norm": 1.9456838369369507, "learning_rate": 1.013085311096857e-06, "loss": 0.8124, "step": 66260 }, { "epoch": 0.8075877786308849, "grad_norm": 2.0546231269836426, "learning_rate": 1.0127645926876202e-06, "loss": 0.8493, "step": 66265 }, { "epoch": 0.8076487148550328, "grad_norm": 2.437394142150879, "learning_rate": 1.0124438742783837e-06, "loss": 0.829, "step": 66270 }, { "epoch": 0.8077096510791806, "grad_norm": 1.7037575244903564, "learning_rate": 1.0121231558691471e-06, "loss": 0.7804, "step": 66275 }, { "epoch": 0.8077705873033283, "grad_norm": 1.7529895305633545, "learning_rate": 1.0118024374599103e-06, "loss": 0.8659, "step": 66280 }, { "epoch": 0.8078315235274761, "grad_norm": 1.8693434000015259, "learning_rate": 1.0114817190506736e-06, "loss": 0.8115, "step": 66285 }, { "epoch": 0.8078924597516239, "grad_norm": 1.8836795091629028, "learning_rate": 1.0111610006414368e-06, "loss": 0.8939, "step": 66290 }, { "epoch": 0.8079533959757718, "grad_norm": 2.16072416305542, "learning_rate": 1.0108402822322002e-06, "loss": 0.8156, "step": 66295 }, { "epoch": 0.8080143321999196, "grad_norm": 1.8602728843688965, "learning_rate": 1.0105195638229635e-06, "loss": 0.8665, "step": 66300 }, { "epoch": 0.8080752684240674, "grad_norm": 1.7286376953125, "learning_rate": 1.010198845413727e-06, "loss": 0.8195, "step": 66305 }, { "epoch": 0.8081362046482152, "grad_norm": 1.9418160915374756, "learning_rate": 1.0098781270044901e-06, "loss": 0.7583, "step": 66310 }, { "epoch": 0.8081971408723629, "grad_norm": 1.681630253791809, "learning_rate": 1.0095574085952536e-06, "loss": 0.8149, "step": 66315 }, { "epoch": 0.8082580770965108, "grad_norm": 1.8099942207336426, "learning_rate": 1.0092366901860168e-06, "loss": 0.8141, "step": 66320 }, { "epoch": 0.8083190133206586, "grad_norm": 1.6269891262054443, "learning_rate": 1.00891597177678e-06, "loss": 0.7907, "step": 66325 }, { "epoch": 0.8083799495448064, "grad_norm": 1.998368740081787, "learning_rate": 1.0085952533675433e-06, "loss": 0.797, "step": 66330 }, { "epoch": 0.8084408857689542, "grad_norm": 1.8915177583694458, "learning_rate": 1.0082745349583067e-06, "loss": 0.832, "step": 66335 }, { "epoch": 0.808501821993102, "grad_norm": 1.9240045547485352, "learning_rate": 1.00795381654907e-06, "loss": 0.8003, "step": 66340 }, { "epoch": 0.8085627582172499, "grad_norm": 2.2381691932678223, "learning_rate": 1.0076330981398334e-06, "loss": 0.903, "step": 66345 }, { "epoch": 0.8086236944413976, "grad_norm": 1.9115757942199707, "learning_rate": 1.0073123797305966e-06, "loss": 0.7294, "step": 66350 }, { "epoch": 0.8086846306655454, "grad_norm": 2.001960277557373, "learning_rate": 1.00699166132136e-06, "loss": 0.8855, "step": 66355 }, { "epoch": 0.8087455668896932, "grad_norm": 2.4993906021118164, "learning_rate": 1.0066709429121233e-06, "loss": 0.8219, "step": 66360 }, { "epoch": 0.8088065031138411, "grad_norm": 1.617967128753662, "learning_rate": 1.0063502245028865e-06, "loss": 0.8804, "step": 66365 }, { "epoch": 0.8088674393379889, "grad_norm": 2.564822196960449, "learning_rate": 1.0060295060936497e-06, "loss": 0.7955, "step": 66370 }, { "epoch": 0.8089283755621367, "grad_norm": 2.1851677894592285, "learning_rate": 1.0057087876844132e-06, "loss": 0.7717, "step": 66375 }, { "epoch": 0.8089893117862845, "grad_norm": 1.9823988676071167, "learning_rate": 1.0053880692751764e-06, "loss": 0.797, "step": 66380 }, { "epoch": 0.8090502480104322, "grad_norm": 1.958296537399292, "learning_rate": 1.0050673508659398e-06, "loss": 0.854, "step": 66385 }, { "epoch": 0.8091111842345801, "grad_norm": 2.12439227104187, "learning_rate": 1.004746632456703e-06, "loss": 0.7964, "step": 66390 }, { "epoch": 0.8091721204587279, "grad_norm": 1.777376413345337, "learning_rate": 1.0044259140474665e-06, "loss": 0.8684, "step": 66395 }, { "epoch": 0.8092330566828757, "grad_norm": 2.2617404460906982, "learning_rate": 1.0041051956382297e-06, "loss": 0.8254, "step": 66400 }, { "epoch": 0.8092939929070235, "grad_norm": 2.0038137435913086, "learning_rate": 1.003784477228993e-06, "loss": 0.8049, "step": 66405 }, { "epoch": 0.8093549291311714, "grad_norm": 1.7620409727096558, "learning_rate": 1.0034637588197562e-06, "loss": 0.7429, "step": 66410 }, { "epoch": 0.8094158653553192, "grad_norm": 1.926535964012146, "learning_rate": 1.0031430404105196e-06, "loss": 0.7625, "step": 66415 }, { "epoch": 0.8094768015794669, "grad_norm": 2.4705846309661865, "learning_rate": 1.0028223220012829e-06, "loss": 0.8035, "step": 66420 }, { "epoch": 0.8095377378036147, "grad_norm": 2.2215704917907715, "learning_rate": 1.0025016035920463e-06, "loss": 0.8151, "step": 66425 }, { "epoch": 0.8095986740277625, "grad_norm": 1.9530384540557861, "learning_rate": 1.0021808851828095e-06, "loss": 0.8005, "step": 66430 }, { "epoch": 0.8096596102519104, "grad_norm": 2.1167874336242676, "learning_rate": 1.001860166773573e-06, "loss": 0.7749, "step": 66435 }, { "epoch": 0.8097205464760582, "grad_norm": 2.1573643684387207, "learning_rate": 1.0015394483643362e-06, "loss": 0.7161, "step": 66440 }, { "epoch": 0.809781482700206, "grad_norm": 1.8537009954452515, "learning_rate": 1.0012187299550994e-06, "loss": 0.875, "step": 66445 }, { "epoch": 0.8098424189243538, "grad_norm": 2.21187162399292, "learning_rate": 1.0008980115458627e-06, "loss": 0.826, "step": 66450 }, { "epoch": 0.8099033551485015, "grad_norm": 1.9248807430267334, "learning_rate": 1.000577293136626e-06, "loss": 0.7355, "step": 66455 }, { "epoch": 0.8099642913726494, "grad_norm": 1.9115828275680542, "learning_rate": 1.0002565747273893e-06, "loss": 0.8759, "step": 66460 }, { "epoch": 0.8100252275967972, "grad_norm": 2.0249879360198975, "learning_rate": 9.999358563181528e-07, "loss": 0.827, "step": 66465 }, { "epoch": 0.810086163820945, "grad_norm": 2.0954387187957764, "learning_rate": 9.996151379089162e-07, "loss": 0.8477, "step": 66470 }, { "epoch": 0.8101471000450928, "grad_norm": 2.3753623962402344, "learning_rate": 9.992944194996794e-07, "loss": 0.8313, "step": 66475 }, { "epoch": 0.8102080362692407, "grad_norm": 1.901447057723999, "learning_rate": 9.989737010904427e-07, "loss": 0.8405, "step": 66480 }, { "epoch": 0.8102689724933885, "grad_norm": 2.2871146202087402, "learning_rate": 9.98652982681206e-07, "loss": 0.7824, "step": 66485 }, { "epoch": 0.8103299087175362, "grad_norm": 1.7594587802886963, "learning_rate": 9.983322642719693e-07, "loss": 0.8204, "step": 66490 }, { "epoch": 0.810390844941684, "grad_norm": 1.785463571548462, "learning_rate": 9.980115458627326e-07, "loss": 0.7707, "step": 66495 }, { "epoch": 0.8104517811658318, "grad_norm": 1.9891438484191895, "learning_rate": 9.97690827453496e-07, "loss": 0.8098, "step": 66500 }, { "epoch": 0.8105127173899797, "grad_norm": 2.015763759613037, "learning_rate": 9.973701090442592e-07, "loss": 0.8686, "step": 66505 }, { "epoch": 0.8105736536141275, "grad_norm": 2.3346996307373047, "learning_rate": 9.970493906350227e-07, "loss": 0.794, "step": 66510 }, { "epoch": 0.8106345898382753, "grad_norm": 2.089991569519043, "learning_rate": 9.96728672225786e-07, "loss": 0.7751, "step": 66515 }, { "epoch": 0.8106955260624231, "grad_norm": 1.8740453720092773, "learning_rate": 9.964079538165491e-07, "loss": 0.8006, "step": 66520 }, { "epoch": 0.8107564622865708, "grad_norm": 2.0068178176879883, "learning_rate": 9.960872354073124e-07, "loss": 0.7918, "step": 66525 }, { "epoch": 0.8108173985107187, "grad_norm": 2.203824281692505, "learning_rate": 9.957665169980758e-07, "loss": 0.8365, "step": 66530 }, { "epoch": 0.8108783347348665, "grad_norm": 2.0652012825012207, "learning_rate": 9.95445798588839e-07, "loss": 0.7393, "step": 66535 }, { "epoch": 0.8109392709590143, "grad_norm": 1.8009514808654785, "learning_rate": 9.951250801796025e-07, "loss": 0.8252, "step": 66540 }, { "epoch": 0.8110002071831621, "grad_norm": 1.6866366863250732, "learning_rate": 9.948043617703657e-07, "loss": 0.8848, "step": 66545 }, { "epoch": 0.81106114340731, "grad_norm": 2.3735435009002686, "learning_rate": 9.944836433611291e-07, "loss": 0.812, "step": 66550 }, { "epoch": 0.8111220796314578, "grad_norm": 1.5860992670059204, "learning_rate": 9.941629249518924e-07, "loss": 0.7616, "step": 66555 }, { "epoch": 0.8111830158556055, "grad_norm": 2.0914769172668457, "learning_rate": 9.938422065426556e-07, "loss": 0.8092, "step": 66560 }, { "epoch": 0.8112439520797533, "grad_norm": 1.83171546459198, "learning_rate": 9.935214881334188e-07, "loss": 0.7899, "step": 66565 }, { "epoch": 0.8113048883039011, "grad_norm": 1.9084669351577759, "learning_rate": 9.932007697241823e-07, "loss": 0.7921, "step": 66570 }, { "epoch": 0.811365824528049, "grad_norm": 2.259861946105957, "learning_rate": 9.928800513149455e-07, "loss": 0.8553, "step": 66575 }, { "epoch": 0.8114267607521968, "grad_norm": 1.888387680053711, "learning_rate": 9.92559332905709e-07, "loss": 0.7763, "step": 66580 }, { "epoch": 0.8114876969763446, "grad_norm": 1.6871824264526367, "learning_rate": 9.922386144964722e-07, "loss": 0.7834, "step": 66585 }, { "epoch": 0.8115486332004924, "grad_norm": 1.9905678033828735, "learning_rate": 9.919178960872356e-07, "loss": 0.7931, "step": 66590 }, { "epoch": 0.8116095694246401, "grad_norm": 1.9608594179153442, "learning_rate": 9.915971776779988e-07, "loss": 0.8724, "step": 66595 }, { "epoch": 0.811670505648788, "grad_norm": 1.7099615335464478, "learning_rate": 9.91276459268762e-07, "loss": 0.8066, "step": 66600 }, { "epoch": 0.8117314418729358, "grad_norm": 2.3727433681488037, "learning_rate": 9.909557408595253e-07, "loss": 0.7834, "step": 66605 }, { "epoch": 0.8117923780970836, "grad_norm": 1.8799009323120117, "learning_rate": 9.906350224502887e-07, "loss": 0.8202, "step": 66610 }, { "epoch": 0.8118533143212314, "grad_norm": 1.8414461612701416, "learning_rate": 9.90314304041052e-07, "loss": 0.8365, "step": 66615 }, { "epoch": 0.8119142505453792, "grad_norm": 1.6549277305603027, "learning_rate": 9.899935856318154e-07, "loss": 0.7975, "step": 66620 }, { "epoch": 0.811975186769527, "grad_norm": 2.3965461254119873, "learning_rate": 9.896728672225786e-07, "loss": 0.8371, "step": 66625 }, { "epoch": 0.8120361229936748, "grad_norm": 1.8846811056137085, "learning_rate": 9.89352148813342e-07, "loss": 0.7233, "step": 66630 }, { "epoch": 0.8120970592178226, "grad_norm": 1.8999439477920532, "learning_rate": 9.890314304041053e-07, "loss": 0.7968, "step": 66635 }, { "epoch": 0.8121579954419704, "grad_norm": 1.9028178453445435, "learning_rate": 9.887107119948685e-07, "loss": 0.834, "step": 66640 }, { "epoch": 0.8122189316661182, "grad_norm": 1.9939961433410645, "learning_rate": 9.883899935856317e-07, "loss": 0.8306, "step": 66645 }, { "epoch": 0.8122798678902661, "grad_norm": 1.7994403839111328, "learning_rate": 9.880692751763952e-07, "loss": 0.7967, "step": 66650 }, { "epoch": 0.8123408041144139, "grad_norm": 2.038689374923706, "learning_rate": 9.877485567671586e-07, "loss": 0.8283, "step": 66655 }, { "epoch": 0.8124017403385616, "grad_norm": 1.8802084922790527, "learning_rate": 9.874278383579219e-07, "loss": 0.8358, "step": 66660 }, { "epoch": 0.8124626765627094, "grad_norm": 1.9846752882003784, "learning_rate": 9.87107119948685e-07, "loss": 0.7997, "step": 66665 }, { "epoch": 0.8125236127868573, "grad_norm": 1.8560495376586914, "learning_rate": 9.867864015394485e-07, "loss": 0.7562, "step": 66670 }, { "epoch": 0.8125845490110051, "grad_norm": 1.8832917213439941, "learning_rate": 9.864656831302118e-07, "loss": 0.7723, "step": 66675 }, { "epoch": 0.8126454852351529, "grad_norm": 1.9105323553085327, "learning_rate": 9.86144964720975e-07, "loss": 0.8151, "step": 66680 }, { "epoch": 0.8127064214593007, "grad_norm": 1.8010873794555664, "learning_rate": 9.858242463117382e-07, "loss": 0.7852, "step": 66685 }, { "epoch": 0.8127673576834485, "grad_norm": 1.8203572034835815, "learning_rate": 9.855035279025017e-07, "loss": 0.8307, "step": 66690 }, { "epoch": 0.8128282939075963, "grad_norm": 1.986550211906433, "learning_rate": 9.85182809493265e-07, "loss": 0.8156, "step": 66695 }, { "epoch": 0.8128892301317441, "grad_norm": 1.9838032722473145, "learning_rate": 9.848620910840283e-07, "loss": 0.7516, "step": 66700 }, { "epoch": 0.8129501663558919, "grad_norm": 2.0181760787963867, "learning_rate": 9.845413726747916e-07, "loss": 0.7809, "step": 66705 }, { "epoch": 0.8130111025800397, "grad_norm": 1.7878800630569458, "learning_rate": 9.84220654265555e-07, "loss": 0.7759, "step": 66710 }, { "epoch": 0.8130720388041875, "grad_norm": 1.7651513814926147, "learning_rate": 9.838999358563182e-07, "loss": 0.7958, "step": 66715 }, { "epoch": 0.8131329750283354, "grad_norm": 1.5594645738601685, "learning_rate": 9.835792174470814e-07, "loss": 0.7859, "step": 66720 }, { "epoch": 0.8131939112524832, "grad_norm": 1.7818397283554077, "learning_rate": 9.832584990378449e-07, "loss": 0.7646, "step": 66725 }, { "epoch": 0.8132548474766309, "grad_norm": 1.6319233179092407, "learning_rate": 9.829377806286081e-07, "loss": 0.7676, "step": 66730 }, { "epoch": 0.8133157837007787, "grad_norm": 2.0400454998016357, "learning_rate": 9.826170622193716e-07, "loss": 0.8559, "step": 66735 }, { "epoch": 0.8133767199249266, "grad_norm": 1.7740226984024048, "learning_rate": 9.822963438101348e-07, "loss": 0.7656, "step": 66740 }, { "epoch": 0.8134376561490744, "grad_norm": 1.7627054452896118, "learning_rate": 9.81975625400898e-07, "loss": 0.8231, "step": 66745 }, { "epoch": 0.8134985923732222, "grad_norm": 1.986975073814392, "learning_rate": 9.816549069916615e-07, "loss": 0.7463, "step": 66750 }, { "epoch": 0.81355952859737, "grad_norm": 2.0432047843933105, "learning_rate": 9.813341885824247e-07, "loss": 0.8251, "step": 66755 }, { "epoch": 0.8136204648215178, "grad_norm": 2.008967399597168, "learning_rate": 9.81013470173188e-07, "loss": 0.7941, "step": 66760 }, { "epoch": 0.8136814010456656, "grad_norm": 1.9394704103469849, "learning_rate": 9.806927517639514e-07, "loss": 0.7992, "step": 66765 }, { "epoch": 0.8137423372698134, "grad_norm": 2.091226577758789, "learning_rate": 9.803720333547146e-07, "loss": 0.8544, "step": 66770 }, { "epoch": 0.8138032734939612, "grad_norm": 1.9153004884719849, "learning_rate": 9.80051314945478e-07, "loss": 0.7482, "step": 66775 }, { "epoch": 0.813864209718109, "grad_norm": 2.1196062564849854, "learning_rate": 9.797305965362412e-07, "loss": 0.753, "step": 66780 }, { "epoch": 0.8139251459422568, "grad_norm": 1.8346128463745117, "learning_rate": 9.794098781270047e-07, "loss": 0.8002, "step": 66785 }, { "epoch": 0.8139860821664047, "grad_norm": 1.7572033405303955, "learning_rate": 9.79089159717768e-07, "loss": 0.8288, "step": 66790 }, { "epoch": 0.8140470183905525, "grad_norm": 1.7024924755096436, "learning_rate": 9.787684413085311e-07, "loss": 0.8481, "step": 66795 }, { "epoch": 0.8141079546147002, "grad_norm": 1.6387525796890259, "learning_rate": 9.784477228992944e-07, "loss": 0.7574, "step": 66800 }, { "epoch": 0.814168890838848, "grad_norm": 1.7277809381484985, "learning_rate": 9.781270044900578e-07, "loss": 0.747, "step": 66805 }, { "epoch": 0.8142298270629958, "grad_norm": 2.0641722679138184, "learning_rate": 9.77806286080821e-07, "loss": 0.8603, "step": 66810 }, { "epoch": 0.8142907632871437, "grad_norm": 2.0329766273498535, "learning_rate": 9.774855676715845e-07, "loss": 0.8314, "step": 66815 }, { "epoch": 0.8143516995112915, "grad_norm": 2.086839199066162, "learning_rate": 9.771648492623477e-07, "loss": 0.7993, "step": 66820 }, { "epoch": 0.8144126357354393, "grad_norm": 1.7664984464645386, "learning_rate": 9.768441308531112e-07, "loss": 0.7891, "step": 66825 }, { "epoch": 0.8144735719595871, "grad_norm": 2.047645092010498, "learning_rate": 9.765234124438744e-07, "loss": 0.8232, "step": 66830 }, { "epoch": 0.8145345081837349, "grad_norm": 2.144758701324463, "learning_rate": 9.762026940346376e-07, "loss": 0.7757, "step": 66835 }, { "epoch": 0.8145954444078827, "grad_norm": 2.3325605392456055, "learning_rate": 9.758819756254008e-07, "loss": 0.8068, "step": 66840 }, { "epoch": 0.8146563806320305, "grad_norm": 1.9708834886550903, "learning_rate": 9.755612572161643e-07, "loss": 0.7781, "step": 66845 }, { "epoch": 0.8147173168561783, "grad_norm": 1.8452750444412231, "learning_rate": 9.752405388069277e-07, "loss": 0.8183, "step": 66850 }, { "epoch": 0.8147782530803261, "grad_norm": 1.7823903560638428, "learning_rate": 9.74919820397691e-07, "loss": 0.7264, "step": 66855 }, { "epoch": 0.814839189304474, "grad_norm": 1.8408995866775513, "learning_rate": 9.745991019884542e-07, "loss": 0.8574, "step": 66860 }, { "epoch": 0.8149001255286218, "grad_norm": 1.8055740594863892, "learning_rate": 9.742783835792176e-07, "loss": 0.8236, "step": 66865 }, { "epoch": 0.8149610617527695, "grad_norm": 1.9120937585830688, "learning_rate": 9.739576651699808e-07, "loss": 0.7579, "step": 66870 }, { "epoch": 0.8150219979769173, "grad_norm": 1.9799093008041382, "learning_rate": 9.73636946760744e-07, "loss": 0.807, "step": 66875 }, { "epoch": 0.8150829342010651, "grad_norm": 1.900396466255188, "learning_rate": 9.733162283515073e-07, "loss": 0.8424, "step": 66880 }, { "epoch": 0.815143870425213, "grad_norm": 1.781091570854187, "learning_rate": 9.729955099422707e-07, "loss": 0.7643, "step": 66885 }, { "epoch": 0.8152048066493608, "grad_norm": 1.6279840469360352, "learning_rate": 9.726747915330342e-07, "loss": 0.8329, "step": 66890 }, { "epoch": 0.8152657428735086, "grad_norm": 1.8523956537246704, "learning_rate": 9.723540731237974e-07, "loss": 0.7809, "step": 66895 }, { "epoch": 0.8153266790976564, "grad_norm": 2.1940224170684814, "learning_rate": 9.720333547145606e-07, "loss": 0.8037, "step": 66900 }, { "epoch": 0.8153876153218041, "grad_norm": 1.7837765216827393, "learning_rate": 9.71712636305324e-07, "loss": 0.7991, "step": 66905 }, { "epoch": 0.815448551545952, "grad_norm": 2.329277515411377, "learning_rate": 9.713919178960873e-07, "loss": 0.8265, "step": 66910 }, { "epoch": 0.8155094877700998, "grad_norm": 2.0528664588928223, "learning_rate": 9.710711994868505e-07, "loss": 0.7925, "step": 66915 }, { "epoch": 0.8155704239942476, "grad_norm": 2.066293954849243, "learning_rate": 9.70750481077614e-07, "loss": 0.8141, "step": 66920 }, { "epoch": 0.8156313602183954, "grad_norm": 2.0893101692199707, "learning_rate": 9.704297626683772e-07, "loss": 0.8324, "step": 66925 }, { "epoch": 0.8156922964425433, "grad_norm": 1.6678729057312012, "learning_rate": 9.701090442591406e-07, "loss": 0.8336, "step": 66930 }, { "epoch": 0.8157532326666911, "grad_norm": 1.7948322296142578, "learning_rate": 9.697883258499039e-07, "loss": 0.8115, "step": 66935 }, { "epoch": 0.8158141688908388, "grad_norm": 1.7882835865020752, "learning_rate": 9.69467607440667e-07, "loss": 0.7909, "step": 66940 }, { "epoch": 0.8158751051149866, "grad_norm": 2.000580310821533, "learning_rate": 9.691468890314305e-07, "loss": 0.792, "step": 66945 }, { "epoch": 0.8159360413391344, "grad_norm": 2.1045150756835938, "learning_rate": 9.688261706221938e-07, "loss": 0.7304, "step": 66950 }, { "epoch": 0.8159969775632823, "grad_norm": 2.1062631607055664, "learning_rate": 9.68505452212957e-07, "loss": 0.8115, "step": 66955 }, { "epoch": 0.8160579137874301, "grad_norm": 1.92064368724823, "learning_rate": 9.681847338037204e-07, "loss": 0.7346, "step": 66960 }, { "epoch": 0.8161188500115779, "grad_norm": 1.9783552885055542, "learning_rate": 9.678640153944837e-07, "loss": 0.8266, "step": 66965 }, { "epoch": 0.8161797862357257, "grad_norm": 2.450998067855835, "learning_rate": 9.675432969852471e-07, "loss": 0.847, "step": 66970 }, { "epoch": 0.8162407224598734, "grad_norm": 1.7155261039733887, "learning_rate": 9.672225785760103e-07, "loss": 0.8071, "step": 66975 }, { "epoch": 0.8163016586840213, "grad_norm": 2.064432382583618, "learning_rate": 9.669018601667736e-07, "loss": 0.8279, "step": 66980 }, { "epoch": 0.8163625949081691, "grad_norm": 1.8270090818405151, "learning_rate": 9.66581141757537e-07, "loss": 0.7906, "step": 66985 }, { "epoch": 0.8164235311323169, "grad_norm": 2.226210832595825, "learning_rate": 9.662604233483002e-07, "loss": 0.8164, "step": 66990 }, { "epoch": 0.8164844673564647, "grad_norm": 1.7110602855682373, "learning_rate": 9.659397049390635e-07, "loss": 0.8062, "step": 66995 }, { "epoch": 0.8165454035806126, "grad_norm": 1.9011656045913696, "learning_rate": 9.65618986529827e-07, "loss": 0.886, "step": 67000 }, { "epoch": 0.8166063398047604, "grad_norm": 1.7182717323303223, "learning_rate": 9.652982681205903e-07, "loss": 0.8015, "step": 67005 }, { "epoch": 0.8166672760289081, "grad_norm": 1.9775673151016235, "learning_rate": 9.649775497113536e-07, "loss": 0.8182, "step": 67010 }, { "epoch": 0.8167282122530559, "grad_norm": 1.7569596767425537, "learning_rate": 9.646568313021168e-07, "loss": 0.8155, "step": 67015 }, { "epoch": 0.8167891484772037, "grad_norm": 2.088104724884033, "learning_rate": 9.6433611289288e-07, "loss": 0.8405, "step": 67020 }, { "epoch": 0.8168500847013516, "grad_norm": 1.9143763780593872, "learning_rate": 9.640153944836435e-07, "loss": 0.8522, "step": 67025 }, { "epoch": 0.8169110209254994, "grad_norm": 1.7494581937789917, "learning_rate": 9.636946760744067e-07, "loss": 0.8607, "step": 67030 }, { "epoch": 0.8169719571496472, "grad_norm": 2.288775682449341, "learning_rate": 9.6337395766517e-07, "loss": 0.7609, "step": 67035 }, { "epoch": 0.817032893373795, "grad_norm": 1.9674992561340332, "learning_rate": 9.630532392559334e-07, "loss": 0.7432, "step": 67040 }, { "epoch": 0.8170938295979427, "grad_norm": 1.9665857553482056, "learning_rate": 9.627325208466968e-07, "loss": 0.8343, "step": 67045 }, { "epoch": 0.8171547658220906, "grad_norm": 1.835342526435852, "learning_rate": 9.6241180243746e-07, "loss": 0.7644, "step": 67050 }, { "epoch": 0.8172157020462384, "grad_norm": 1.9289631843566895, "learning_rate": 9.620910840282233e-07, "loss": 0.8244, "step": 67055 }, { "epoch": 0.8172766382703862, "grad_norm": 2.1390225887298584, "learning_rate": 9.617703656189865e-07, "loss": 0.7799, "step": 67060 }, { "epoch": 0.817337574494534, "grad_norm": 1.9072283506393433, "learning_rate": 9.6144964720975e-07, "loss": 0.8473, "step": 67065 }, { "epoch": 0.8173985107186819, "grad_norm": 1.9144129753112793, "learning_rate": 9.611289288005132e-07, "loss": 0.8044, "step": 67070 }, { "epoch": 0.8174594469428297, "grad_norm": 2.02471661567688, "learning_rate": 9.608082103912766e-07, "loss": 0.824, "step": 67075 }, { "epoch": 0.8175203831669774, "grad_norm": 1.9962778091430664, "learning_rate": 9.604874919820398e-07, "loss": 0.8444, "step": 67080 }, { "epoch": 0.8175813193911252, "grad_norm": 2.091400623321533, "learning_rate": 9.601667735728033e-07, "loss": 0.8537, "step": 67085 }, { "epoch": 0.817642255615273, "grad_norm": 1.9797770977020264, "learning_rate": 9.598460551635665e-07, "loss": 0.8725, "step": 67090 }, { "epoch": 0.8177031918394209, "grad_norm": 2.028007745742798, "learning_rate": 9.595253367543297e-07, "loss": 0.8404, "step": 67095 }, { "epoch": 0.8177641280635687, "grad_norm": 1.939217209815979, "learning_rate": 9.592046183450932e-07, "loss": 0.801, "step": 67100 }, { "epoch": 0.8178250642877165, "grad_norm": 1.7945301532745361, "learning_rate": 9.588838999358564e-07, "loss": 0.7525, "step": 67105 }, { "epoch": 0.8178860005118643, "grad_norm": 2.1406517028808594, "learning_rate": 9.585631815266196e-07, "loss": 0.8503, "step": 67110 }, { "epoch": 0.817946936736012, "grad_norm": 2.400280475616455, "learning_rate": 9.58242463117383e-07, "loss": 0.8901, "step": 67115 }, { "epoch": 0.8180078729601599, "grad_norm": 2.0577635765075684, "learning_rate": 9.579217447081463e-07, "loss": 0.8435, "step": 67120 }, { "epoch": 0.8180688091843077, "grad_norm": 2.1906516551971436, "learning_rate": 9.576010262989097e-07, "loss": 0.8003, "step": 67125 }, { "epoch": 0.8181297454084555, "grad_norm": 1.8882979154586792, "learning_rate": 9.57280307889673e-07, "loss": 0.7985, "step": 67130 }, { "epoch": 0.8181906816326033, "grad_norm": 1.967780351638794, "learning_rate": 9.569595894804362e-07, "loss": 0.8308, "step": 67135 }, { "epoch": 0.8182516178567512, "grad_norm": 2.1490259170532227, "learning_rate": 9.566388710711996e-07, "loss": 0.8318, "step": 67140 }, { "epoch": 0.818312554080899, "grad_norm": 1.821493148803711, "learning_rate": 9.563181526619629e-07, "loss": 0.8176, "step": 67145 }, { "epoch": 0.8183734903050467, "grad_norm": 2.1858882904052734, "learning_rate": 9.55997434252726e-07, "loss": 0.8037, "step": 67150 }, { "epoch": 0.8184344265291945, "grad_norm": 1.745837926864624, "learning_rate": 9.556767158434895e-07, "loss": 0.8212, "step": 67155 }, { "epoch": 0.8184953627533423, "grad_norm": 2.16455340385437, "learning_rate": 9.553559974342528e-07, "loss": 0.8318, "step": 67160 }, { "epoch": 0.8185562989774902, "grad_norm": 2.2729885578155518, "learning_rate": 9.550352790250162e-07, "loss": 0.8022, "step": 67165 }, { "epoch": 0.818617235201638, "grad_norm": 1.7897974252700806, "learning_rate": 9.547145606157794e-07, "loss": 0.8568, "step": 67170 }, { "epoch": 0.8186781714257858, "grad_norm": 1.5880568027496338, "learning_rate": 9.543938422065427e-07, "loss": 0.7963, "step": 67175 }, { "epoch": 0.8187391076499336, "grad_norm": 1.684699535369873, "learning_rate": 9.54073123797306e-07, "loss": 0.793, "step": 67180 }, { "epoch": 0.8188000438740813, "grad_norm": 1.8274428844451904, "learning_rate": 9.537524053880693e-07, "loss": 0.8495, "step": 67185 }, { "epoch": 0.8188609800982292, "grad_norm": 2.9474117755889893, "learning_rate": 9.534316869788327e-07, "loss": 0.7365, "step": 67190 }, { "epoch": 0.818921916322377, "grad_norm": 1.7429473400115967, "learning_rate": 9.531109685695959e-07, "loss": 0.8031, "step": 67195 }, { "epoch": 0.8189828525465248, "grad_norm": 1.8681001663208008, "learning_rate": 9.527902501603593e-07, "loss": 0.7773, "step": 67200 }, { "epoch": 0.8190437887706726, "grad_norm": 1.8202437162399292, "learning_rate": 9.524695317511227e-07, "loss": 0.8515, "step": 67205 }, { "epoch": 0.8191047249948205, "grad_norm": 2.112156391143799, "learning_rate": 9.521488133418859e-07, "loss": 0.836, "step": 67210 }, { "epoch": 0.8191656612189683, "grad_norm": 1.8997517824172974, "learning_rate": 9.518280949326491e-07, "loss": 0.7827, "step": 67215 }, { "epoch": 0.819226597443116, "grad_norm": 1.7917555570602417, "learning_rate": 9.515073765234126e-07, "loss": 0.8473, "step": 67220 }, { "epoch": 0.8192875336672638, "grad_norm": 2.210620880126953, "learning_rate": 9.511866581141759e-07, "loss": 0.8611, "step": 67225 }, { "epoch": 0.8193484698914116, "grad_norm": 1.7583024501800537, "learning_rate": 9.508659397049391e-07, "loss": 0.7458, "step": 67230 }, { "epoch": 0.8194094061155595, "grad_norm": 1.986251711845398, "learning_rate": 9.505452212957024e-07, "loss": 0.802, "step": 67235 }, { "epoch": 0.8194703423397073, "grad_norm": 1.6953065395355225, "learning_rate": 9.502245028864658e-07, "loss": 0.7129, "step": 67240 }, { "epoch": 0.8195312785638551, "grad_norm": 1.8270848989486694, "learning_rate": 9.499037844772291e-07, "loss": 0.7594, "step": 67245 }, { "epoch": 0.8195922147880029, "grad_norm": 1.6639442443847656, "learning_rate": 9.495830660679924e-07, "loss": 0.7822, "step": 67250 }, { "epoch": 0.8196531510121506, "grad_norm": 1.7839378118515015, "learning_rate": 9.492623476587556e-07, "loss": 0.7486, "step": 67255 }, { "epoch": 0.8197140872362985, "grad_norm": 2.0520224571228027, "learning_rate": 9.48941629249519e-07, "loss": 0.843, "step": 67260 }, { "epoch": 0.8197750234604463, "grad_norm": 2.41575026512146, "learning_rate": 9.486209108402824e-07, "loss": 0.8357, "step": 67265 }, { "epoch": 0.8198359596845941, "grad_norm": 1.8995612859725952, "learning_rate": 9.483001924310456e-07, "loss": 0.7861, "step": 67270 }, { "epoch": 0.8198968959087419, "grad_norm": 1.8882564306259155, "learning_rate": 9.479794740218089e-07, "loss": 0.8382, "step": 67275 }, { "epoch": 0.8199578321328898, "grad_norm": 1.8738014698028564, "learning_rate": 9.476587556125723e-07, "loss": 0.8408, "step": 67280 }, { "epoch": 0.8200187683570376, "grad_norm": 1.924380898475647, "learning_rate": 9.473380372033356e-07, "loss": 0.8202, "step": 67285 }, { "epoch": 0.8200797045811853, "grad_norm": 2.240236520767212, "learning_rate": 9.470173187940988e-07, "loss": 0.811, "step": 67290 }, { "epoch": 0.8201406408053331, "grad_norm": 2.42977237701416, "learning_rate": 9.466966003848622e-07, "loss": 0.84, "step": 67295 }, { "epoch": 0.8202015770294809, "grad_norm": 1.9938191175460815, "learning_rate": 9.463758819756255e-07, "loss": 0.8682, "step": 67300 }, { "epoch": 0.8202625132536288, "grad_norm": 1.889960527420044, "learning_rate": 9.460551635663888e-07, "loss": 0.7554, "step": 67305 }, { "epoch": 0.8203234494777766, "grad_norm": 1.904619812965393, "learning_rate": 9.45734445157152e-07, "loss": 0.8033, "step": 67310 }, { "epoch": 0.8203843857019244, "grad_norm": 1.9552409648895264, "learning_rate": 9.454137267479154e-07, "loss": 0.7768, "step": 67315 }, { "epoch": 0.8204453219260722, "grad_norm": 1.6750041246414185, "learning_rate": 9.450930083386787e-07, "loss": 0.8217, "step": 67320 }, { "epoch": 0.8205062581502199, "grad_norm": 1.9895387887954712, "learning_rate": 9.447722899294421e-07, "loss": 0.8428, "step": 67325 }, { "epoch": 0.8205671943743678, "grad_norm": 2.0286526679992676, "learning_rate": 9.444515715202053e-07, "loss": 0.8582, "step": 67330 }, { "epoch": 0.8206281305985156, "grad_norm": 1.7590917348861694, "learning_rate": 9.441308531109686e-07, "loss": 0.7593, "step": 67335 }, { "epoch": 0.8206890668226634, "grad_norm": 1.869249701499939, "learning_rate": 9.43810134701732e-07, "loss": 0.7655, "step": 67340 }, { "epoch": 0.8207500030468112, "grad_norm": 1.649871587753296, "learning_rate": 9.434894162924953e-07, "loss": 0.8191, "step": 67345 }, { "epoch": 0.820810939270959, "grad_norm": 1.8434652090072632, "learning_rate": 9.431686978832585e-07, "loss": 0.7807, "step": 67350 }, { "epoch": 0.8208718754951069, "grad_norm": 1.9348217248916626, "learning_rate": 9.428479794740218e-07, "loss": 0.8158, "step": 67355 }, { "epoch": 0.8209328117192546, "grad_norm": 2.2107677459716797, "learning_rate": 9.425272610647852e-07, "loss": 0.8098, "step": 67360 }, { "epoch": 0.8209937479434024, "grad_norm": 1.967750072479248, "learning_rate": 9.422065426555485e-07, "loss": 0.8392, "step": 67365 }, { "epoch": 0.8210546841675502, "grad_norm": 1.9245240688323975, "learning_rate": 9.418858242463117e-07, "loss": 0.8427, "step": 67370 }, { "epoch": 0.821115620391698, "grad_norm": 2.4499990940093994, "learning_rate": 9.415651058370752e-07, "loss": 0.8397, "step": 67375 }, { "epoch": 0.8211765566158459, "grad_norm": 2.463712692260742, "learning_rate": 9.412443874278385e-07, "loss": 0.8106, "step": 67380 }, { "epoch": 0.8212374928399937, "grad_norm": 2.1235671043395996, "learning_rate": 9.409236690186017e-07, "loss": 0.7878, "step": 67385 }, { "epoch": 0.8212984290641415, "grad_norm": 1.966606855392456, "learning_rate": 9.40602950609365e-07, "loss": 0.8073, "step": 67390 }, { "epoch": 0.8213593652882892, "grad_norm": 1.9299098253250122, "learning_rate": 9.402822322001284e-07, "loss": 0.753, "step": 67395 }, { "epoch": 0.8214203015124371, "grad_norm": 1.6779478788375854, "learning_rate": 9.399615137908918e-07, "loss": 0.8299, "step": 67400 }, { "epoch": 0.8214812377365849, "grad_norm": 1.9318822622299194, "learning_rate": 9.39640795381655e-07, "loss": 0.808, "step": 67405 }, { "epoch": 0.8215421739607327, "grad_norm": 2.3227460384368896, "learning_rate": 9.393200769724182e-07, "loss": 0.7628, "step": 67410 }, { "epoch": 0.8216031101848805, "grad_norm": 1.899175763130188, "learning_rate": 9.389993585631816e-07, "loss": 0.8503, "step": 67415 }, { "epoch": 0.8216640464090283, "grad_norm": 2.124706268310547, "learning_rate": 9.38678640153945e-07, "loss": 0.7766, "step": 67420 }, { "epoch": 0.8217249826331762, "grad_norm": 2.177891969680786, "learning_rate": 9.383579217447082e-07, "loss": 0.8588, "step": 67425 }, { "epoch": 0.8217859188573239, "grad_norm": 1.7666757106781006, "learning_rate": 9.380372033354714e-07, "loss": 0.7671, "step": 67430 }, { "epoch": 0.8218468550814717, "grad_norm": 2.054809331893921, "learning_rate": 9.377164849262349e-07, "loss": 0.8551, "step": 67435 }, { "epoch": 0.8219077913056195, "grad_norm": 1.9323967695236206, "learning_rate": 9.373957665169982e-07, "loss": 0.8143, "step": 67440 }, { "epoch": 0.8219687275297674, "grad_norm": 1.9381299018859863, "learning_rate": 9.370750481077614e-07, "loss": 0.8079, "step": 67445 }, { "epoch": 0.8220296637539152, "grad_norm": 2.1414549350738525, "learning_rate": 9.367543296985248e-07, "loss": 0.816, "step": 67450 }, { "epoch": 0.822090599978063, "grad_norm": 1.7340962886810303, "learning_rate": 9.364336112892881e-07, "loss": 0.8132, "step": 67455 }, { "epoch": 0.8221515362022108, "grad_norm": 2.1418991088867188, "learning_rate": 9.361128928800514e-07, "loss": 0.8181, "step": 67460 }, { "epoch": 0.8222124724263585, "grad_norm": 1.866922378540039, "learning_rate": 9.357921744708147e-07, "loss": 0.8101, "step": 67465 }, { "epoch": 0.8222734086505064, "grad_norm": 2.40683650970459, "learning_rate": 9.35471456061578e-07, "loss": 0.8972, "step": 67470 }, { "epoch": 0.8223343448746542, "grad_norm": 1.7358359098434448, "learning_rate": 9.351507376523413e-07, "loss": 0.7931, "step": 67475 }, { "epoch": 0.822395281098802, "grad_norm": 1.6237773895263672, "learning_rate": 9.348300192431047e-07, "loss": 0.7948, "step": 67480 }, { "epoch": 0.8224562173229498, "grad_norm": 2.2915053367614746, "learning_rate": 9.345093008338679e-07, "loss": 0.7197, "step": 67485 }, { "epoch": 0.8225171535470976, "grad_norm": 1.9800909757614136, "learning_rate": 9.341885824246312e-07, "loss": 0.7933, "step": 67490 }, { "epoch": 0.8225780897712455, "grad_norm": 1.9503401517868042, "learning_rate": 9.338678640153946e-07, "loss": 0.7944, "step": 67495 }, { "epoch": 0.8226390259953932, "grad_norm": 1.8595417737960815, "learning_rate": 9.335471456061579e-07, "loss": 0.8243, "step": 67500 }, { "epoch": 0.822699962219541, "grad_norm": 1.7184014320373535, "learning_rate": 9.332264271969211e-07, "loss": 0.8001, "step": 67505 }, { "epoch": 0.8227608984436888, "grad_norm": 1.6584268808364868, "learning_rate": 9.329057087876845e-07, "loss": 0.8507, "step": 67510 }, { "epoch": 0.8228218346678366, "grad_norm": 2.456324338912964, "learning_rate": 9.325849903784478e-07, "loss": 0.8689, "step": 67515 }, { "epoch": 0.8228827708919845, "grad_norm": 2.1974074840545654, "learning_rate": 9.322642719692111e-07, "loss": 0.8315, "step": 67520 }, { "epoch": 0.8229437071161323, "grad_norm": 2.2190473079681396, "learning_rate": 9.319435535599744e-07, "loss": 0.8584, "step": 67525 }, { "epoch": 0.8230046433402801, "grad_norm": 1.8782199621200562, "learning_rate": 9.316228351507377e-07, "loss": 0.7904, "step": 67530 }, { "epoch": 0.8230655795644278, "grad_norm": 1.801018476486206, "learning_rate": 9.31302116741501e-07, "loss": 0.8008, "step": 67535 }, { "epoch": 0.8231265157885757, "grad_norm": 1.775309681892395, "learning_rate": 9.309813983322644e-07, "loss": 0.8988, "step": 67540 }, { "epoch": 0.8231874520127235, "grad_norm": 2.413078546524048, "learning_rate": 9.306606799230276e-07, "loss": 0.8106, "step": 67545 }, { "epoch": 0.8232483882368713, "grad_norm": 2.1624224185943604, "learning_rate": 9.303399615137909e-07, "loss": 0.8048, "step": 67550 }, { "epoch": 0.8233093244610191, "grad_norm": 1.8689452409744263, "learning_rate": 9.300192431045544e-07, "loss": 0.8124, "step": 67555 }, { "epoch": 0.8233702606851669, "grad_norm": 1.764175534248352, "learning_rate": 9.296985246953176e-07, "loss": 0.7873, "step": 67560 }, { "epoch": 0.8234311969093148, "grad_norm": 2.1417901515960693, "learning_rate": 9.293778062860808e-07, "loss": 0.8195, "step": 67565 }, { "epoch": 0.8234921331334625, "grad_norm": 2.044367551803589, "learning_rate": 9.290570878768442e-07, "loss": 0.8306, "step": 67570 }, { "epoch": 0.8235530693576103, "grad_norm": 2.124919891357422, "learning_rate": 9.287363694676076e-07, "loss": 0.7823, "step": 67575 }, { "epoch": 0.8236140055817581, "grad_norm": 1.8493579626083374, "learning_rate": 9.284156510583708e-07, "loss": 0.7642, "step": 67580 }, { "epoch": 0.823674941805906, "grad_norm": 1.9795869588851929, "learning_rate": 9.280949326491341e-07, "loss": 0.842, "step": 67585 }, { "epoch": 0.8237358780300538, "grad_norm": 1.840332269668579, "learning_rate": 9.277742142398974e-07, "loss": 0.7923, "step": 67590 }, { "epoch": 0.8237968142542016, "grad_norm": 2.4918582439422607, "learning_rate": 9.274534958306608e-07, "loss": 0.7753, "step": 67595 }, { "epoch": 0.8238577504783493, "grad_norm": 2.1866822242736816, "learning_rate": 9.271327774214241e-07, "loss": 0.8277, "step": 67600 }, { "epoch": 0.8239186867024971, "grad_norm": 1.8605867624282837, "learning_rate": 9.268120590121873e-07, "loss": 0.8237, "step": 67605 }, { "epoch": 0.823979622926645, "grad_norm": 1.7626773118972778, "learning_rate": 9.264913406029506e-07, "loss": 0.8402, "step": 67610 }, { "epoch": 0.8240405591507928, "grad_norm": 2.192758083343506, "learning_rate": 9.261706221937141e-07, "loss": 0.917, "step": 67615 }, { "epoch": 0.8241014953749406, "grad_norm": 2.0730490684509277, "learning_rate": 9.258499037844773e-07, "loss": 0.8569, "step": 67620 }, { "epoch": 0.8241624315990884, "grad_norm": 1.7323638200759888, "learning_rate": 9.255291853752406e-07, "loss": 0.8046, "step": 67625 }, { "epoch": 0.8242233678232362, "grad_norm": 2.061391830444336, "learning_rate": 9.252084669660039e-07, "loss": 0.7727, "step": 67630 }, { "epoch": 0.824284304047384, "grad_norm": 1.91548490524292, "learning_rate": 9.248877485567673e-07, "loss": 0.8, "step": 67635 }, { "epoch": 0.8243452402715318, "grad_norm": 1.7991358041763306, "learning_rate": 9.245670301475305e-07, "loss": 0.8602, "step": 67640 }, { "epoch": 0.8244061764956796, "grad_norm": 2.011701822280884, "learning_rate": 9.242463117382939e-07, "loss": 0.8134, "step": 67645 }, { "epoch": 0.8244671127198274, "grad_norm": 2.24936842918396, "learning_rate": 9.239255933290571e-07, "loss": 0.7773, "step": 67650 }, { "epoch": 0.8245280489439752, "grad_norm": 1.7910563945770264, "learning_rate": 9.236048749198205e-07, "loss": 0.7895, "step": 67655 }, { "epoch": 0.8245889851681231, "grad_norm": 2.0592963695526123, "learning_rate": 9.232841565105838e-07, "loss": 0.7783, "step": 67660 }, { "epoch": 0.8246499213922709, "grad_norm": 2.0637271404266357, "learning_rate": 9.229634381013471e-07, "loss": 0.8415, "step": 67665 }, { "epoch": 0.8247108576164186, "grad_norm": 2.4647068977355957, "learning_rate": 9.226427196921103e-07, "loss": 0.7949, "step": 67670 }, { "epoch": 0.8247717938405664, "grad_norm": 1.9138234853744507, "learning_rate": 9.223220012828738e-07, "loss": 0.8003, "step": 67675 }, { "epoch": 0.8248327300647142, "grad_norm": 1.7902220487594604, "learning_rate": 9.22001282873637e-07, "loss": 0.8433, "step": 67680 }, { "epoch": 0.8248936662888621, "grad_norm": 1.7868579626083374, "learning_rate": 9.216805644644003e-07, "loss": 0.8072, "step": 67685 }, { "epoch": 0.8249546025130099, "grad_norm": 2.043647050857544, "learning_rate": 9.213598460551637e-07, "loss": 0.8411, "step": 67690 }, { "epoch": 0.8250155387371577, "grad_norm": 1.720467448234558, "learning_rate": 9.21039127645927e-07, "loss": 0.7787, "step": 67695 }, { "epoch": 0.8250764749613055, "grad_norm": 2.1970672607421875, "learning_rate": 9.207184092366902e-07, "loss": 0.8472, "step": 67700 }, { "epoch": 0.8251374111854533, "grad_norm": 2.2189860343933105, "learning_rate": 9.203976908274536e-07, "loss": 0.7967, "step": 67705 }, { "epoch": 0.8251983474096011, "grad_norm": 2.0633387565612793, "learning_rate": 9.200769724182169e-07, "loss": 0.7809, "step": 67710 }, { "epoch": 0.8252592836337489, "grad_norm": 2.6756389141082764, "learning_rate": 9.197562540089802e-07, "loss": 0.7888, "step": 67715 }, { "epoch": 0.8253202198578967, "grad_norm": 1.7681634426116943, "learning_rate": 9.194355355997435e-07, "loss": 0.787, "step": 67720 }, { "epoch": 0.8253811560820445, "grad_norm": 1.828963279724121, "learning_rate": 9.191148171905068e-07, "loss": 0.8305, "step": 67725 }, { "epoch": 0.8254420923061924, "grad_norm": 2.0612006187438965, "learning_rate": 9.187940987812702e-07, "loss": 0.784, "step": 67730 }, { "epoch": 0.8255030285303402, "grad_norm": 2.1729865074157715, "learning_rate": 9.184733803720335e-07, "loss": 0.8314, "step": 67735 }, { "epoch": 0.8255639647544879, "grad_norm": 1.7002418041229248, "learning_rate": 9.181526619627967e-07, "loss": 0.7752, "step": 67740 }, { "epoch": 0.8256249009786357, "grad_norm": 1.9077965021133423, "learning_rate": 9.1783194355356e-07, "loss": 0.7904, "step": 67745 }, { "epoch": 0.8256858372027835, "grad_norm": 2.5839803218841553, "learning_rate": 9.175112251443235e-07, "loss": 0.887, "step": 67750 }, { "epoch": 0.8257467734269314, "grad_norm": 2.1448798179626465, "learning_rate": 9.171905067350867e-07, "loss": 0.7623, "step": 67755 }, { "epoch": 0.8258077096510792, "grad_norm": 2.2650396823883057, "learning_rate": 9.168697883258499e-07, "loss": 0.8045, "step": 67760 }, { "epoch": 0.825868645875227, "grad_norm": 2.206575870513916, "learning_rate": 9.165490699166133e-07, "loss": 0.8117, "step": 67765 }, { "epoch": 0.8259295820993748, "grad_norm": 1.8874722719192505, "learning_rate": 9.162283515073767e-07, "loss": 0.8522, "step": 67770 }, { "epoch": 0.8259905183235225, "grad_norm": 1.881399393081665, "learning_rate": 9.159076330981399e-07, "loss": 0.783, "step": 67775 }, { "epoch": 0.8260514545476704, "grad_norm": 2.076085090637207, "learning_rate": 9.155869146889032e-07, "loss": 0.7885, "step": 67780 }, { "epoch": 0.8261123907718182, "grad_norm": 1.9705991744995117, "learning_rate": 9.152661962796665e-07, "loss": 0.8842, "step": 67785 }, { "epoch": 0.826173326995966, "grad_norm": 1.6900577545166016, "learning_rate": 9.149454778704299e-07, "loss": 0.8552, "step": 67790 }, { "epoch": 0.8262342632201138, "grad_norm": 1.9397221803665161, "learning_rate": 9.146247594611932e-07, "loss": 0.8309, "step": 67795 }, { "epoch": 0.8262951994442617, "grad_norm": 2.0841526985168457, "learning_rate": 9.143040410519565e-07, "loss": 0.8787, "step": 67800 }, { "epoch": 0.8263561356684095, "grad_norm": 1.9923787117004395, "learning_rate": 9.139833226427197e-07, "loss": 0.8703, "step": 67805 }, { "epoch": 0.8264170718925572, "grad_norm": 1.9552018642425537, "learning_rate": 9.136626042334832e-07, "loss": 0.8348, "step": 67810 }, { "epoch": 0.826478008116705, "grad_norm": 2.301151752471924, "learning_rate": 9.133418858242464e-07, "loss": 0.7743, "step": 67815 }, { "epoch": 0.8265389443408528, "grad_norm": 1.9428958892822266, "learning_rate": 9.130211674150097e-07, "loss": 0.7833, "step": 67820 }, { "epoch": 0.8265998805650007, "grad_norm": 1.8040177822113037, "learning_rate": 9.12700449005773e-07, "loss": 0.7899, "step": 67825 }, { "epoch": 0.8266608167891485, "grad_norm": 1.7524058818817139, "learning_rate": 9.123797305965364e-07, "loss": 0.7279, "step": 67830 }, { "epoch": 0.8267217530132963, "grad_norm": 1.7887346744537354, "learning_rate": 9.120590121872996e-07, "loss": 0.821, "step": 67835 }, { "epoch": 0.8267826892374441, "grad_norm": 1.8506346940994263, "learning_rate": 9.11738293778063e-07, "loss": 0.7786, "step": 67840 }, { "epoch": 0.8268436254615918, "grad_norm": 2.086751699447632, "learning_rate": 9.114175753688262e-07, "loss": 0.8289, "step": 67845 }, { "epoch": 0.8269045616857397, "grad_norm": 1.7162247896194458, "learning_rate": 9.110968569595896e-07, "loss": 0.8267, "step": 67850 }, { "epoch": 0.8269654979098875, "grad_norm": 2.4820492267608643, "learning_rate": 9.107761385503529e-07, "loss": 0.7986, "step": 67855 }, { "epoch": 0.8270264341340353, "grad_norm": 2.0276029109954834, "learning_rate": 9.104554201411162e-07, "loss": 0.8017, "step": 67860 }, { "epoch": 0.8270873703581831, "grad_norm": 2.0113182067871094, "learning_rate": 9.101347017318794e-07, "loss": 0.849, "step": 67865 }, { "epoch": 0.827148306582331, "grad_norm": 2.1870338916778564, "learning_rate": 9.098139833226429e-07, "loss": 0.7827, "step": 67870 }, { "epoch": 0.8272092428064788, "grad_norm": 1.9038183689117432, "learning_rate": 9.094932649134061e-07, "loss": 0.8013, "step": 67875 }, { "epoch": 0.8272701790306265, "grad_norm": 2.267815113067627, "learning_rate": 9.091725465041694e-07, "loss": 0.8256, "step": 67880 }, { "epoch": 0.8273311152547743, "grad_norm": 1.683017373085022, "learning_rate": 9.088518280949326e-07, "loss": 0.7541, "step": 67885 }, { "epoch": 0.8273920514789221, "grad_norm": 1.8378010988235474, "learning_rate": 9.085311096856961e-07, "loss": 0.7905, "step": 67890 }, { "epoch": 0.82745298770307, "grad_norm": 1.896470308303833, "learning_rate": 9.082103912764593e-07, "loss": 0.8145, "step": 67895 }, { "epoch": 0.8275139239272178, "grad_norm": 1.9127360582351685, "learning_rate": 9.078896728672227e-07, "loss": 0.7985, "step": 67900 }, { "epoch": 0.8275748601513656, "grad_norm": 1.8778660297393799, "learning_rate": 9.075689544579859e-07, "loss": 0.8884, "step": 67905 }, { "epoch": 0.8276357963755134, "grad_norm": 2.267949342727661, "learning_rate": 9.072482360487493e-07, "loss": 0.8559, "step": 67910 }, { "epoch": 0.8276967325996611, "grad_norm": 1.6838725805282593, "learning_rate": 9.069275176395126e-07, "loss": 0.725, "step": 67915 }, { "epoch": 0.827757668823809, "grad_norm": 1.8502451181411743, "learning_rate": 9.066067992302759e-07, "loss": 0.7684, "step": 67920 }, { "epoch": 0.8278186050479568, "grad_norm": 1.957861065864563, "learning_rate": 9.062860808210391e-07, "loss": 0.8839, "step": 67925 }, { "epoch": 0.8278795412721046, "grad_norm": 2.028103828430176, "learning_rate": 9.059653624118026e-07, "loss": 0.8226, "step": 67930 }, { "epoch": 0.8279404774962524, "grad_norm": 1.989484190940857, "learning_rate": 9.056446440025658e-07, "loss": 0.8353, "step": 67935 }, { "epoch": 0.8280014137204003, "grad_norm": 2.0761234760284424, "learning_rate": 9.053239255933291e-07, "loss": 0.859, "step": 67940 }, { "epoch": 0.8280623499445481, "grad_norm": 1.9865031242370605, "learning_rate": 9.050032071840923e-07, "loss": 0.7274, "step": 67945 }, { "epoch": 0.8281232861686958, "grad_norm": 2.2115793228149414, "learning_rate": 9.046824887748558e-07, "loss": 0.7453, "step": 67950 }, { "epoch": 0.8281842223928436, "grad_norm": 1.9360592365264893, "learning_rate": 9.04361770365619e-07, "loss": 0.8789, "step": 67955 }, { "epoch": 0.8282451586169914, "grad_norm": 1.872248888015747, "learning_rate": 9.040410519563823e-07, "loss": 0.8169, "step": 67960 }, { "epoch": 0.8283060948411393, "grad_norm": 2.289792060852051, "learning_rate": 9.037203335471456e-07, "loss": 0.8219, "step": 67965 }, { "epoch": 0.8283670310652871, "grad_norm": 2.0476202964782715, "learning_rate": 9.03399615137909e-07, "loss": 0.8171, "step": 67970 }, { "epoch": 0.8284279672894349, "grad_norm": 2.752918243408203, "learning_rate": 9.030788967286724e-07, "loss": 0.8213, "step": 67975 }, { "epoch": 0.8284889035135827, "grad_norm": 1.8816308975219727, "learning_rate": 9.027581783194356e-07, "loss": 0.7947, "step": 67980 }, { "epoch": 0.8285498397377304, "grad_norm": 1.8923300504684448, "learning_rate": 9.02437459910199e-07, "loss": 0.7368, "step": 67985 }, { "epoch": 0.8286107759618783, "grad_norm": 1.7414251565933228, "learning_rate": 9.021167415009622e-07, "loss": 0.8353, "step": 67990 }, { "epoch": 0.8286717121860261, "grad_norm": 2.382474422454834, "learning_rate": 9.017960230917256e-07, "loss": 0.8933, "step": 67995 }, { "epoch": 0.8287326484101739, "grad_norm": 2.0288474559783936, "learning_rate": 9.014753046824888e-07, "loss": 0.8258, "step": 68000 }, { "epoch": 0.8287935846343217, "grad_norm": 1.843760371208191, "learning_rate": 9.011545862732523e-07, "loss": 0.7667, "step": 68005 }, { "epoch": 0.8288545208584696, "grad_norm": 2.244175910949707, "learning_rate": 9.008338678640155e-07, "loss": 0.8236, "step": 68010 }, { "epoch": 0.8289154570826174, "grad_norm": 1.86210036277771, "learning_rate": 9.005131494547788e-07, "loss": 0.8029, "step": 68015 }, { "epoch": 0.8289763933067651, "grad_norm": 1.6639952659606934, "learning_rate": 9.00192431045542e-07, "loss": 0.7728, "step": 68020 }, { "epoch": 0.8290373295309129, "grad_norm": 2.4289097785949707, "learning_rate": 8.998717126363055e-07, "loss": 0.8125, "step": 68025 }, { "epoch": 0.8290982657550607, "grad_norm": 1.7155015468597412, "learning_rate": 8.995509942270687e-07, "loss": 0.8117, "step": 68030 }, { "epoch": 0.8291592019792086, "grad_norm": 1.9745553731918335, "learning_rate": 8.99230275817832e-07, "loss": 0.8249, "step": 68035 }, { "epoch": 0.8292201382033564, "grad_norm": 2.4489316940307617, "learning_rate": 8.989095574085953e-07, "loss": 0.8082, "step": 68040 }, { "epoch": 0.8292810744275042, "grad_norm": 2.1494300365448, "learning_rate": 8.985888389993587e-07, "loss": 0.7878, "step": 68045 }, { "epoch": 0.829342010651652, "grad_norm": 2.2675371170043945, "learning_rate": 8.982681205901219e-07, "loss": 0.8132, "step": 68050 }, { "epoch": 0.8294029468757997, "grad_norm": 1.9906316995620728, "learning_rate": 8.979474021808853e-07, "loss": 0.762, "step": 68055 }, { "epoch": 0.8294638830999476, "grad_norm": 1.9179009199142456, "learning_rate": 8.976266837716485e-07, "loss": 0.7603, "step": 68060 }, { "epoch": 0.8295248193240954, "grad_norm": 1.7950481176376343, "learning_rate": 8.97305965362412e-07, "loss": 0.7913, "step": 68065 }, { "epoch": 0.8295857555482432, "grad_norm": 2.089590549468994, "learning_rate": 8.969852469531752e-07, "loss": 0.8576, "step": 68070 }, { "epoch": 0.829646691772391, "grad_norm": 1.8690907955169678, "learning_rate": 8.966645285439385e-07, "loss": 0.7915, "step": 68075 }, { "epoch": 0.8297076279965389, "grad_norm": 2.4076449871063232, "learning_rate": 8.963438101347017e-07, "loss": 0.8141, "step": 68080 }, { "epoch": 0.8297685642206867, "grad_norm": 1.9756453037261963, "learning_rate": 8.960230917254652e-07, "loss": 0.8347, "step": 68085 }, { "epoch": 0.8298295004448344, "grad_norm": 1.9467213153839111, "learning_rate": 8.957023733162284e-07, "loss": 0.7983, "step": 68090 }, { "epoch": 0.8298904366689822, "grad_norm": 2.0119898319244385, "learning_rate": 8.953816549069917e-07, "loss": 0.7809, "step": 68095 }, { "epoch": 0.82995137289313, "grad_norm": 1.847328543663025, "learning_rate": 8.95060936497755e-07, "loss": 0.7752, "step": 68100 }, { "epoch": 0.8300123091172779, "grad_norm": 1.7831807136535645, "learning_rate": 8.947402180885184e-07, "loss": 0.8253, "step": 68105 }, { "epoch": 0.8300732453414257, "grad_norm": 2.081773519515991, "learning_rate": 8.944194996792816e-07, "loss": 0.7895, "step": 68110 }, { "epoch": 0.8301341815655735, "grad_norm": 2.1643381118774414, "learning_rate": 8.94098781270045e-07, "loss": 0.8052, "step": 68115 }, { "epoch": 0.8301951177897213, "grad_norm": 2.125767469406128, "learning_rate": 8.937780628608082e-07, "loss": 0.7637, "step": 68120 }, { "epoch": 0.830256054013869, "grad_norm": 1.8464860916137695, "learning_rate": 8.934573444515716e-07, "loss": 0.7918, "step": 68125 }, { "epoch": 0.8303169902380169, "grad_norm": 2.036659002304077, "learning_rate": 8.931366260423349e-07, "loss": 0.8023, "step": 68130 }, { "epoch": 0.8303779264621647, "grad_norm": 2.0307459831237793, "learning_rate": 8.928159076330982e-07, "loss": 0.8284, "step": 68135 }, { "epoch": 0.8304388626863125, "grad_norm": 1.869212031364441, "learning_rate": 8.924951892238614e-07, "loss": 0.8481, "step": 68140 }, { "epoch": 0.8304997989104603, "grad_norm": 1.9570010900497437, "learning_rate": 8.921744708146249e-07, "loss": 0.7705, "step": 68145 }, { "epoch": 0.8305607351346082, "grad_norm": 2.0109732151031494, "learning_rate": 8.918537524053882e-07, "loss": 0.8039, "step": 68150 }, { "epoch": 0.830621671358756, "grad_norm": 2.1440510749816895, "learning_rate": 8.915330339961514e-07, "loss": 0.8166, "step": 68155 }, { "epoch": 0.8306826075829037, "grad_norm": 1.9487138986587524, "learning_rate": 8.912123155869147e-07, "loss": 0.8268, "step": 68160 }, { "epoch": 0.8307435438070515, "grad_norm": 2.093661069869995, "learning_rate": 8.908915971776781e-07, "loss": 0.7946, "step": 68165 }, { "epoch": 0.8308044800311993, "grad_norm": 1.9163289070129395, "learning_rate": 8.905708787684414e-07, "loss": 0.8171, "step": 68170 }, { "epoch": 0.8308654162553472, "grad_norm": 2.0581021308898926, "learning_rate": 8.902501603592047e-07, "loss": 0.8325, "step": 68175 }, { "epoch": 0.830926352479495, "grad_norm": 1.773669958114624, "learning_rate": 8.899294419499679e-07, "loss": 0.7716, "step": 68180 }, { "epoch": 0.8309872887036428, "grad_norm": 2.1085028648376465, "learning_rate": 8.896087235407313e-07, "loss": 0.8159, "step": 68185 }, { "epoch": 0.8310482249277906, "grad_norm": 1.955322504043579, "learning_rate": 8.892880051314947e-07, "loss": 0.8359, "step": 68190 }, { "epoch": 0.8311091611519383, "grad_norm": 1.9541552066802979, "learning_rate": 8.889672867222579e-07, "loss": 0.8088, "step": 68195 }, { "epoch": 0.8311700973760862, "grad_norm": 1.9283409118652344, "learning_rate": 8.886465683130211e-07, "loss": 0.7933, "step": 68200 }, { "epoch": 0.831231033600234, "grad_norm": 2.066911458969116, "learning_rate": 8.883258499037846e-07, "loss": 0.7697, "step": 68205 }, { "epoch": 0.8312919698243818, "grad_norm": 1.5498677492141724, "learning_rate": 8.880051314945479e-07, "loss": 0.8245, "step": 68210 }, { "epoch": 0.8313529060485296, "grad_norm": 2.2536802291870117, "learning_rate": 8.876844130853111e-07, "loss": 0.7565, "step": 68215 }, { "epoch": 0.8314138422726775, "grad_norm": 1.9217827320098877, "learning_rate": 8.873636946760745e-07, "loss": 0.7788, "step": 68220 }, { "epoch": 0.8314747784968253, "grad_norm": 2.165344476699829, "learning_rate": 8.870429762668378e-07, "loss": 0.8244, "step": 68225 }, { "epoch": 0.831535714720973, "grad_norm": 2.1958723068237305, "learning_rate": 8.867222578576011e-07, "loss": 0.8405, "step": 68230 }, { "epoch": 0.8315966509451208, "grad_norm": 2.093111991882324, "learning_rate": 8.864015394483644e-07, "loss": 0.7984, "step": 68235 }, { "epoch": 0.8316575871692686, "grad_norm": 2.1817593574523926, "learning_rate": 8.860808210391277e-07, "loss": 0.8981, "step": 68240 }, { "epoch": 0.8317185233934165, "grad_norm": 1.9960769414901733, "learning_rate": 8.85760102629891e-07, "loss": 0.8501, "step": 68245 }, { "epoch": 0.8317794596175643, "grad_norm": 1.6384916305541992, "learning_rate": 8.854393842206544e-07, "loss": 0.7603, "step": 68250 }, { "epoch": 0.8318403958417121, "grad_norm": 1.8187079429626465, "learning_rate": 8.851186658114176e-07, "loss": 0.7337, "step": 68255 }, { "epoch": 0.8319013320658599, "grad_norm": 2.030604362487793, "learning_rate": 8.847979474021809e-07, "loss": 0.8433, "step": 68260 }, { "epoch": 0.8319622682900076, "grad_norm": 2.1675164699554443, "learning_rate": 8.844772289929443e-07, "loss": 0.8435, "step": 68265 }, { "epoch": 0.8320232045141555, "grad_norm": 1.907631754875183, "learning_rate": 8.841565105837076e-07, "loss": 0.7743, "step": 68270 }, { "epoch": 0.8320841407383033, "grad_norm": 1.7783048152923584, "learning_rate": 8.838357921744708e-07, "loss": 0.8891, "step": 68275 }, { "epoch": 0.8321450769624511, "grad_norm": 2.0621373653411865, "learning_rate": 8.835150737652343e-07, "loss": 0.7844, "step": 68280 }, { "epoch": 0.8322060131865989, "grad_norm": 2.2116901874542236, "learning_rate": 8.831943553559975e-07, "loss": 0.8125, "step": 68285 }, { "epoch": 0.8322669494107467, "grad_norm": 1.9775422811508179, "learning_rate": 8.828736369467608e-07, "loss": 0.7939, "step": 68290 }, { "epoch": 0.8323278856348946, "grad_norm": 2.089104652404785, "learning_rate": 8.825529185375241e-07, "loss": 0.8607, "step": 68295 }, { "epoch": 0.8323888218590423, "grad_norm": 1.647948980331421, "learning_rate": 8.822322001282875e-07, "loss": 0.7817, "step": 68300 }, { "epoch": 0.8324497580831901, "grad_norm": 1.8326019048690796, "learning_rate": 8.819114817190507e-07, "loss": 0.8027, "step": 68305 }, { "epoch": 0.8325106943073379, "grad_norm": 2.0574638843536377, "learning_rate": 8.815907633098141e-07, "loss": 0.8233, "step": 68310 }, { "epoch": 0.8325716305314858, "grad_norm": 2.0686099529266357, "learning_rate": 8.812700449005773e-07, "loss": 0.765, "step": 68315 }, { "epoch": 0.8326325667556336, "grad_norm": 1.9494380950927734, "learning_rate": 8.809493264913407e-07, "loss": 0.8442, "step": 68320 }, { "epoch": 0.8326935029797814, "grad_norm": 2.0793392658233643, "learning_rate": 8.806286080821041e-07, "loss": 0.8275, "step": 68325 }, { "epoch": 0.8327544392039292, "grad_norm": 1.9721872806549072, "learning_rate": 8.803078896728673e-07, "loss": 0.82, "step": 68330 }, { "epoch": 0.8328153754280769, "grad_norm": 1.9599238634109497, "learning_rate": 8.799871712636305e-07, "loss": 0.7981, "step": 68335 }, { "epoch": 0.8328763116522248, "grad_norm": 2.07210636138916, "learning_rate": 8.79666452854394e-07, "loss": 0.8055, "step": 68340 }, { "epoch": 0.8329372478763726, "grad_norm": 2.29044246673584, "learning_rate": 8.793457344451573e-07, "loss": 0.8489, "step": 68345 }, { "epoch": 0.8329981841005204, "grad_norm": 1.8138140439987183, "learning_rate": 8.790250160359205e-07, "loss": 0.7678, "step": 68350 }, { "epoch": 0.8330591203246682, "grad_norm": 2.372466564178467, "learning_rate": 8.787042976266838e-07, "loss": 0.8283, "step": 68355 }, { "epoch": 0.833120056548816, "grad_norm": 2.2743418216705322, "learning_rate": 8.783835792174472e-07, "loss": 0.8066, "step": 68360 }, { "epoch": 0.8331809927729639, "grad_norm": 1.9511688947677612, "learning_rate": 8.780628608082105e-07, "loss": 0.7793, "step": 68365 }, { "epoch": 0.8332419289971116, "grad_norm": 1.7469316720962524, "learning_rate": 8.777421423989738e-07, "loss": 0.7834, "step": 68370 }, { "epoch": 0.8333028652212594, "grad_norm": 1.971130609512329, "learning_rate": 8.77421423989737e-07, "loss": 0.8217, "step": 68375 }, { "epoch": 0.8333638014454072, "grad_norm": 1.8709537982940674, "learning_rate": 8.771007055805004e-07, "loss": 0.7664, "step": 68380 }, { "epoch": 0.833424737669555, "grad_norm": 1.8249812126159668, "learning_rate": 8.767799871712638e-07, "loss": 0.8565, "step": 68385 }, { "epoch": 0.8334856738937029, "grad_norm": 1.818009376525879, "learning_rate": 8.76459268762027e-07, "loss": 0.7787, "step": 68390 }, { "epoch": 0.8335466101178507, "grad_norm": 2.5601918697357178, "learning_rate": 8.761385503527903e-07, "loss": 0.8472, "step": 68395 }, { "epoch": 0.8336075463419985, "grad_norm": 1.8651044368743896, "learning_rate": 8.758178319435537e-07, "loss": 0.706, "step": 68400 }, { "epoch": 0.8336684825661462, "grad_norm": 2.2968833446502686, "learning_rate": 8.75497113534317e-07, "loss": 0.8835, "step": 68405 }, { "epoch": 0.833729418790294, "grad_norm": 1.958647608757019, "learning_rate": 8.751763951250802e-07, "loss": 0.7688, "step": 68410 }, { "epoch": 0.8337903550144419, "grad_norm": 1.8240203857421875, "learning_rate": 8.748556767158436e-07, "loss": 0.7909, "step": 68415 }, { "epoch": 0.8338512912385897, "grad_norm": 1.747002363204956, "learning_rate": 8.745349583066069e-07, "loss": 0.8249, "step": 68420 }, { "epoch": 0.8339122274627375, "grad_norm": 2.049421548843384, "learning_rate": 8.742142398973702e-07, "loss": 0.7997, "step": 68425 }, { "epoch": 0.8339731636868853, "grad_norm": 2.062941074371338, "learning_rate": 8.738935214881335e-07, "loss": 0.8599, "step": 68430 }, { "epoch": 0.8340340999110332, "grad_norm": 1.8559662103652954, "learning_rate": 8.735728030788968e-07, "loss": 0.771, "step": 68435 }, { "epoch": 0.8340950361351809, "grad_norm": 1.7235252857208252, "learning_rate": 8.732520846696601e-07, "loss": 0.7714, "step": 68440 }, { "epoch": 0.8341559723593287, "grad_norm": 1.7287534475326538, "learning_rate": 8.729313662604235e-07, "loss": 0.7349, "step": 68445 }, { "epoch": 0.8342169085834765, "grad_norm": 1.9676793813705444, "learning_rate": 8.726106478511867e-07, "loss": 0.7904, "step": 68450 }, { "epoch": 0.8342778448076243, "grad_norm": 2.2699315547943115, "learning_rate": 8.7228992944195e-07, "loss": 0.7953, "step": 68455 }, { "epoch": 0.8343387810317722, "grad_norm": 1.9809398651123047, "learning_rate": 8.719692110327134e-07, "loss": 0.8515, "step": 68460 }, { "epoch": 0.83439971725592, "grad_norm": 1.6757152080535889, "learning_rate": 8.716484926234767e-07, "loss": 0.8538, "step": 68465 }, { "epoch": 0.8344606534800678, "grad_norm": 2.5411441326141357, "learning_rate": 8.713277742142399e-07, "loss": 0.82, "step": 68470 }, { "epoch": 0.8345215897042155, "grad_norm": 2.125941038131714, "learning_rate": 8.710070558050033e-07, "loss": 0.8487, "step": 68475 }, { "epoch": 0.8345825259283634, "grad_norm": 1.8695623874664307, "learning_rate": 8.706863373957666e-07, "loss": 0.7843, "step": 68480 }, { "epoch": 0.8346434621525112, "grad_norm": 1.715872883796692, "learning_rate": 8.703656189865299e-07, "loss": 0.8409, "step": 68485 }, { "epoch": 0.834704398376659, "grad_norm": 2.1649293899536133, "learning_rate": 8.700449005772931e-07, "loss": 0.8199, "step": 68490 }, { "epoch": 0.8347653346008068, "grad_norm": 1.954189419746399, "learning_rate": 8.697241821680565e-07, "loss": 0.803, "step": 68495 }, { "epoch": 0.8348262708249546, "grad_norm": 1.8315880298614502, "learning_rate": 8.694034637588199e-07, "loss": 0.7937, "step": 68500 }, { "epoch": 0.8348872070491025, "grad_norm": 2.3009932041168213, "learning_rate": 8.690827453495832e-07, "loss": 0.8305, "step": 68505 }, { "epoch": 0.8349481432732502, "grad_norm": 2.116154670715332, "learning_rate": 8.687620269403464e-07, "loss": 0.884, "step": 68510 }, { "epoch": 0.835009079497398, "grad_norm": 1.960589051246643, "learning_rate": 8.684413085311097e-07, "loss": 0.8067, "step": 68515 }, { "epoch": 0.8350700157215458, "grad_norm": 1.8087162971496582, "learning_rate": 8.681205901218732e-07, "loss": 0.8036, "step": 68520 }, { "epoch": 0.8351309519456936, "grad_norm": 2.4161956310272217, "learning_rate": 8.677998717126364e-07, "loss": 0.7771, "step": 68525 }, { "epoch": 0.8351918881698415, "grad_norm": 2.8047709465026855, "learning_rate": 8.674791533033996e-07, "loss": 0.8705, "step": 68530 }, { "epoch": 0.8352528243939893, "grad_norm": 2.116387128829956, "learning_rate": 8.671584348941629e-07, "loss": 0.8902, "step": 68535 }, { "epoch": 0.835313760618137, "grad_norm": 1.849404215812683, "learning_rate": 8.668377164849264e-07, "loss": 0.7703, "step": 68540 }, { "epoch": 0.8353746968422848, "grad_norm": 1.7276214361190796, "learning_rate": 8.665169980756896e-07, "loss": 0.8828, "step": 68545 }, { "epoch": 0.8354356330664326, "grad_norm": 1.8342863321304321, "learning_rate": 8.661962796664528e-07, "loss": 0.8191, "step": 68550 }, { "epoch": 0.8354965692905805, "grad_norm": 2.336488723754883, "learning_rate": 8.658755612572162e-07, "loss": 0.8187, "step": 68555 }, { "epoch": 0.8355575055147283, "grad_norm": 2.516756772994995, "learning_rate": 8.655548428479796e-07, "loss": 0.8011, "step": 68560 }, { "epoch": 0.8356184417388761, "grad_norm": 1.855692744255066, "learning_rate": 8.652341244387428e-07, "loss": 0.8323, "step": 68565 }, { "epoch": 0.8356793779630239, "grad_norm": 2.147653102874756, "learning_rate": 8.649134060295062e-07, "loss": 0.8643, "step": 68570 }, { "epoch": 0.8357403141871717, "grad_norm": 1.781770944595337, "learning_rate": 8.645926876202694e-07, "loss": 0.843, "step": 68575 }, { "epoch": 0.8358012504113195, "grad_norm": 2.1089305877685547, "learning_rate": 8.642719692110329e-07, "loss": 0.7377, "step": 68580 }, { "epoch": 0.8358621866354673, "grad_norm": 1.856061577796936, "learning_rate": 8.639512508017961e-07, "loss": 0.6915, "step": 68585 }, { "epoch": 0.8359231228596151, "grad_norm": 2.0011708736419678, "learning_rate": 8.636305323925594e-07, "loss": 0.8212, "step": 68590 }, { "epoch": 0.8359840590837629, "grad_norm": 1.8998191356658936, "learning_rate": 8.633098139833227e-07, "loss": 0.813, "step": 68595 }, { "epoch": 0.8360449953079108, "grad_norm": 3.1890387535095215, "learning_rate": 8.629890955740861e-07, "loss": 0.8529, "step": 68600 }, { "epoch": 0.8361059315320586, "grad_norm": 2.8311243057250977, "learning_rate": 8.626683771648493e-07, "loss": 0.8102, "step": 68605 }, { "epoch": 0.8361668677562063, "grad_norm": 1.8352234363555908, "learning_rate": 8.623476587556126e-07, "loss": 0.8155, "step": 68610 }, { "epoch": 0.8362278039803541, "grad_norm": 2.0138051509857178, "learning_rate": 8.62026940346376e-07, "loss": 0.8459, "step": 68615 }, { "epoch": 0.836288740204502, "grad_norm": 2.4508934020996094, "learning_rate": 8.617062219371393e-07, "loss": 0.7934, "step": 68620 }, { "epoch": 0.8363496764286498, "grad_norm": 1.8354032039642334, "learning_rate": 8.613855035279025e-07, "loss": 0.8547, "step": 68625 }, { "epoch": 0.8364106126527976, "grad_norm": 1.781959891319275, "learning_rate": 8.610647851186659e-07, "loss": 0.814, "step": 68630 }, { "epoch": 0.8364715488769454, "grad_norm": 2.0969316959381104, "learning_rate": 8.607440667094292e-07, "loss": 0.8076, "step": 68635 }, { "epoch": 0.8365324851010932, "grad_norm": 2.0070300102233887, "learning_rate": 8.604233483001925e-07, "loss": 0.8248, "step": 68640 }, { "epoch": 0.836593421325241, "grad_norm": 2.0653061866760254, "learning_rate": 8.601026298909558e-07, "loss": 0.7774, "step": 68645 }, { "epoch": 0.8366543575493888, "grad_norm": 2.5139646530151367, "learning_rate": 8.597819114817191e-07, "loss": 0.8059, "step": 68650 }, { "epoch": 0.8367152937735366, "grad_norm": 1.820106863975525, "learning_rate": 8.594611930724824e-07, "loss": 0.8124, "step": 68655 }, { "epoch": 0.8367762299976844, "grad_norm": 2.1592462062835693, "learning_rate": 8.591404746632458e-07, "loss": 0.7893, "step": 68660 }, { "epoch": 0.8368371662218322, "grad_norm": 1.916926383972168, "learning_rate": 8.58819756254009e-07, "loss": 0.8161, "step": 68665 }, { "epoch": 0.8368981024459801, "grad_norm": 2.385467290878296, "learning_rate": 8.584990378447723e-07, "loss": 0.788, "step": 68670 }, { "epoch": 0.8369590386701279, "grad_norm": 1.8215121030807495, "learning_rate": 8.581783194355358e-07, "loss": 0.8132, "step": 68675 }, { "epoch": 0.8370199748942756, "grad_norm": 2.0128331184387207, "learning_rate": 8.57857601026299e-07, "loss": 0.8691, "step": 68680 }, { "epoch": 0.8370809111184234, "grad_norm": 1.9424138069152832, "learning_rate": 8.575368826170622e-07, "loss": 0.8783, "step": 68685 }, { "epoch": 0.8371418473425712, "grad_norm": 2.175363302230835, "learning_rate": 8.572161642078256e-07, "loss": 0.7849, "step": 68690 }, { "epoch": 0.8372027835667191, "grad_norm": 2.128253698348999, "learning_rate": 8.56895445798589e-07, "loss": 0.8105, "step": 68695 }, { "epoch": 0.8372637197908669, "grad_norm": 2.181722640991211, "learning_rate": 8.565747273893522e-07, "loss": 0.8355, "step": 68700 }, { "epoch": 0.8373246560150147, "grad_norm": 1.8955167531967163, "learning_rate": 8.562540089801155e-07, "loss": 0.814, "step": 68705 }, { "epoch": 0.8373855922391625, "grad_norm": 1.919190526008606, "learning_rate": 8.559332905708788e-07, "loss": 0.7294, "step": 68710 }, { "epoch": 0.8374465284633102, "grad_norm": 2.276850700378418, "learning_rate": 8.556125721616422e-07, "loss": 0.8246, "step": 68715 }, { "epoch": 0.8375074646874581, "grad_norm": 1.884413480758667, "learning_rate": 8.552918537524055e-07, "loss": 0.813, "step": 68720 }, { "epoch": 0.8375684009116059, "grad_norm": 1.991646647453308, "learning_rate": 8.549711353431687e-07, "loss": 0.8182, "step": 68725 }, { "epoch": 0.8376293371357537, "grad_norm": 2.195594549179077, "learning_rate": 8.54650416933932e-07, "loss": 0.8119, "step": 68730 }, { "epoch": 0.8376902733599015, "grad_norm": 2.1644699573516846, "learning_rate": 8.543296985246955e-07, "loss": 0.7882, "step": 68735 }, { "epoch": 0.8377512095840494, "grad_norm": 2.5073094367980957, "learning_rate": 8.540089801154587e-07, "loss": 0.8654, "step": 68740 }, { "epoch": 0.8378121458081972, "grad_norm": 2.0486018657684326, "learning_rate": 8.53688261706222e-07, "loss": 0.7914, "step": 68745 }, { "epoch": 0.8378730820323449, "grad_norm": 2.2349514961242676, "learning_rate": 8.533675432969853e-07, "loss": 0.8325, "step": 68750 }, { "epoch": 0.8379340182564927, "grad_norm": 1.74016273021698, "learning_rate": 8.530468248877487e-07, "loss": 0.7736, "step": 68755 }, { "epoch": 0.8379949544806405, "grad_norm": 1.9325939416885376, "learning_rate": 8.527261064785119e-07, "loss": 0.8251, "step": 68760 }, { "epoch": 0.8380558907047884, "grad_norm": 1.9393177032470703, "learning_rate": 8.524053880692753e-07, "loss": 0.8006, "step": 68765 }, { "epoch": 0.8381168269289362, "grad_norm": 2.0223464965820312, "learning_rate": 8.520846696600385e-07, "loss": 0.766, "step": 68770 }, { "epoch": 0.838177763153084, "grad_norm": 2.4457428455352783, "learning_rate": 8.517639512508019e-07, "loss": 0.8866, "step": 68775 }, { "epoch": 0.8382386993772318, "grad_norm": 1.8849786520004272, "learning_rate": 8.514432328415652e-07, "loss": 0.7909, "step": 68780 }, { "epoch": 0.8382996356013795, "grad_norm": 2.176727533340454, "learning_rate": 8.511225144323285e-07, "loss": 0.7751, "step": 68785 }, { "epoch": 0.8383605718255274, "grad_norm": 1.8717862367630005, "learning_rate": 8.508017960230917e-07, "loss": 0.7831, "step": 68790 }, { "epoch": 0.8384215080496752, "grad_norm": 2.1431283950805664, "learning_rate": 8.504810776138552e-07, "loss": 0.7896, "step": 68795 }, { "epoch": 0.838482444273823, "grad_norm": 2.5912067890167236, "learning_rate": 8.501603592046184e-07, "loss": 0.8611, "step": 68800 }, { "epoch": 0.8385433804979708, "grad_norm": 1.9442613124847412, "learning_rate": 8.498396407953817e-07, "loss": 0.8046, "step": 68805 }, { "epoch": 0.8386043167221187, "grad_norm": 2.332829713821411, "learning_rate": 8.49518922386145e-07, "loss": 0.7395, "step": 68810 }, { "epoch": 0.8386652529462665, "grad_norm": 1.9272180795669556, "learning_rate": 8.491982039769084e-07, "loss": 0.8134, "step": 68815 }, { "epoch": 0.8387261891704142, "grad_norm": 2.2461326122283936, "learning_rate": 8.488774855676716e-07, "loss": 0.8287, "step": 68820 }, { "epoch": 0.838787125394562, "grad_norm": 1.8652219772338867, "learning_rate": 8.48556767158435e-07, "loss": 0.8268, "step": 68825 }, { "epoch": 0.8388480616187098, "grad_norm": 1.7503615617752075, "learning_rate": 8.482360487491982e-07, "loss": 0.7762, "step": 68830 }, { "epoch": 0.8389089978428577, "grad_norm": 2.048990488052368, "learning_rate": 8.479153303399616e-07, "loss": 0.7774, "step": 68835 }, { "epoch": 0.8389699340670055, "grad_norm": 2.0526130199432373, "learning_rate": 8.475946119307249e-07, "loss": 0.7956, "step": 68840 }, { "epoch": 0.8390308702911533, "grad_norm": 2.2826855182647705, "learning_rate": 8.472738935214882e-07, "loss": 0.8532, "step": 68845 }, { "epoch": 0.8390918065153011, "grad_norm": 1.8003356456756592, "learning_rate": 8.469531751122514e-07, "loss": 0.7684, "step": 68850 }, { "epoch": 0.8391527427394488, "grad_norm": 2.2218687534332275, "learning_rate": 8.466324567030149e-07, "loss": 0.8039, "step": 68855 }, { "epoch": 0.8392136789635967, "grad_norm": 1.9214576482772827, "learning_rate": 8.463117382937781e-07, "loss": 0.7834, "step": 68860 }, { "epoch": 0.8392746151877445, "grad_norm": 2.1261188983917236, "learning_rate": 8.459910198845414e-07, "loss": 0.8877, "step": 68865 }, { "epoch": 0.8393355514118923, "grad_norm": 1.893834114074707, "learning_rate": 8.456703014753047e-07, "loss": 0.767, "step": 68870 }, { "epoch": 0.8393964876360401, "grad_norm": 2.2917263507843018, "learning_rate": 8.453495830660681e-07, "loss": 0.7359, "step": 68875 }, { "epoch": 0.839457423860188, "grad_norm": 1.9827301502227783, "learning_rate": 8.450288646568313e-07, "loss": 0.773, "step": 68880 }, { "epoch": 0.8395183600843358, "grad_norm": 1.9128055572509766, "learning_rate": 8.447081462475947e-07, "loss": 0.8461, "step": 68885 }, { "epoch": 0.8395792963084835, "grad_norm": 2.349616289138794, "learning_rate": 8.443874278383581e-07, "loss": 0.8207, "step": 68890 }, { "epoch": 0.8396402325326313, "grad_norm": 2.2394862174987793, "learning_rate": 8.440667094291213e-07, "loss": 0.9066, "step": 68895 }, { "epoch": 0.8397011687567791, "grad_norm": 1.9200700521469116, "learning_rate": 8.437459910198846e-07, "loss": 0.7729, "step": 68900 }, { "epoch": 0.839762104980927, "grad_norm": 1.7636477947235107, "learning_rate": 8.434252726106479e-07, "loss": 0.7834, "step": 68905 }, { "epoch": 0.8398230412050748, "grad_norm": 1.7562291622161865, "learning_rate": 8.431045542014113e-07, "loss": 0.7597, "step": 68910 }, { "epoch": 0.8398839774292226, "grad_norm": 2.020275831222534, "learning_rate": 8.427838357921746e-07, "loss": 0.8641, "step": 68915 }, { "epoch": 0.8399449136533704, "grad_norm": 1.911643147468567, "learning_rate": 8.424631173829379e-07, "loss": 0.8649, "step": 68920 }, { "epoch": 0.8400058498775181, "grad_norm": 1.9330552816390991, "learning_rate": 8.421423989737011e-07, "loss": 0.7697, "step": 68925 }, { "epoch": 0.840066786101666, "grad_norm": 2.285884380340576, "learning_rate": 8.418216805644646e-07, "loss": 0.7511, "step": 68930 }, { "epoch": 0.8401277223258138, "grad_norm": 2.0745038986206055, "learning_rate": 8.415009621552278e-07, "loss": 0.8228, "step": 68935 }, { "epoch": 0.8401886585499616, "grad_norm": 1.9782968759536743, "learning_rate": 8.411802437459911e-07, "loss": 0.765, "step": 68940 }, { "epoch": 0.8402495947741094, "grad_norm": 2.309735059738159, "learning_rate": 8.408595253367544e-07, "loss": 0.9107, "step": 68945 }, { "epoch": 0.8403105309982573, "grad_norm": 2.036604642868042, "learning_rate": 8.405388069275178e-07, "loss": 0.7884, "step": 68950 }, { "epoch": 0.8403714672224051, "grad_norm": 1.836584448814392, "learning_rate": 8.40218088518281e-07, "loss": 0.6805, "step": 68955 }, { "epoch": 0.8404324034465528, "grad_norm": 2.090266704559326, "learning_rate": 8.398973701090444e-07, "loss": 0.8263, "step": 68960 }, { "epoch": 0.8404933396707006, "grad_norm": 1.9783684015274048, "learning_rate": 8.395766516998076e-07, "loss": 0.8799, "step": 68965 }, { "epoch": 0.8405542758948484, "grad_norm": 1.7603400945663452, "learning_rate": 8.39255933290571e-07, "loss": 0.8294, "step": 68970 }, { "epoch": 0.8406152121189963, "grad_norm": 1.7576854228973389, "learning_rate": 8.389352148813343e-07, "loss": 0.7821, "step": 68975 }, { "epoch": 0.8406761483431441, "grad_norm": 1.7505353689193726, "learning_rate": 8.386144964720976e-07, "loss": 0.7996, "step": 68980 }, { "epoch": 0.8407370845672919, "grad_norm": 2.0355231761932373, "learning_rate": 8.382937780628608e-07, "loss": 0.7523, "step": 68985 }, { "epoch": 0.8407980207914397, "grad_norm": 1.8908889293670654, "learning_rate": 8.379730596536243e-07, "loss": 0.814, "step": 68990 }, { "epoch": 0.8408589570155874, "grad_norm": 2.387411594390869, "learning_rate": 8.376523412443875e-07, "loss": 0.8042, "step": 68995 }, { "epoch": 0.8409198932397353, "grad_norm": 1.6044902801513672, "learning_rate": 8.373316228351508e-07, "loss": 0.7719, "step": 69000 }, { "epoch": 0.8409808294638831, "grad_norm": 2.111912488937378, "learning_rate": 8.37010904425914e-07, "loss": 0.7847, "step": 69005 }, { "epoch": 0.8410417656880309, "grad_norm": 2.1718695163726807, "learning_rate": 8.366901860166775e-07, "loss": 0.8097, "step": 69010 }, { "epoch": 0.8411027019121787, "grad_norm": 2.100648880004883, "learning_rate": 8.363694676074407e-07, "loss": 0.7842, "step": 69015 }, { "epoch": 0.8411636381363266, "grad_norm": 1.7529199123382568, "learning_rate": 8.360487491982041e-07, "loss": 0.8908, "step": 69020 }, { "epoch": 0.8412245743604744, "grad_norm": 2.256706476211548, "learning_rate": 8.357280307889673e-07, "loss": 0.7837, "step": 69025 }, { "epoch": 0.8412855105846221, "grad_norm": 1.9239072799682617, "learning_rate": 8.354073123797307e-07, "loss": 0.768, "step": 69030 }, { "epoch": 0.8413464468087699, "grad_norm": 1.8290542364120483, "learning_rate": 8.35086593970494e-07, "loss": 0.8093, "step": 69035 }, { "epoch": 0.8414073830329177, "grad_norm": 1.9567925930023193, "learning_rate": 8.347658755612573e-07, "loss": 0.8252, "step": 69040 }, { "epoch": 0.8414683192570656, "grad_norm": 2.299389362335205, "learning_rate": 8.344451571520205e-07, "loss": 0.8017, "step": 69045 }, { "epoch": 0.8415292554812134, "grad_norm": 2.0938737392425537, "learning_rate": 8.34124438742784e-07, "loss": 0.8071, "step": 69050 }, { "epoch": 0.8415901917053612, "grad_norm": 1.830207109451294, "learning_rate": 8.338037203335472e-07, "loss": 0.837, "step": 69055 }, { "epoch": 0.841651127929509, "grad_norm": 2.067612409591675, "learning_rate": 8.334830019243105e-07, "loss": 0.7866, "step": 69060 }, { "epoch": 0.8417120641536567, "grad_norm": 2.3499085903167725, "learning_rate": 8.331622835150737e-07, "loss": 0.8378, "step": 69065 }, { "epoch": 0.8417730003778046, "grad_norm": 1.9868890047073364, "learning_rate": 8.328415651058372e-07, "loss": 0.8342, "step": 69070 }, { "epoch": 0.8418339366019524, "grad_norm": 1.8343015909194946, "learning_rate": 8.325208466966004e-07, "loss": 0.7494, "step": 69075 }, { "epoch": 0.8418948728261002, "grad_norm": 2.0424740314483643, "learning_rate": 8.322001282873638e-07, "loss": 0.8342, "step": 69080 }, { "epoch": 0.841955809050248, "grad_norm": 1.9877560138702393, "learning_rate": 8.31879409878127e-07, "loss": 0.812, "step": 69085 }, { "epoch": 0.8420167452743959, "grad_norm": 2.1102116107940674, "learning_rate": 8.315586914688904e-07, "loss": 0.7417, "step": 69090 }, { "epoch": 0.8420776814985437, "grad_norm": 2.3148458003997803, "learning_rate": 8.312379730596538e-07, "loss": 0.7479, "step": 69095 }, { "epoch": 0.8421386177226914, "grad_norm": 2.0846128463745117, "learning_rate": 8.30917254650417e-07, "loss": 0.8869, "step": 69100 }, { "epoch": 0.8421995539468392, "grad_norm": 2.1091091632843018, "learning_rate": 8.305965362411802e-07, "loss": 0.8247, "step": 69105 }, { "epoch": 0.842260490170987, "grad_norm": 2.042174816131592, "learning_rate": 8.302758178319437e-07, "loss": 0.7725, "step": 69110 }, { "epoch": 0.8423214263951349, "grad_norm": 1.6608917713165283, "learning_rate": 8.29955099422707e-07, "loss": 0.849, "step": 69115 }, { "epoch": 0.8423823626192827, "grad_norm": 1.8834311962127686, "learning_rate": 8.296343810134702e-07, "loss": 0.8146, "step": 69120 }, { "epoch": 0.8424432988434305, "grad_norm": 1.7139593362808228, "learning_rate": 8.293136626042334e-07, "loss": 0.7683, "step": 69125 }, { "epoch": 0.8425042350675783, "grad_norm": 1.9852068424224854, "learning_rate": 8.289929441949969e-07, "loss": 0.8232, "step": 69130 }, { "epoch": 0.842565171291726, "grad_norm": 1.999959945678711, "learning_rate": 8.286722257857602e-07, "loss": 0.7711, "step": 69135 }, { "epoch": 0.8426261075158739, "grad_norm": 1.7320228815078735, "learning_rate": 8.283515073765234e-07, "loss": 0.7516, "step": 69140 }, { "epoch": 0.8426870437400217, "grad_norm": 1.8499351739883423, "learning_rate": 8.280307889672867e-07, "loss": 0.8214, "step": 69145 }, { "epoch": 0.8427479799641695, "grad_norm": 2.2838401794433594, "learning_rate": 8.277100705580501e-07, "loss": 0.8191, "step": 69150 }, { "epoch": 0.8428089161883173, "grad_norm": 2.273375988006592, "learning_rate": 8.273893521488134e-07, "loss": 0.8507, "step": 69155 }, { "epoch": 0.8428698524124651, "grad_norm": 1.788426160812378, "learning_rate": 8.270686337395767e-07, "loss": 0.8652, "step": 69160 }, { "epoch": 0.842930788636613, "grad_norm": 1.8956698179244995, "learning_rate": 8.2674791533034e-07, "loss": 0.8298, "step": 69165 }, { "epoch": 0.8429917248607607, "grad_norm": 1.7466719150543213, "learning_rate": 8.264271969211033e-07, "loss": 0.8145, "step": 69170 }, { "epoch": 0.8430526610849085, "grad_norm": 2.0972812175750732, "learning_rate": 8.261064785118667e-07, "loss": 0.8166, "step": 69175 }, { "epoch": 0.8431135973090563, "grad_norm": 2.024148464202881, "learning_rate": 8.257857601026299e-07, "loss": 0.7936, "step": 69180 }, { "epoch": 0.8431745335332042, "grad_norm": 1.705298662185669, "learning_rate": 8.254650416933934e-07, "loss": 0.8156, "step": 69185 }, { "epoch": 0.843235469757352, "grad_norm": 2.1233062744140625, "learning_rate": 8.251443232841566e-07, "loss": 0.8161, "step": 69190 }, { "epoch": 0.8432964059814998, "grad_norm": 2.35945725440979, "learning_rate": 8.248236048749199e-07, "loss": 0.8702, "step": 69195 }, { "epoch": 0.8433573422056476, "grad_norm": 1.811995506286621, "learning_rate": 8.245028864656831e-07, "loss": 0.8137, "step": 69200 }, { "epoch": 0.8434182784297953, "grad_norm": 2.3262858390808105, "learning_rate": 8.241821680564466e-07, "loss": 0.7937, "step": 69205 }, { "epoch": 0.8434792146539432, "grad_norm": 1.9890224933624268, "learning_rate": 8.238614496472098e-07, "loss": 0.8295, "step": 69210 }, { "epoch": 0.843540150878091, "grad_norm": 1.980186939239502, "learning_rate": 8.235407312379731e-07, "loss": 0.79, "step": 69215 }, { "epoch": 0.8436010871022388, "grad_norm": 1.782752275466919, "learning_rate": 8.232200128287364e-07, "loss": 0.8493, "step": 69220 }, { "epoch": 0.8436620233263866, "grad_norm": 2.0770645141601562, "learning_rate": 8.228992944194998e-07, "loss": 0.8552, "step": 69225 }, { "epoch": 0.8437229595505344, "grad_norm": 1.615732192993164, "learning_rate": 8.22578576010263e-07, "loss": 0.7319, "step": 69230 }, { "epoch": 0.8437838957746823, "grad_norm": 2.0565574169158936, "learning_rate": 8.222578576010264e-07, "loss": 0.7756, "step": 69235 }, { "epoch": 0.84384483199883, "grad_norm": 2.794469118118286, "learning_rate": 8.219371391917896e-07, "loss": 0.8427, "step": 69240 }, { "epoch": 0.8439057682229778, "grad_norm": 1.9760913848876953, "learning_rate": 8.21616420782553e-07, "loss": 0.8065, "step": 69245 }, { "epoch": 0.8439667044471256, "grad_norm": 2.007248878479004, "learning_rate": 8.212957023733163e-07, "loss": 0.7945, "step": 69250 }, { "epoch": 0.8440276406712734, "grad_norm": 1.9769831895828247, "learning_rate": 8.209749839640796e-07, "loss": 0.8962, "step": 69255 }, { "epoch": 0.8440885768954213, "grad_norm": 2.0567212104797363, "learning_rate": 8.206542655548428e-07, "loss": 0.8231, "step": 69260 }, { "epoch": 0.8441495131195691, "grad_norm": 1.7654533386230469, "learning_rate": 8.203335471456063e-07, "loss": 0.7621, "step": 69265 }, { "epoch": 0.8442104493437169, "grad_norm": 1.9587541818618774, "learning_rate": 8.200128287363696e-07, "loss": 0.8772, "step": 69270 }, { "epoch": 0.8442713855678646, "grad_norm": 1.9293749332427979, "learning_rate": 8.196921103271328e-07, "loss": 0.7774, "step": 69275 }, { "epoch": 0.8443323217920125, "grad_norm": 2.4748005867004395, "learning_rate": 8.193713919178961e-07, "loss": 0.762, "step": 69280 }, { "epoch": 0.8443932580161603, "grad_norm": 2.0573272705078125, "learning_rate": 8.190506735086595e-07, "loss": 0.8201, "step": 69285 }, { "epoch": 0.8444541942403081, "grad_norm": 2.27120041847229, "learning_rate": 8.187299550994228e-07, "loss": 0.9083, "step": 69290 }, { "epoch": 0.8445151304644559, "grad_norm": 2.042912483215332, "learning_rate": 8.184092366901861e-07, "loss": 0.7931, "step": 69295 }, { "epoch": 0.8445760666886037, "grad_norm": 2.3870034217834473, "learning_rate": 8.180885182809493e-07, "loss": 0.8575, "step": 69300 }, { "epoch": 0.8446370029127516, "grad_norm": 1.8575342893600464, "learning_rate": 8.177677998717127e-07, "loss": 0.8043, "step": 69305 }, { "epoch": 0.8446979391368993, "grad_norm": 1.678425669670105, "learning_rate": 8.174470814624761e-07, "loss": 0.8371, "step": 69310 }, { "epoch": 0.8447588753610471, "grad_norm": 2.150378704071045, "learning_rate": 8.171263630532393e-07, "loss": 0.8239, "step": 69315 }, { "epoch": 0.8448198115851949, "grad_norm": 2.4316208362579346, "learning_rate": 8.168056446440025e-07, "loss": 0.7708, "step": 69320 }, { "epoch": 0.8448807478093427, "grad_norm": 1.909346342086792, "learning_rate": 8.16484926234766e-07, "loss": 0.8023, "step": 69325 }, { "epoch": 0.8449416840334906, "grad_norm": 2.1897943019866943, "learning_rate": 8.161642078255293e-07, "loss": 0.8319, "step": 69330 }, { "epoch": 0.8450026202576384, "grad_norm": 2.0310592651367188, "learning_rate": 8.158434894162925e-07, "loss": 0.8343, "step": 69335 }, { "epoch": 0.8450635564817862, "grad_norm": 1.8563199043273926, "learning_rate": 8.155227710070559e-07, "loss": 0.7603, "step": 69340 }, { "epoch": 0.8451244927059339, "grad_norm": 2.0393502712249756, "learning_rate": 8.152020525978192e-07, "loss": 0.7678, "step": 69345 }, { "epoch": 0.8451854289300818, "grad_norm": 1.9121809005737305, "learning_rate": 8.148813341885825e-07, "loss": 0.7536, "step": 69350 }, { "epoch": 0.8452463651542296, "grad_norm": 2.032318353652954, "learning_rate": 8.145606157793458e-07, "loss": 0.7657, "step": 69355 }, { "epoch": 0.8453073013783774, "grad_norm": 2.3978958129882812, "learning_rate": 8.142398973701091e-07, "loss": 0.8704, "step": 69360 }, { "epoch": 0.8453682376025252, "grad_norm": 2.1035618782043457, "learning_rate": 8.139191789608724e-07, "loss": 0.8281, "step": 69365 }, { "epoch": 0.845429173826673, "grad_norm": 1.924102544784546, "learning_rate": 8.135984605516358e-07, "loss": 0.8211, "step": 69370 }, { "epoch": 0.8454901100508209, "grad_norm": 1.835288643836975, "learning_rate": 8.13277742142399e-07, "loss": 0.7957, "step": 69375 }, { "epoch": 0.8455510462749686, "grad_norm": 1.719652533531189, "learning_rate": 8.129570237331623e-07, "loss": 0.7804, "step": 69380 }, { "epoch": 0.8456119824991164, "grad_norm": 1.7257767915725708, "learning_rate": 8.126363053239257e-07, "loss": 0.754, "step": 69385 }, { "epoch": 0.8456729187232642, "grad_norm": 2.118978977203369, "learning_rate": 8.12315586914689e-07, "loss": 0.7061, "step": 69390 }, { "epoch": 0.845733854947412, "grad_norm": 1.9887139797210693, "learning_rate": 8.119948685054522e-07, "loss": 0.8228, "step": 69395 }, { "epoch": 0.8457947911715599, "grad_norm": 2.042569398880005, "learning_rate": 8.116741500962156e-07, "loss": 0.8098, "step": 69400 }, { "epoch": 0.8458557273957077, "grad_norm": 1.901060938835144, "learning_rate": 8.113534316869789e-07, "loss": 0.8808, "step": 69405 }, { "epoch": 0.8459166636198555, "grad_norm": 1.9251768589019775, "learning_rate": 8.110327132777422e-07, "loss": 0.7625, "step": 69410 }, { "epoch": 0.8459775998440032, "grad_norm": 1.9810353517532349, "learning_rate": 8.107119948685055e-07, "loss": 0.852, "step": 69415 }, { "epoch": 0.846038536068151, "grad_norm": 1.8312129974365234, "learning_rate": 8.103912764592688e-07, "loss": 0.8051, "step": 69420 }, { "epoch": 0.8460994722922989, "grad_norm": 2.111377477645874, "learning_rate": 8.100705580500321e-07, "loss": 0.8741, "step": 69425 }, { "epoch": 0.8461604085164467, "grad_norm": 1.8306077718734741, "learning_rate": 8.097498396407955e-07, "loss": 0.8209, "step": 69430 }, { "epoch": 0.8462213447405945, "grad_norm": 1.947537899017334, "learning_rate": 8.094291212315587e-07, "loss": 0.8381, "step": 69435 }, { "epoch": 0.8462822809647423, "grad_norm": 1.6876643896102905, "learning_rate": 8.09108402822322e-07, "loss": 0.7657, "step": 69440 }, { "epoch": 0.8463432171888902, "grad_norm": 1.9089014530181885, "learning_rate": 8.087876844130855e-07, "loss": 0.822, "step": 69445 }, { "epoch": 0.8464041534130379, "grad_norm": 2.049567222595215, "learning_rate": 8.084669660038487e-07, "loss": 0.908, "step": 69450 }, { "epoch": 0.8464650896371857, "grad_norm": 2.1858317852020264, "learning_rate": 8.081462475946119e-07, "loss": 0.8268, "step": 69455 }, { "epoch": 0.8465260258613335, "grad_norm": 1.934348464012146, "learning_rate": 8.078255291853753e-07, "loss": 0.8946, "step": 69460 }, { "epoch": 0.8465869620854813, "grad_norm": 2.074894428253174, "learning_rate": 8.075048107761387e-07, "loss": 0.8228, "step": 69465 }, { "epoch": 0.8466478983096292, "grad_norm": 2.4675238132476807, "learning_rate": 8.071840923669019e-07, "loss": 0.8151, "step": 69470 }, { "epoch": 0.846708834533777, "grad_norm": 2.1716926097869873, "learning_rate": 8.068633739576652e-07, "loss": 0.8446, "step": 69475 }, { "epoch": 0.8467697707579248, "grad_norm": 2.1192283630371094, "learning_rate": 8.065426555484285e-07, "loss": 0.8127, "step": 69480 }, { "epoch": 0.8468307069820725, "grad_norm": 2.006038188934326, "learning_rate": 8.062219371391919e-07, "loss": 0.7918, "step": 69485 }, { "epoch": 0.8468916432062203, "grad_norm": 1.9069818258285522, "learning_rate": 8.059012187299552e-07, "loss": 0.7964, "step": 69490 }, { "epoch": 0.8469525794303682, "grad_norm": 1.6528689861297607, "learning_rate": 8.055805003207184e-07, "loss": 0.7913, "step": 69495 }, { "epoch": 0.847013515654516, "grad_norm": 1.986197590827942, "learning_rate": 8.052597819114818e-07, "loss": 0.8246, "step": 69500 }, { "epoch": 0.8470744518786638, "grad_norm": 1.7591954469680786, "learning_rate": 8.049390635022452e-07, "loss": 0.8183, "step": 69505 }, { "epoch": 0.8471353881028116, "grad_norm": 1.9110623598098755, "learning_rate": 8.046183450930084e-07, "loss": 0.81, "step": 69510 }, { "epoch": 0.8471963243269593, "grad_norm": 1.8263615369796753, "learning_rate": 8.042976266837717e-07, "loss": 0.8459, "step": 69515 }, { "epoch": 0.8472572605511072, "grad_norm": 1.9181981086730957, "learning_rate": 8.039769082745351e-07, "loss": 0.8368, "step": 69520 }, { "epoch": 0.847318196775255, "grad_norm": 2.1427860260009766, "learning_rate": 8.036561898652984e-07, "loss": 0.8126, "step": 69525 }, { "epoch": 0.8473791329994028, "grad_norm": 2.3412723541259766, "learning_rate": 8.033354714560616e-07, "loss": 0.8018, "step": 69530 }, { "epoch": 0.8474400692235506, "grad_norm": 2.419118642807007, "learning_rate": 8.03014753046825e-07, "loss": 0.7972, "step": 69535 }, { "epoch": 0.8475010054476985, "grad_norm": 1.954839825630188, "learning_rate": 8.026940346375883e-07, "loss": 0.8256, "step": 69540 }, { "epoch": 0.8475619416718463, "grad_norm": 1.9611696004867554, "learning_rate": 8.023733162283516e-07, "loss": 0.7791, "step": 69545 }, { "epoch": 0.847622877895994, "grad_norm": 1.8900105953216553, "learning_rate": 8.020525978191149e-07, "loss": 0.8585, "step": 69550 }, { "epoch": 0.8476838141201418, "grad_norm": 2.2340033054351807, "learning_rate": 8.017318794098782e-07, "loss": 0.8487, "step": 69555 }, { "epoch": 0.8477447503442896, "grad_norm": 2.1279592514038086, "learning_rate": 8.014111610006415e-07, "loss": 0.8107, "step": 69560 }, { "epoch": 0.8478056865684375, "grad_norm": 2.1893844604492188, "learning_rate": 8.010904425914049e-07, "loss": 0.8118, "step": 69565 }, { "epoch": 0.8478666227925853, "grad_norm": 2.0984995365142822, "learning_rate": 8.007697241821681e-07, "loss": 0.9137, "step": 69570 }, { "epoch": 0.8479275590167331, "grad_norm": 2.0566413402557373, "learning_rate": 8.004490057729314e-07, "loss": 0.8319, "step": 69575 }, { "epoch": 0.8479884952408809, "grad_norm": 1.8954917192459106, "learning_rate": 8.001282873636948e-07, "loss": 0.821, "step": 69580 }, { "epoch": 0.8480494314650286, "grad_norm": 1.8963898420333862, "learning_rate": 7.998075689544581e-07, "loss": 0.7404, "step": 69585 }, { "epoch": 0.8481103676891765, "grad_norm": 2.0709023475646973, "learning_rate": 7.994868505452213e-07, "loss": 0.8122, "step": 69590 }, { "epoch": 0.8481713039133243, "grad_norm": 1.8890366554260254, "learning_rate": 7.991661321359847e-07, "loss": 0.8186, "step": 69595 }, { "epoch": 0.8482322401374721, "grad_norm": 2.449227809906006, "learning_rate": 7.98845413726748e-07, "loss": 0.7388, "step": 69600 }, { "epoch": 0.8482931763616199, "grad_norm": 1.8919093608856201, "learning_rate": 7.985246953175113e-07, "loss": 0.7699, "step": 69605 }, { "epoch": 0.8483541125857678, "grad_norm": 2.168527126312256, "learning_rate": 7.982039769082746e-07, "loss": 0.7791, "step": 69610 }, { "epoch": 0.8484150488099156, "grad_norm": 1.7817996740341187, "learning_rate": 7.978832584990379e-07, "loss": 0.8035, "step": 69615 }, { "epoch": 0.8484759850340633, "grad_norm": 1.8982614278793335, "learning_rate": 7.975625400898013e-07, "loss": 0.8586, "step": 69620 }, { "epoch": 0.8485369212582111, "grad_norm": 2.022592544555664, "learning_rate": 7.972418216805646e-07, "loss": 0.7889, "step": 69625 }, { "epoch": 0.8485978574823589, "grad_norm": 1.6128249168395996, "learning_rate": 7.969211032713278e-07, "loss": 0.7222, "step": 69630 }, { "epoch": 0.8486587937065068, "grad_norm": 2.1066646575927734, "learning_rate": 7.966003848620911e-07, "loss": 0.7609, "step": 69635 }, { "epoch": 0.8487197299306546, "grad_norm": 1.8598424196243286, "learning_rate": 7.962796664528546e-07, "loss": 0.8105, "step": 69640 }, { "epoch": 0.8487806661548024, "grad_norm": 2.2203423976898193, "learning_rate": 7.959589480436178e-07, "loss": 0.8472, "step": 69645 }, { "epoch": 0.8488416023789502, "grad_norm": 2.375110387802124, "learning_rate": 7.95638229634381e-07, "loss": 0.778, "step": 69650 }, { "epoch": 0.848902538603098, "grad_norm": 1.7741248607635498, "learning_rate": 7.953175112251443e-07, "loss": 0.8107, "step": 69655 }, { "epoch": 0.8489634748272458, "grad_norm": 1.8235975503921509, "learning_rate": 7.949967928159078e-07, "loss": 0.7444, "step": 69660 }, { "epoch": 0.8490244110513936, "grad_norm": 1.758717656135559, "learning_rate": 7.94676074406671e-07, "loss": 0.7996, "step": 69665 }, { "epoch": 0.8490853472755414, "grad_norm": 1.931779146194458, "learning_rate": 7.943553559974342e-07, "loss": 0.8051, "step": 69670 }, { "epoch": 0.8491462834996892, "grad_norm": 1.8551151752471924, "learning_rate": 7.940346375881976e-07, "loss": 0.84, "step": 69675 }, { "epoch": 0.8492072197238371, "grad_norm": 1.9983880519866943, "learning_rate": 7.93713919178961e-07, "loss": 0.7981, "step": 69680 }, { "epoch": 0.8492681559479849, "grad_norm": 2.241028070449829, "learning_rate": 7.933932007697243e-07, "loss": 0.7375, "step": 69685 }, { "epoch": 0.8493290921721326, "grad_norm": 1.991152048110962, "learning_rate": 7.930724823604876e-07, "loss": 0.7967, "step": 69690 }, { "epoch": 0.8493900283962804, "grad_norm": 1.9374443292617798, "learning_rate": 7.927517639512508e-07, "loss": 0.7638, "step": 69695 }, { "epoch": 0.8494509646204282, "grad_norm": 2.1424076557159424, "learning_rate": 7.924310455420143e-07, "loss": 0.8708, "step": 69700 }, { "epoch": 0.8495119008445761, "grad_norm": 2.0315518379211426, "learning_rate": 7.921103271327775e-07, "loss": 0.8285, "step": 69705 }, { "epoch": 0.8495728370687239, "grad_norm": 1.7986501455307007, "learning_rate": 7.917896087235408e-07, "loss": 0.839, "step": 69710 }, { "epoch": 0.8496337732928717, "grad_norm": 1.8551294803619385, "learning_rate": 7.91468890314304e-07, "loss": 0.8575, "step": 69715 }, { "epoch": 0.8496947095170195, "grad_norm": 2.0185253620147705, "learning_rate": 7.911481719050675e-07, "loss": 0.7935, "step": 69720 }, { "epoch": 0.8497556457411672, "grad_norm": 1.9201364517211914, "learning_rate": 7.908274534958307e-07, "loss": 0.7462, "step": 69725 }, { "epoch": 0.8498165819653151, "grad_norm": 2.0031299591064453, "learning_rate": 7.90506735086594e-07, "loss": 0.6994, "step": 69730 }, { "epoch": 0.8498775181894629, "grad_norm": 1.983571171760559, "learning_rate": 7.901860166773573e-07, "loss": 0.8425, "step": 69735 }, { "epoch": 0.8499384544136107, "grad_norm": 1.772541880607605, "learning_rate": 7.898652982681207e-07, "loss": 0.7774, "step": 69740 }, { "epoch": 0.8499993906377585, "grad_norm": 1.6552804708480835, "learning_rate": 7.895445798588839e-07, "loss": 0.799, "step": 69745 }, { "epoch": 0.8500603268619064, "grad_norm": 2.1899073123931885, "learning_rate": 7.892238614496473e-07, "loss": 0.8072, "step": 69750 }, { "epoch": 0.8501212630860542, "grad_norm": 1.7943882942199707, "learning_rate": 7.889031430404105e-07, "loss": 0.9192, "step": 69755 }, { "epoch": 0.8501821993102019, "grad_norm": 1.8920048475265503, "learning_rate": 7.88582424631174e-07, "loss": 0.8369, "step": 69760 }, { "epoch": 0.8502431355343497, "grad_norm": 1.7388149499893188, "learning_rate": 7.882617062219372e-07, "loss": 0.7833, "step": 69765 }, { "epoch": 0.8503040717584975, "grad_norm": 2.058772087097168, "learning_rate": 7.879409878127005e-07, "loss": 0.7967, "step": 69770 }, { "epoch": 0.8503650079826454, "grad_norm": 2.012078046798706, "learning_rate": 7.876202694034637e-07, "loss": 0.7721, "step": 69775 }, { "epoch": 0.8504259442067932, "grad_norm": 2.2825238704681396, "learning_rate": 7.872995509942272e-07, "loss": 0.7757, "step": 69780 }, { "epoch": 0.850486880430941, "grad_norm": 2.3001708984375, "learning_rate": 7.869788325849904e-07, "loss": 0.7664, "step": 69785 }, { "epoch": 0.8505478166550888, "grad_norm": 2.305103302001953, "learning_rate": 7.866581141757537e-07, "loss": 0.7738, "step": 69790 }, { "epoch": 0.8506087528792365, "grad_norm": 1.7038414478302002, "learning_rate": 7.863373957665171e-07, "loss": 0.776, "step": 69795 }, { "epoch": 0.8506696891033844, "grad_norm": 2.0324437618255615, "learning_rate": 7.860166773572804e-07, "loss": 0.7493, "step": 69800 }, { "epoch": 0.8507306253275322, "grad_norm": 2.3743736743927, "learning_rate": 7.856959589480436e-07, "loss": 0.7667, "step": 69805 }, { "epoch": 0.85079156155168, "grad_norm": 1.9418957233428955, "learning_rate": 7.85375240538807e-07, "loss": 0.8557, "step": 69810 }, { "epoch": 0.8508524977758278, "grad_norm": 1.6973756551742554, "learning_rate": 7.850545221295704e-07, "loss": 0.7746, "step": 69815 }, { "epoch": 0.8509134339999757, "grad_norm": 1.947564721107483, "learning_rate": 7.847338037203336e-07, "loss": 0.8222, "step": 69820 }, { "epoch": 0.8509743702241235, "grad_norm": 2.0948853492736816, "learning_rate": 7.844130853110969e-07, "loss": 0.7855, "step": 69825 }, { "epoch": 0.8510353064482712, "grad_norm": 1.6552547216415405, "learning_rate": 7.840923669018602e-07, "loss": 0.735, "step": 69830 }, { "epoch": 0.851096242672419, "grad_norm": 1.7972049713134766, "learning_rate": 7.837716484926236e-07, "loss": 0.874, "step": 69835 }, { "epoch": 0.8511571788965668, "grad_norm": 2.131777048110962, "learning_rate": 7.834509300833869e-07, "loss": 0.7938, "step": 69840 }, { "epoch": 0.8512181151207147, "grad_norm": 2.175153970718384, "learning_rate": 7.831302116741501e-07, "loss": 0.791, "step": 69845 }, { "epoch": 0.8512790513448625, "grad_norm": 1.8988133668899536, "learning_rate": 7.828094932649134e-07, "loss": 0.7922, "step": 69850 }, { "epoch": 0.8513399875690103, "grad_norm": 2.199519395828247, "learning_rate": 7.824887748556769e-07, "loss": 0.849, "step": 69855 }, { "epoch": 0.8514009237931581, "grad_norm": 1.934519648551941, "learning_rate": 7.821680564464401e-07, "loss": 0.7982, "step": 69860 }, { "epoch": 0.8514618600173058, "grad_norm": 1.8784462213516235, "learning_rate": 7.818473380372034e-07, "loss": 0.8111, "step": 69865 }, { "epoch": 0.8515227962414537, "grad_norm": 1.8818674087524414, "learning_rate": 7.815266196279667e-07, "loss": 0.8153, "step": 69870 }, { "epoch": 0.8515837324656015, "grad_norm": 2.0594639778137207, "learning_rate": 7.812059012187301e-07, "loss": 0.7808, "step": 69875 }, { "epoch": 0.8516446686897493, "grad_norm": 2.0710999965667725, "learning_rate": 7.808851828094933e-07, "loss": 0.7833, "step": 69880 }, { "epoch": 0.8517056049138971, "grad_norm": 1.828352451324463, "learning_rate": 7.805644644002567e-07, "loss": 0.8296, "step": 69885 }, { "epoch": 0.851766541138045, "grad_norm": 1.9228174686431885, "learning_rate": 7.802437459910199e-07, "loss": 0.7983, "step": 69890 }, { "epoch": 0.8518274773621928, "grad_norm": 2.069021463394165, "learning_rate": 7.799230275817833e-07, "loss": 0.9161, "step": 69895 }, { "epoch": 0.8518884135863405, "grad_norm": 2.0541751384735107, "learning_rate": 7.796023091725466e-07, "loss": 0.7748, "step": 69900 }, { "epoch": 0.8519493498104883, "grad_norm": 2.248021125793457, "learning_rate": 7.792815907633099e-07, "loss": 0.8135, "step": 69905 }, { "epoch": 0.8520102860346361, "grad_norm": 2.1919543743133545, "learning_rate": 7.789608723540731e-07, "loss": 0.8317, "step": 69910 }, { "epoch": 0.852071222258784, "grad_norm": 1.8711342811584473, "learning_rate": 7.786401539448366e-07, "loss": 0.8027, "step": 69915 }, { "epoch": 0.8521321584829318, "grad_norm": 2.1810648441314697, "learning_rate": 7.783194355355998e-07, "loss": 0.7878, "step": 69920 }, { "epoch": 0.8521930947070796, "grad_norm": 1.8034454584121704, "learning_rate": 7.779987171263631e-07, "loss": 0.8058, "step": 69925 }, { "epoch": 0.8522540309312274, "grad_norm": 2.7440011501312256, "learning_rate": 7.776779987171264e-07, "loss": 0.7845, "step": 69930 }, { "epoch": 0.8523149671553751, "grad_norm": 1.8788137435913086, "learning_rate": 7.773572803078898e-07, "loss": 0.7713, "step": 69935 }, { "epoch": 0.852375903379523, "grad_norm": 1.8142414093017578, "learning_rate": 7.77036561898653e-07, "loss": 0.8177, "step": 69940 }, { "epoch": 0.8524368396036708, "grad_norm": 1.8090155124664307, "learning_rate": 7.767158434894164e-07, "loss": 0.7598, "step": 69945 }, { "epoch": 0.8524977758278186, "grad_norm": 1.9371087551116943, "learning_rate": 7.763951250801796e-07, "loss": 0.8419, "step": 69950 }, { "epoch": 0.8525587120519664, "grad_norm": 1.8749377727508545, "learning_rate": 7.76074406670943e-07, "loss": 0.7498, "step": 69955 }, { "epoch": 0.8526196482761143, "grad_norm": 2.365892171859741, "learning_rate": 7.757536882617063e-07, "loss": 0.8198, "step": 69960 }, { "epoch": 0.8526805845002621, "grad_norm": 1.9015917778015137, "learning_rate": 7.754329698524696e-07, "loss": 0.8378, "step": 69965 }, { "epoch": 0.8527415207244098, "grad_norm": 1.7315889596939087, "learning_rate": 7.751122514432328e-07, "loss": 0.7979, "step": 69970 }, { "epoch": 0.8528024569485576, "grad_norm": 1.7419490814208984, "learning_rate": 7.747915330339963e-07, "loss": 0.8266, "step": 69975 }, { "epoch": 0.8528633931727054, "grad_norm": 1.880324125289917, "learning_rate": 7.744708146247595e-07, "loss": 0.7954, "step": 69980 }, { "epoch": 0.8529243293968533, "grad_norm": 1.8068759441375732, "learning_rate": 7.741500962155228e-07, "loss": 0.8017, "step": 69985 }, { "epoch": 0.8529852656210011, "grad_norm": 2.1587138175964355, "learning_rate": 7.738293778062861e-07, "loss": 0.8677, "step": 69990 }, { "epoch": 0.8530462018451489, "grad_norm": 1.78635835647583, "learning_rate": 7.735086593970495e-07, "loss": 0.8193, "step": 69995 }, { "epoch": 0.8531071380692967, "grad_norm": 2.2555859088897705, "learning_rate": 7.731879409878127e-07, "loss": 0.8547, "step": 70000 }, { "epoch": 0.8531680742934444, "grad_norm": 1.7514127492904663, "learning_rate": 7.728672225785761e-07, "loss": 0.7826, "step": 70005 }, { "epoch": 0.8532290105175923, "grad_norm": 1.9536329507827759, "learning_rate": 7.725465041693393e-07, "loss": 0.8233, "step": 70010 }, { "epoch": 0.8532899467417401, "grad_norm": 1.8781871795654297, "learning_rate": 7.722257857601027e-07, "loss": 0.8232, "step": 70015 }, { "epoch": 0.8533508829658879, "grad_norm": 1.7409735918045044, "learning_rate": 7.71905067350866e-07, "loss": 0.7966, "step": 70020 }, { "epoch": 0.8534118191900357, "grad_norm": 1.9398571252822876, "learning_rate": 7.715843489416293e-07, "loss": 0.8034, "step": 70025 }, { "epoch": 0.8534727554141835, "grad_norm": 1.7303128242492676, "learning_rate": 7.712636305323925e-07, "loss": 0.8704, "step": 70030 }, { "epoch": 0.8535336916383314, "grad_norm": 1.7724205255508423, "learning_rate": 7.70942912123156e-07, "loss": 0.7542, "step": 70035 }, { "epoch": 0.8535946278624791, "grad_norm": 1.8722608089447021, "learning_rate": 7.706221937139193e-07, "loss": 0.8406, "step": 70040 }, { "epoch": 0.8536555640866269, "grad_norm": 1.721725583076477, "learning_rate": 7.703014753046825e-07, "loss": 0.7449, "step": 70045 }, { "epoch": 0.8537165003107747, "grad_norm": 2.1362147331237793, "learning_rate": 7.699807568954458e-07, "loss": 0.8193, "step": 70050 }, { "epoch": 0.8537774365349226, "grad_norm": 1.9184132814407349, "learning_rate": 7.696600384862092e-07, "loss": 0.7648, "step": 70055 }, { "epoch": 0.8538383727590704, "grad_norm": 1.8149243593215942, "learning_rate": 7.693393200769725e-07, "loss": 0.7915, "step": 70060 }, { "epoch": 0.8538993089832182, "grad_norm": 2.051036834716797, "learning_rate": 7.690186016677358e-07, "loss": 0.7444, "step": 70065 }, { "epoch": 0.853960245207366, "grad_norm": 1.9814581871032715, "learning_rate": 7.68697883258499e-07, "loss": 0.7936, "step": 70070 }, { "epoch": 0.8540211814315137, "grad_norm": 2.036993980407715, "learning_rate": 7.683771648492624e-07, "loss": 0.7749, "step": 70075 }, { "epoch": 0.8540821176556616, "grad_norm": 1.8335514068603516, "learning_rate": 7.680564464400258e-07, "loss": 0.7852, "step": 70080 }, { "epoch": 0.8541430538798094, "grad_norm": 2.251288414001465, "learning_rate": 7.67735728030789e-07, "loss": 0.8148, "step": 70085 }, { "epoch": 0.8542039901039572, "grad_norm": 2.155392646789551, "learning_rate": 7.674150096215524e-07, "loss": 0.8244, "step": 70090 }, { "epoch": 0.854264926328105, "grad_norm": 1.9905542135238647, "learning_rate": 7.670942912123157e-07, "loss": 0.8013, "step": 70095 }, { "epoch": 0.8543258625522528, "grad_norm": 1.7872328758239746, "learning_rate": 7.66773572803079e-07, "loss": 0.7821, "step": 70100 }, { "epoch": 0.8543867987764007, "grad_norm": 2.06906795501709, "learning_rate": 7.664528543938422e-07, "loss": 0.8164, "step": 70105 }, { "epoch": 0.8544477350005484, "grad_norm": 2.3028512001037598, "learning_rate": 7.661321359846057e-07, "loss": 0.8273, "step": 70110 }, { "epoch": 0.8545086712246962, "grad_norm": 1.9959979057312012, "learning_rate": 7.658114175753689e-07, "loss": 0.8023, "step": 70115 }, { "epoch": 0.854569607448844, "grad_norm": 2.106166362762451, "learning_rate": 7.654906991661322e-07, "loss": 0.7604, "step": 70120 }, { "epoch": 0.8546305436729918, "grad_norm": 1.8154913187026978, "learning_rate": 7.651699807568955e-07, "loss": 0.7734, "step": 70125 }, { "epoch": 0.8546914798971397, "grad_norm": 1.8056126832962036, "learning_rate": 7.648492623476589e-07, "loss": 0.8345, "step": 70130 }, { "epoch": 0.8547524161212875, "grad_norm": 1.923529863357544, "learning_rate": 7.645285439384221e-07, "loss": 0.803, "step": 70135 }, { "epoch": 0.8548133523454353, "grad_norm": 1.7499439716339111, "learning_rate": 7.642078255291855e-07, "loss": 0.7942, "step": 70140 }, { "epoch": 0.854874288569583, "grad_norm": 1.8952492475509644, "learning_rate": 7.638871071199487e-07, "loss": 0.8253, "step": 70145 }, { "epoch": 0.8549352247937309, "grad_norm": 1.7896747589111328, "learning_rate": 7.635663887107121e-07, "loss": 0.7282, "step": 70150 }, { "epoch": 0.8549961610178787, "grad_norm": 1.845438003540039, "learning_rate": 7.632456703014754e-07, "loss": 0.8005, "step": 70155 }, { "epoch": 0.8550570972420265, "grad_norm": 2.125948667526245, "learning_rate": 7.629249518922387e-07, "loss": 0.8065, "step": 70160 }, { "epoch": 0.8551180334661743, "grad_norm": 2.007791042327881, "learning_rate": 7.626042334830019e-07, "loss": 0.818, "step": 70165 }, { "epoch": 0.8551789696903221, "grad_norm": 1.8626958131790161, "learning_rate": 7.622835150737654e-07, "loss": 0.8265, "step": 70170 }, { "epoch": 0.85523990591447, "grad_norm": 1.7257696390151978, "learning_rate": 7.619627966645286e-07, "loss": 0.7814, "step": 70175 }, { "epoch": 0.8553008421386177, "grad_norm": 2.0032379627227783, "learning_rate": 7.616420782552919e-07, "loss": 0.7949, "step": 70180 }, { "epoch": 0.8553617783627655, "grad_norm": 1.9392915964126587, "learning_rate": 7.613213598460552e-07, "loss": 0.7983, "step": 70185 }, { "epoch": 0.8554227145869133, "grad_norm": 2.905329942703247, "learning_rate": 7.610006414368186e-07, "loss": 0.8464, "step": 70190 }, { "epoch": 0.8554836508110611, "grad_norm": 1.9170506000518799, "learning_rate": 7.606799230275818e-07, "loss": 0.8539, "step": 70195 }, { "epoch": 0.855544587035209, "grad_norm": 2.0178489685058594, "learning_rate": 7.603592046183452e-07, "loss": 0.7931, "step": 70200 }, { "epoch": 0.8556055232593568, "grad_norm": 1.9454350471496582, "learning_rate": 7.600384862091084e-07, "loss": 0.8278, "step": 70205 }, { "epoch": 0.8556664594835046, "grad_norm": 1.9843385219573975, "learning_rate": 7.597177677998718e-07, "loss": 0.824, "step": 70210 }, { "epoch": 0.8557273957076523, "grad_norm": 1.7231130599975586, "learning_rate": 7.593970493906352e-07, "loss": 0.7642, "step": 70215 }, { "epoch": 0.8557883319318002, "grad_norm": 1.7822296619415283, "learning_rate": 7.590763309813984e-07, "loss": 0.7168, "step": 70220 }, { "epoch": 0.855849268155948, "grad_norm": 1.75599205493927, "learning_rate": 7.587556125721616e-07, "loss": 0.8327, "step": 70225 }, { "epoch": 0.8559102043800958, "grad_norm": 2.0341780185699463, "learning_rate": 7.584348941629251e-07, "loss": 0.8333, "step": 70230 }, { "epoch": 0.8559711406042436, "grad_norm": 1.9745182991027832, "learning_rate": 7.581141757536884e-07, "loss": 0.8803, "step": 70235 }, { "epoch": 0.8560320768283914, "grad_norm": 1.8306574821472168, "learning_rate": 7.577934573444516e-07, "loss": 0.829, "step": 70240 }, { "epoch": 0.8560930130525393, "grad_norm": 2.0654380321502686, "learning_rate": 7.574727389352148e-07, "loss": 0.8336, "step": 70245 }, { "epoch": 0.856153949276687, "grad_norm": 1.8591276407241821, "learning_rate": 7.571520205259783e-07, "loss": 0.8616, "step": 70250 }, { "epoch": 0.8562148855008348, "grad_norm": 2.015254020690918, "learning_rate": 7.568313021167416e-07, "loss": 0.7785, "step": 70255 }, { "epoch": 0.8562758217249826, "grad_norm": 1.8623970746994019, "learning_rate": 7.565105837075048e-07, "loss": 0.8252, "step": 70260 }, { "epoch": 0.8563367579491304, "grad_norm": 2.184537649154663, "learning_rate": 7.561898652982681e-07, "loss": 0.8676, "step": 70265 }, { "epoch": 0.8563976941732783, "grad_norm": 2.055863618850708, "learning_rate": 7.558691468890315e-07, "loss": 0.774, "step": 70270 }, { "epoch": 0.8564586303974261, "grad_norm": 1.8678593635559082, "learning_rate": 7.555484284797949e-07, "loss": 0.7664, "step": 70275 }, { "epoch": 0.8565195666215739, "grad_norm": 2.0065736770629883, "learning_rate": 7.552277100705581e-07, "loss": 0.8728, "step": 70280 }, { "epoch": 0.8565805028457216, "grad_norm": 2.067702054977417, "learning_rate": 7.549069916613214e-07, "loss": 0.7838, "step": 70285 }, { "epoch": 0.8566414390698694, "grad_norm": 2.2178173065185547, "learning_rate": 7.545862732520847e-07, "loss": 0.7568, "step": 70290 }, { "epoch": 0.8567023752940173, "grad_norm": 2.0016322135925293, "learning_rate": 7.542655548428481e-07, "loss": 0.882, "step": 70295 }, { "epoch": 0.8567633115181651, "grad_norm": 1.887978434562683, "learning_rate": 7.539448364336113e-07, "loss": 0.8359, "step": 70300 }, { "epoch": 0.8568242477423129, "grad_norm": 1.893708348274231, "learning_rate": 7.536241180243746e-07, "loss": 0.8496, "step": 70305 }, { "epoch": 0.8568851839664607, "grad_norm": 2.8332290649414062, "learning_rate": 7.53303399615138e-07, "loss": 0.7413, "step": 70310 }, { "epoch": 0.8569461201906086, "grad_norm": 1.8331068754196167, "learning_rate": 7.529826812059013e-07, "loss": 0.7935, "step": 70315 }, { "epoch": 0.8570070564147563, "grad_norm": 1.960471749305725, "learning_rate": 7.526619627966645e-07, "loss": 0.8155, "step": 70320 }, { "epoch": 0.8570679926389041, "grad_norm": 2.8950107097625732, "learning_rate": 7.523412443874279e-07, "loss": 0.8412, "step": 70325 }, { "epoch": 0.8571289288630519, "grad_norm": 1.9445797204971313, "learning_rate": 7.520205259781912e-07, "loss": 0.8374, "step": 70330 }, { "epoch": 0.8571898650871997, "grad_norm": 2.5554018020629883, "learning_rate": 7.516998075689545e-07, "loss": 0.7918, "step": 70335 }, { "epoch": 0.8572508013113476, "grad_norm": 1.675188660621643, "learning_rate": 7.513790891597178e-07, "loss": 0.815, "step": 70340 }, { "epoch": 0.8573117375354954, "grad_norm": 1.841907024383545, "learning_rate": 7.510583707504811e-07, "loss": 0.7722, "step": 70345 }, { "epoch": 0.8573726737596432, "grad_norm": 2.339024782180786, "learning_rate": 7.507376523412444e-07, "loss": 0.8299, "step": 70350 }, { "epoch": 0.8574336099837909, "grad_norm": 1.977470874786377, "learning_rate": 7.504169339320078e-07, "loss": 0.8335, "step": 70355 }, { "epoch": 0.8574945462079387, "grad_norm": 1.7687740325927734, "learning_rate": 7.50096215522771e-07, "loss": 0.8521, "step": 70360 }, { "epoch": 0.8575554824320866, "grad_norm": 2.258471727371216, "learning_rate": 7.497754971135343e-07, "loss": 0.8201, "step": 70365 }, { "epoch": 0.8576164186562344, "grad_norm": 2.3523788452148438, "learning_rate": 7.494547787042977e-07, "loss": 0.8291, "step": 70370 }, { "epoch": 0.8576773548803822, "grad_norm": 1.8386274576187134, "learning_rate": 7.49134060295061e-07, "loss": 0.7756, "step": 70375 }, { "epoch": 0.85773829110453, "grad_norm": 2.2354860305786133, "learning_rate": 7.488133418858242e-07, "loss": 0.8832, "step": 70380 }, { "epoch": 0.8577992273286779, "grad_norm": 1.8965932130813599, "learning_rate": 7.484926234765876e-07, "loss": 0.814, "step": 70385 }, { "epoch": 0.8578601635528256, "grad_norm": 2.142474412918091, "learning_rate": 7.48171905067351e-07, "loss": 0.8556, "step": 70390 }, { "epoch": 0.8579210997769734, "grad_norm": 1.9966374635696411, "learning_rate": 7.478511866581142e-07, "loss": 0.8763, "step": 70395 }, { "epoch": 0.8579820360011212, "grad_norm": 2.2827770709991455, "learning_rate": 7.475304682488775e-07, "loss": 0.8765, "step": 70400 }, { "epoch": 0.858042972225269, "grad_norm": 1.9305723905563354, "learning_rate": 7.472097498396409e-07, "loss": 0.8446, "step": 70405 }, { "epoch": 0.8581039084494169, "grad_norm": 2.026684045791626, "learning_rate": 7.468890314304042e-07, "loss": 0.8165, "step": 70410 }, { "epoch": 0.8581648446735647, "grad_norm": 2.156450033187866, "learning_rate": 7.465683130211675e-07, "loss": 0.8839, "step": 70415 }, { "epoch": 0.8582257808977125, "grad_norm": 1.825992465019226, "learning_rate": 7.462475946119307e-07, "loss": 0.841, "step": 70420 }, { "epoch": 0.8582867171218602, "grad_norm": 2.073166608810425, "learning_rate": 7.459268762026941e-07, "loss": 0.7715, "step": 70425 }, { "epoch": 0.858347653346008, "grad_norm": 2.137465000152588, "learning_rate": 7.456061577934575e-07, "loss": 0.8319, "step": 70430 }, { "epoch": 0.8584085895701559, "grad_norm": 1.7993086576461792, "learning_rate": 7.452854393842207e-07, "loss": 0.8577, "step": 70435 }, { "epoch": 0.8584695257943037, "grad_norm": 2.782748222351074, "learning_rate": 7.449647209749839e-07, "loss": 0.8442, "step": 70440 }, { "epoch": 0.8585304620184515, "grad_norm": 2.2774925231933594, "learning_rate": 7.446440025657474e-07, "loss": 0.7642, "step": 70445 }, { "epoch": 0.8585913982425993, "grad_norm": 2.4926278591156006, "learning_rate": 7.443232841565107e-07, "loss": 0.9231, "step": 70450 }, { "epoch": 0.858652334466747, "grad_norm": 1.647847294807434, "learning_rate": 7.440025657472739e-07, "loss": 0.8015, "step": 70455 }, { "epoch": 0.8587132706908949, "grad_norm": 1.9978796243667603, "learning_rate": 7.436818473380373e-07, "loss": 0.8217, "step": 70460 }, { "epoch": 0.8587742069150427, "grad_norm": 1.719810128211975, "learning_rate": 7.433611289288006e-07, "loss": 0.7453, "step": 70465 }, { "epoch": 0.8588351431391905, "grad_norm": 2.0550878047943115, "learning_rate": 7.430404105195639e-07, "loss": 0.8652, "step": 70470 }, { "epoch": 0.8588960793633383, "grad_norm": 1.7913308143615723, "learning_rate": 7.427196921103272e-07, "loss": 0.8115, "step": 70475 }, { "epoch": 0.8589570155874862, "grad_norm": 2.1116318702697754, "learning_rate": 7.423989737010905e-07, "loss": 0.8957, "step": 70480 }, { "epoch": 0.859017951811634, "grad_norm": 1.6326655149459839, "learning_rate": 7.420782552918538e-07, "loss": 0.8006, "step": 70485 }, { "epoch": 0.8590788880357817, "grad_norm": 2.9737114906311035, "learning_rate": 7.417575368826172e-07, "loss": 0.7599, "step": 70490 }, { "epoch": 0.8591398242599295, "grad_norm": 1.9756377935409546, "learning_rate": 7.414368184733804e-07, "loss": 0.7752, "step": 70495 }, { "epoch": 0.8592007604840773, "grad_norm": 1.8927034139633179, "learning_rate": 7.411161000641437e-07, "loss": 0.8633, "step": 70500 }, { "epoch": 0.8592616967082252, "grad_norm": 1.8604165315628052, "learning_rate": 7.407953816549071e-07, "loss": 0.8229, "step": 70505 }, { "epoch": 0.859322632932373, "grad_norm": 2.1760778427124023, "learning_rate": 7.404746632456704e-07, "loss": 0.7928, "step": 70510 }, { "epoch": 0.8593835691565208, "grad_norm": 2.1582181453704834, "learning_rate": 7.401539448364336e-07, "loss": 0.7885, "step": 70515 }, { "epoch": 0.8594445053806686, "grad_norm": 1.7417179346084595, "learning_rate": 7.39833226427197e-07, "loss": 0.7659, "step": 70520 }, { "epoch": 0.8595054416048163, "grad_norm": 1.8251373767852783, "learning_rate": 7.395125080179603e-07, "loss": 0.8627, "step": 70525 }, { "epoch": 0.8595663778289642, "grad_norm": 1.883222222328186, "learning_rate": 7.391917896087236e-07, "loss": 0.8025, "step": 70530 }, { "epoch": 0.859627314053112, "grad_norm": 2.0501556396484375, "learning_rate": 7.388710711994869e-07, "loss": 0.8176, "step": 70535 }, { "epoch": 0.8596882502772598, "grad_norm": 1.9473272562026978, "learning_rate": 7.385503527902502e-07, "loss": 0.8379, "step": 70540 }, { "epoch": 0.8597491865014076, "grad_norm": 1.971936821937561, "learning_rate": 7.382296343810135e-07, "loss": 0.844, "step": 70545 }, { "epoch": 0.8598101227255555, "grad_norm": 2.0750484466552734, "learning_rate": 7.379089159717769e-07, "loss": 0.8295, "step": 70550 }, { "epoch": 0.8598710589497033, "grad_norm": 2.1007163524627686, "learning_rate": 7.375881975625401e-07, "loss": 0.7902, "step": 70555 }, { "epoch": 0.859931995173851, "grad_norm": 1.9927856922149658, "learning_rate": 7.372674791533034e-07, "loss": 0.8476, "step": 70560 }, { "epoch": 0.8599929313979988, "grad_norm": 1.7947698831558228, "learning_rate": 7.369467607440669e-07, "loss": 0.8837, "step": 70565 }, { "epoch": 0.8600538676221466, "grad_norm": 2.083439826965332, "learning_rate": 7.366260423348301e-07, "loss": 0.7787, "step": 70570 }, { "epoch": 0.8601148038462945, "grad_norm": 2.0075478553771973, "learning_rate": 7.363053239255933e-07, "loss": 0.7713, "step": 70575 }, { "epoch": 0.8601757400704423, "grad_norm": 2.134335517883301, "learning_rate": 7.359846055163567e-07, "loss": 0.8366, "step": 70580 }, { "epoch": 0.8602366762945901, "grad_norm": 1.8147245645523071, "learning_rate": 7.356638871071201e-07, "loss": 0.8098, "step": 70585 }, { "epoch": 0.8602976125187379, "grad_norm": 1.8124215602874756, "learning_rate": 7.353431686978833e-07, "loss": 0.8709, "step": 70590 }, { "epoch": 0.8603585487428856, "grad_norm": 2.0803091526031494, "learning_rate": 7.350224502886466e-07, "loss": 0.8841, "step": 70595 }, { "epoch": 0.8604194849670335, "grad_norm": 1.9623794555664062, "learning_rate": 7.347017318794099e-07, "loss": 0.7675, "step": 70600 }, { "epoch": 0.8604804211911813, "grad_norm": 1.9134892225265503, "learning_rate": 7.343810134701733e-07, "loss": 0.8412, "step": 70605 }, { "epoch": 0.8605413574153291, "grad_norm": 2.050053834915161, "learning_rate": 7.340602950609366e-07, "loss": 0.8423, "step": 70610 }, { "epoch": 0.8606022936394769, "grad_norm": 2.0965890884399414, "learning_rate": 7.337395766516998e-07, "loss": 0.7811, "step": 70615 }, { "epoch": 0.8606632298636248, "grad_norm": 1.9885892868041992, "learning_rate": 7.334188582424631e-07, "loss": 0.7523, "step": 70620 }, { "epoch": 0.8607241660877726, "grad_norm": 2.021366834640503, "learning_rate": 7.330981398332266e-07, "loss": 0.8047, "step": 70625 }, { "epoch": 0.8607851023119203, "grad_norm": 2.1236536502838135, "learning_rate": 7.327774214239898e-07, "loss": 0.8009, "step": 70630 }, { "epoch": 0.8608460385360681, "grad_norm": 2.0348877906799316, "learning_rate": 7.324567030147531e-07, "loss": 0.8207, "step": 70635 }, { "epoch": 0.8609069747602159, "grad_norm": 2.2269959449768066, "learning_rate": 7.321359846055164e-07, "loss": 0.858, "step": 70640 }, { "epoch": 0.8609679109843638, "grad_norm": 2.1158933639526367, "learning_rate": 7.318152661962798e-07, "loss": 0.8354, "step": 70645 }, { "epoch": 0.8610288472085116, "grad_norm": 2.2157154083251953, "learning_rate": 7.31494547787043e-07, "loss": 0.8633, "step": 70650 }, { "epoch": 0.8610897834326594, "grad_norm": 1.7693121433258057, "learning_rate": 7.311738293778064e-07, "loss": 0.8284, "step": 70655 }, { "epoch": 0.8611507196568072, "grad_norm": 1.979906678199768, "learning_rate": 7.308531109685696e-07, "loss": 0.8503, "step": 70660 }, { "epoch": 0.8612116558809549, "grad_norm": 2.342585325241089, "learning_rate": 7.30532392559333e-07, "loss": 0.8045, "step": 70665 }, { "epoch": 0.8612725921051028, "grad_norm": 1.9361263513565063, "learning_rate": 7.302116741500963e-07, "loss": 0.8279, "step": 70670 }, { "epoch": 0.8613335283292506, "grad_norm": 1.9232097864151, "learning_rate": 7.298909557408596e-07, "loss": 0.7797, "step": 70675 }, { "epoch": 0.8613944645533984, "grad_norm": 2.0376858711242676, "learning_rate": 7.295702373316228e-07, "loss": 0.8357, "step": 70680 }, { "epoch": 0.8614554007775462, "grad_norm": 1.7255159616470337, "learning_rate": 7.292495189223863e-07, "loss": 0.8109, "step": 70685 }, { "epoch": 0.861516337001694, "grad_norm": 1.678887963294983, "learning_rate": 7.289288005131495e-07, "loss": 0.795, "step": 70690 }, { "epoch": 0.8615772732258419, "grad_norm": 2.1395485401153564, "learning_rate": 7.286080821039128e-07, "loss": 0.8053, "step": 70695 }, { "epoch": 0.8616382094499896, "grad_norm": 1.947008490562439, "learning_rate": 7.282873636946762e-07, "loss": 0.8497, "step": 70700 }, { "epoch": 0.8616991456741374, "grad_norm": 1.8719862699508667, "learning_rate": 7.279666452854395e-07, "loss": 0.8919, "step": 70705 }, { "epoch": 0.8617600818982852, "grad_norm": 2.0154871940612793, "learning_rate": 7.276459268762027e-07, "loss": 0.7773, "step": 70710 }, { "epoch": 0.8618210181224331, "grad_norm": 1.6792612075805664, "learning_rate": 7.273252084669661e-07, "loss": 0.7918, "step": 70715 }, { "epoch": 0.8618819543465809, "grad_norm": 1.7382917404174805, "learning_rate": 7.270044900577294e-07, "loss": 0.7934, "step": 70720 }, { "epoch": 0.8619428905707287, "grad_norm": 1.9271470308303833, "learning_rate": 7.266837716484927e-07, "loss": 0.8232, "step": 70725 }, { "epoch": 0.8620038267948765, "grad_norm": 2.136753797531128, "learning_rate": 7.26363053239256e-07, "loss": 0.8151, "step": 70730 }, { "epoch": 0.8620647630190242, "grad_norm": 1.7621041536331177, "learning_rate": 7.260423348300193e-07, "loss": 0.8373, "step": 70735 }, { "epoch": 0.8621256992431721, "grad_norm": 1.6678005456924438, "learning_rate": 7.257216164207826e-07, "loss": 0.7529, "step": 70740 }, { "epoch": 0.8621866354673199, "grad_norm": 2.2416019439697266, "learning_rate": 7.25400898011546e-07, "loss": 0.834, "step": 70745 }, { "epoch": 0.8622475716914677, "grad_norm": 2.3851804733276367, "learning_rate": 7.250801796023092e-07, "loss": 0.7702, "step": 70750 }, { "epoch": 0.8623085079156155, "grad_norm": 1.9362566471099854, "learning_rate": 7.247594611930725e-07, "loss": 0.8256, "step": 70755 }, { "epoch": 0.8623694441397634, "grad_norm": 1.8336151838302612, "learning_rate": 7.24438742783836e-07, "loss": 0.7802, "step": 70760 }, { "epoch": 0.8624303803639112, "grad_norm": 1.980164647102356, "learning_rate": 7.241180243745992e-07, "loss": 0.8471, "step": 70765 }, { "epoch": 0.8624913165880589, "grad_norm": 2.1949071884155273, "learning_rate": 7.237973059653624e-07, "loss": 0.861, "step": 70770 }, { "epoch": 0.8625522528122067, "grad_norm": 2.036515712738037, "learning_rate": 7.234765875561258e-07, "loss": 0.7906, "step": 70775 }, { "epoch": 0.8626131890363545, "grad_norm": 3.0878970623016357, "learning_rate": 7.231558691468892e-07, "loss": 0.8908, "step": 70780 }, { "epoch": 0.8626741252605024, "grad_norm": 1.7188215255737305, "learning_rate": 7.228351507376524e-07, "loss": 0.8137, "step": 70785 }, { "epoch": 0.8627350614846502, "grad_norm": 2.409769296646118, "learning_rate": 7.225144323284157e-07, "loss": 0.7653, "step": 70790 }, { "epoch": 0.862795997708798, "grad_norm": 2.0299794673919678, "learning_rate": 7.22193713919179e-07, "loss": 0.7316, "step": 70795 }, { "epoch": 0.8628569339329458, "grad_norm": 1.9984924793243408, "learning_rate": 7.218729955099424e-07, "loss": 0.8067, "step": 70800 }, { "epoch": 0.8629178701570935, "grad_norm": 1.8584012985229492, "learning_rate": 7.215522771007057e-07, "loss": 0.7681, "step": 70805 }, { "epoch": 0.8629788063812414, "grad_norm": 1.8255048990249634, "learning_rate": 7.21231558691469e-07, "loss": 0.7698, "step": 70810 }, { "epoch": 0.8630397426053892, "grad_norm": 2.1382486820220947, "learning_rate": 7.209108402822322e-07, "loss": 0.7747, "step": 70815 }, { "epoch": 0.863100678829537, "grad_norm": 2.0186848640441895, "learning_rate": 7.205901218729957e-07, "loss": 0.7929, "step": 70820 }, { "epoch": 0.8631616150536848, "grad_norm": 2.057595729827881, "learning_rate": 7.202694034637589e-07, "loss": 0.8261, "step": 70825 }, { "epoch": 0.8632225512778327, "grad_norm": 2.047853469848633, "learning_rate": 7.199486850545222e-07, "loss": 0.7908, "step": 70830 }, { "epoch": 0.8632834875019805, "grad_norm": 1.9026051759719849, "learning_rate": 7.196279666452854e-07, "loss": 0.8282, "step": 70835 }, { "epoch": 0.8633444237261282, "grad_norm": 2.002095937728882, "learning_rate": 7.193072482360489e-07, "loss": 0.8418, "step": 70840 }, { "epoch": 0.863405359950276, "grad_norm": 2.461057424545288, "learning_rate": 7.189865298268121e-07, "loss": 0.8122, "step": 70845 }, { "epoch": 0.8634662961744238, "grad_norm": 2.506169557571411, "learning_rate": 7.186658114175755e-07, "loss": 0.859, "step": 70850 }, { "epoch": 0.8635272323985717, "grad_norm": 1.691151738166809, "learning_rate": 7.183450930083387e-07, "loss": 0.797, "step": 70855 }, { "epoch": 0.8635881686227195, "grad_norm": 1.5761475563049316, "learning_rate": 7.180243745991021e-07, "loss": 0.8166, "step": 70860 }, { "epoch": 0.8636491048468673, "grad_norm": 1.7671070098876953, "learning_rate": 7.177036561898653e-07, "loss": 0.7825, "step": 70865 }, { "epoch": 0.8637100410710151, "grad_norm": 3.252943515777588, "learning_rate": 7.173829377806287e-07, "loss": 0.8083, "step": 70870 }, { "epoch": 0.8637709772951628, "grad_norm": 2.171196460723877, "learning_rate": 7.170622193713919e-07, "loss": 0.7716, "step": 70875 }, { "epoch": 0.8638319135193107, "grad_norm": 2.135157823562622, "learning_rate": 7.167415009621554e-07, "loss": 0.7424, "step": 70880 }, { "epoch": 0.8638928497434585, "grad_norm": 1.7729116678237915, "learning_rate": 7.164207825529186e-07, "loss": 0.8108, "step": 70885 }, { "epoch": 0.8639537859676063, "grad_norm": 2.203242301940918, "learning_rate": 7.161000641436819e-07, "loss": 0.8205, "step": 70890 }, { "epoch": 0.8640147221917541, "grad_norm": 2.204925775527954, "learning_rate": 7.157793457344451e-07, "loss": 0.8206, "step": 70895 }, { "epoch": 0.864075658415902, "grad_norm": 1.9173109531402588, "learning_rate": 7.154586273252086e-07, "loss": 0.8324, "step": 70900 }, { "epoch": 0.8641365946400498, "grad_norm": 2.5534961223602295, "learning_rate": 7.151379089159718e-07, "loss": 0.7613, "step": 70905 }, { "epoch": 0.8641975308641975, "grad_norm": 2.5385117530822754, "learning_rate": 7.148171905067351e-07, "loss": 0.6861, "step": 70910 }, { "epoch": 0.8642584670883453, "grad_norm": 1.915265679359436, "learning_rate": 7.144964720974984e-07, "loss": 0.7543, "step": 70915 }, { "epoch": 0.8643194033124931, "grad_norm": 2.0218095779418945, "learning_rate": 7.141757536882618e-07, "loss": 0.8183, "step": 70920 }, { "epoch": 0.864380339536641, "grad_norm": 1.8752862215042114, "learning_rate": 7.13855035279025e-07, "loss": 0.8345, "step": 70925 }, { "epoch": 0.8644412757607888, "grad_norm": 2.0339725017547607, "learning_rate": 7.135343168697884e-07, "loss": 0.8329, "step": 70930 }, { "epoch": 0.8645022119849366, "grad_norm": 2.0299339294433594, "learning_rate": 7.132135984605516e-07, "loss": 0.7951, "step": 70935 }, { "epoch": 0.8645631482090844, "grad_norm": 2.2058794498443604, "learning_rate": 7.12892880051315e-07, "loss": 0.8473, "step": 70940 }, { "epoch": 0.8646240844332321, "grad_norm": 2.0376410484313965, "learning_rate": 7.125721616420783e-07, "loss": 0.7835, "step": 70945 }, { "epoch": 0.86468502065738, "grad_norm": 2.4962949752807617, "learning_rate": 7.122514432328416e-07, "loss": 0.8621, "step": 70950 }, { "epoch": 0.8647459568815278, "grad_norm": 1.9660497903823853, "learning_rate": 7.119307248236048e-07, "loss": 0.8525, "step": 70955 }, { "epoch": 0.8648068931056756, "grad_norm": 1.99818754196167, "learning_rate": 7.116100064143683e-07, "loss": 0.8381, "step": 70960 }, { "epoch": 0.8648678293298234, "grad_norm": 1.7647950649261475, "learning_rate": 7.112892880051315e-07, "loss": 0.8336, "step": 70965 }, { "epoch": 0.8649287655539712, "grad_norm": 1.9913568496704102, "learning_rate": 7.109685695958948e-07, "loss": 0.8278, "step": 70970 }, { "epoch": 0.8649897017781191, "grad_norm": 1.8352643251419067, "learning_rate": 7.106478511866581e-07, "loss": 0.876, "step": 70975 }, { "epoch": 0.8650506380022668, "grad_norm": 1.9783294200897217, "learning_rate": 7.103271327774215e-07, "loss": 0.8477, "step": 70980 }, { "epoch": 0.8651115742264146, "grad_norm": 2.4798367023468018, "learning_rate": 7.100064143681848e-07, "loss": 0.8027, "step": 70985 }, { "epoch": 0.8651725104505624, "grad_norm": 1.8513696193695068, "learning_rate": 7.096856959589481e-07, "loss": 0.8513, "step": 70990 }, { "epoch": 0.8652334466747102, "grad_norm": 2.2115256786346436, "learning_rate": 7.093649775497113e-07, "loss": 0.8428, "step": 70995 }, { "epoch": 0.8652943828988581, "grad_norm": 1.9556310176849365, "learning_rate": 7.090442591404747e-07, "loss": 0.8718, "step": 71000 }, { "epoch": 0.8653553191230059, "grad_norm": 2.2911911010742188, "learning_rate": 7.087235407312381e-07, "loss": 0.7695, "step": 71005 }, { "epoch": 0.8654162553471537, "grad_norm": 2.1401615142822266, "learning_rate": 7.084028223220013e-07, "loss": 0.7479, "step": 71010 }, { "epoch": 0.8654771915713014, "grad_norm": 2.1226437091827393, "learning_rate": 7.080821039127647e-07, "loss": 0.7433, "step": 71015 }, { "epoch": 0.8655381277954493, "grad_norm": 1.9997732639312744, "learning_rate": 7.07761385503528e-07, "loss": 0.8152, "step": 71020 }, { "epoch": 0.8655990640195971, "grad_norm": 1.7959610223770142, "learning_rate": 7.074406670942913e-07, "loss": 0.7608, "step": 71025 }, { "epoch": 0.8656600002437449, "grad_norm": 1.9054527282714844, "learning_rate": 7.071199486850545e-07, "loss": 0.7988, "step": 71030 }, { "epoch": 0.8657209364678927, "grad_norm": 1.9616292715072632, "learning_rate": 7.06799230275818e-07, "loss": 0.7754, "step": 71035 }, { "epoch": 0.8657818726920405, "grad_norm": 1.71487557888031, "learning_rate": 7.064785118665812e-07, "loss": 0.764, "step": 71040 }, { "epoch": 0.8658428089161884, "grad_norm": 2.050147294998169, "learning_rate": 7.061577934573445e-07, "loss": 0.8047, "step": 71045 }, { "epoch": 0.8659037451403361, "grad_norm": 1.9619475603103638, "learning_rate": 7.058370750481078e-07, "loss": 0.859, "step": 71050 }, { "epoch": 0.8659646813644839, "grad_norm": 2.1579535007476807, "learning_rate": 7.055163566388712e-07, "loss": 0.8634, "step": 71055 }, { "epoch": 0.8660256175886317, "grad_norm": 1.8122622966766357, "learning_rate": 7.051956382296344e-07, "loss": 0.7684, "step": 71060 }, { "epoch": 0.8660865538127795, "grad_norm": 2.9324984550476074, "learning_rate": 7.048749198203978e-07, "loss": 0.846, "step": 71065 }, { "epoch": 0.8661474900369274, "grad_norm": 2.284787654876709, "learning_rate": 7.04554201411161e-07, "loss": 0.8288, "step": 71070 }, { "epoch": 0.8662084262610752, "grad_norm": 1.9816035032272339, "learning_rate": 7.042334830019244e-07, "loss": 0.7773, "step": 71075 }, { "epoch": 0.866269362485223, "grad_norm": 1.975046992301941, "learning_rate": 7.039127645926877e-07, "loss": 0.8279, "step": 71080 }, { "epoch": 0.8663302987093707, "grad_norm": 1.917762279510498, "learning_rate": 7.03592046183451e-07, "loss": 0.781, "step": 71085 }, { "epoch": 0.8663912349335186, "grad_norm": 3.0251591205596924, "learning_rate": 7.032713277742142e-07, "loss": 0.8668, "step": 71090 }, { "epoch": 0.8664521711576664, "grad_norm": 2.1280603408813477, "learning_rate": 7.029506093649777e-07, "loss": 0.7764, "step": 71095 }, { "epoch": 0.8665131073818142, "grad_norm": 1.8526924848556519, "learning_rate": 7.026298909557409e-07, "loss": 0.8229, "step": 71100 }, { "epoch": 0.866574043605962, "grad_norm": 2.0130062103271484, "learning_rate": 7.023091725465042e-07, "loss": 0.8106, "step": 71105 }, { "epoch": 0.8666349798301098, "grad_norm": 1.943121314048767, "learning_rate": 7.019884541372675e-07, "loss": 0.8354, "step": 71110 }, { "epoch": 0.8666959160542577, "grad_norm": 2.0875468254089355, "learning_rate": 7.016677357280309e-07, "loss": 0.7803, "step": 71115 }, { "epoch": 0.8667568522784054, "grad_norm": 1.798164963722229, "learning_rate": 7.013470173187941e-07, "loss": 0.8471, "step": 71120 }, { "epoch": 0.8668177885025532, "grad_norm": 1.6474990844726562, "learning_rate": 7.010262989095575e-07, "loss": 0.7905, "step": 71125 }, { "epoch": 0.866878724726701, "grad_norm": 1.9669090509414673, "learning_rate": 7.007055805003207e-07, "loss": 0.8081, "step": 71130 }, { "epoch": 0.8669396609508488, "grad_norm": 2.0790553092956543, "learning_rate": 7.003848620910841e-07, "loss": 0.8067, "step": 71135 }, { "epoch": 0.8670005971749967, "grad_norm": 2.0861005783081055, "learning_rate": 7.000641436818474e-07, "loss": 0.8076, "step": 71140 }, { "epoch": 0.8670615333991445, "grad_norm": 2.012037515640259, "learning_rate": 6.997434252726107e-07, "loss": 0.8625, "step": 71145 }, { "epoch": 0.8671224696232923, "grad_norm": 1.7394843101501465, "learning_rate": 6.994227068633739e-07, "loss": 0.8508, "step": 71150 }, { "epoch": 0.86718340584744, "grad_norm": 2.317281723022461, "learning_rate": 6.991019884541374e-07, "loss": 0.7787, "step": 71155 }, { "epoch": 0.8672443420715878, "grad_norm": 2.172097682952881, "learning_rate": 6.987812700449007e-07, "loss": 0.8975, "step": 71160 }, { "epoch": 0.8673052782957357, "grad_norm": 1.7899084091186523, "learning_rate": 6.984605516356639e-07, "loss": 0.8901, "step": 71165 }, { "epoch": 0.8673662145198835, "grad_norm": 1.9877804517745972, "learning_rate": 6.981398332264272e-07, "loss": 0.7892, "step": 71170 }, { "epoch": 0.8674271507440313, "grad_norm": 1.9505921602249146, "learning_rate": 6.978191148171906e-07, "loss": 0.8433, "step": 71175 }, { "epoch": 0.8674880869681791, "grad_norm": 2.0897812843322754, "learning_rate": 6.974983964079539e-07, "loss": 0.8228, "step": 71180 }, { "epoch": 0.867549023192327, "grad_norm": 2.0329625606536865, "learning_rate": 6.971776779987172e-07, "loss": 0.7407, "step": 71185 }, { "epoch": 0.8676099594164747, "grad_norm": 1.9218441247940063, "learning_rate": 6.968569595894804e-07, "loss": 0.7681, "step": 71190 }, { "epoch": 0.8676708956406225, "grad_norm": 1.8729169368743896, "learning_rate": 6.965362411802438e-07, "loss": 0.7467, "step": 71195 }, { "epoch": 0.8677318318647703, "grad_norm": 1.787408471107483, "learning_rate": 6.962155227710072e-07, "loss": 0.7977, "step": 71200 }, { "epoch": 0.8677927680889181, "grad_norm": 2.447889804840088, "learning_rate": 6.958948043617704e-07, "loss": 0.8565, "step": 71205 }, { "epoch": 0.867853704313066, "grad_norm": 2.1668882369995117, "learning_rate": 6.955740859525336e-07, "loss": 0.7885, "step": 71210 }, { "epoch": 0.8679146405372138, "grad_norm": 1.8653080463409424, "learning_rate": 6.952533675432971e-07, "loss": 0.8617, "step": 71215 }, { "epoch": 0.8679755767613616, "grad_norm": 2.038841962814331, "learning_rate": 6.949326491340604e-07, "loss": 0.7947, "step": 71220 }, { "epoch": 0.8680365129855093, "grad_norm": 1.8471359014511108, "learning_rate": 6.946119307248236e-07, "loss": 0.7794, "step": 71225 }, { "epoch": 0.8680974492096571, "grad_norm": 1.7408521175384521, "learning_rate": 6.94291212315587e-07, "loss": 0.757, "step": 71230 }, { "epoch": 0.868158385433805, "grad_norm": 2.0234122276306152, "learning_rate": 6.939704939063503e-07, "loss": 0.7864, "step": 71235 }, { "epoch": 0.8682193216579528, "grad_norm": 2.248147487640381, "learning_rate": 6.936497754971136e-07, "loss": 0.7948, "step": 71240 }, { "epoch": 0.8682802578821006, "grad_norm": 1.6546905040740967, "learning_rate": 6.933290570878769e-07, "loss": 0.9, "step": 71245 }, { "epoch": 0.8683411941062484, "grad_norm": 2.0034821033477783, "learning_rate": 6.930083386786402e-07, "loss": 0.8614, "step": 71250 }, { "epoch": 0.8684021303303963, "grad_norm": 2.1616368293762207, "learning_rate": 6.926876202694035e-07, "loss": 0.8476, "step": 71255 }, { "epoch": 0.868463066554544, "grad_norm": 2.913604736328125, "learning_rate": 6.923669018601669e-07, "loss": 0.8808, "step": 71260 }, { "epoch": 0.8685240027786918, "grad_norm": 1.9060465097427368, "learning_rate": 6.920461834509301e-07, "loss": 0.7856, "step": 71265 }, { "epoch": 0.8685849390028396, "grad_norm": 2.1164822578430176, "learning_rate": 6.917254650416934e-07, "loss": 0.8358, "step": 71270 }, { "epoch": 0.8686458752269874, "grad_norm": 1.8372632265090942, "learning_rate": 6.914047466324568e-07, "loss": 0.8891, "step": 71275 }, { "epoch": 0.8687068114511353, "grad_norm": 1.917647123336792, "learning_rate": 6.910840282232201e-07, "loss": 0.8679, "step": 71280 }, { "epoch": 0.8687677476752831, "grad_norm": 1.9829721450805664, "learning_rate": 6.907633098139833e-07, "loss": 0.8181, "step": 71285 }, { "epoch": 0.8688286838994309, "grad_norm": 1.7005618810653687, "learning_rate": 6.904425914047467e-07, "loss": 0.7907, "step": 71290 }, { "epoch": 0.8688896201235786, "grad_norm": 1.8175482749938965, "learning_rate": 6.9012187299551e-07, "loss": 0.7733, "step": 71295 }, { "epoch": 0.8689505563477264, "grad_norm": 1.712854266166687, "learning_rate": 6.898011545862733e-07, "loss": 0.7514, "step": 71300 }, { "epoch": 0.8690114925718743, "grad_norm": 2.030949592590332, "learning_rate": 6.894804361770366e-07, "loss": 0.7877, "step": 71305 }, { "epoch": 0.8690724287960221, "grad_norm": 2.038008213043213, "learning_rate": 6.891597177678e-07, "loss": 0.767, "step": 71310 }, { "epoch": 0.8691333650201699, "grad_norm": 1.918625831604004, "learning_rate": 6.888389993585632e-07, "loss": 0.8088, "step": 71315 }, { "epoch": 0.8691943012443177, "grad_norm": 1.9348912239074707, "learning_rate": 6.885182809493266e-07, "loss": 0.7943, "step": 71320 }, { "epoch": 0.8692552374684656, "grad_norm": 1.7461612224578857, "learning_rate": 6.881975625400898e-07, "loss": 0.7972, "step": 71325 }, { "epoch": 0.8693161736926133, "grad_norm": 2.0179624557495117, "learning_rate": 6.878768441308532e-07, "loss": 0.7649, "step": 71330 }, { "epoch": 0.8693771099167611, "grad_norm": 1.9977126121520996, "learning_rate": 6.875561257216166e-07, "loss": 0.8211, "step": 71335 }, { "epoch": 0.8694380461409089, "grad_norm": 1.97406804561615, "learning_rate": 6.872354073123798e-07, "loss": 0.8555, "step": 71340 }, { "epoch": 0.8694989823650567, "grad_norm": 1.8936535120010376, "learning_rate": 6.86914688903143e-07, "loss": 0.845, "step": 71345 }, { "epoch": 0.8695599185892046, "grad_norm": 1.9945228099822998, "learning_rate": 6.865939704939065e-07, "loss": 0.734, "step": 71350 }, { "epoch": 0.8696208548133524, "grad_norm": 2.0937910079956055, "learning_rate": 6.862732520846698e-07, "loss": 0.8593, "step": 71355 }, { "epoch": 0.8696817910375002, "grad_norm": 2.052845001220703, "learning_rate": 6.85952533675433e-07, "loss": 0.7527, "step": 71360 }, { "epoch": 0.8697427272616479, "grad_norm": 1.9514449834823608, "learning_rate": 6.856318152661962e-07, "loss": 0.8692, "step": 71365 }, { "epoch": 0.8698036634857957, "grad_norm": 2.2074339389801025, "learning_rate": 6.853110968569597e-07, "loss": 0.8369, "step": 71370 }, { "epoch": 0.8698645997099436, "grad_norm": 2.0003998279571533, "learning_rate": 6.84990378447723e-07, "loss": 0.8027, "step": 71375 }, { "epoch": 0.8699255359340914, "grad_norm": 2.003685235977173, "learning_rate": 6.846696600384863e-07, "loss": 0.7767, "step": 71380 }, { "epoch": 0.8699864721582392, "grad_norm": 2.169168710708618, "learning_rate": 6.843489416292495e-07, "loss": 0.8519, "step": 71385 }, { "epoch": 0.870047408382387, "grad_norm": 2.2098634243011475, "learning_rate": 6.840282232200129e-07, "loss": 0.8553, "step": 71390 }, { "epoch": 0.8701083446065349, "grad_norm": 2.111337184906006, "learning_rate": 6.837075048107763e-07, "loss": 0.764, "step": 71395 }, { "epoch": 0.8701692808306826, "grad_norm": 1.798448920249939, "learning_rate": 6.833867864015395e-07, "loss": 0.8382, "step": 71400 }, { "epoch": 0.8702302170548304, "grad_norm": 2.0475122928619385, "learning_rate": 6.830660679923028e-07, "loss": 0.7944, "step": 71405 }, { "epoch": 0.8702911532789782, "grad_norm": 1.8040591478347778, "learning_rate": 6.827453495830662e-07, "loss": 0.8044, "step": 71410 }, { "epoch": 0.870352089503126, "grad_norm": 2.0220282077789307, "learning_rate": 6.824246311738295e-07, "loss": 0.8117, "step": 71415 }, { "epoch": 0.8704130257272739, "grad_norm": 1.8682748079299927, "learning_rate": 6.821039127645927e-07, "loss": 0.7661, "step": 71420 }, { "epoch": 0.8704739619514217, "grad_norm": 1.9487121105194092, "learning_rate": 6.81783194355356e-07, "loss": 0.7993, "step": 71425 }, { "epoch": 0.8705348981755694, "grad_norm": 1.9983211755752563, "learning_rate": 6.814624759461194e-07, "loss": 0.8199, "step": 71430 }, { "epoch": 0.8705958343997172, "grad_norm": 2.269859552383423, "learning_rate": 6.811417575368827e-07, "loss": 0.8196, "step": 71435 }, { "epoch": 0.870656770623865, "grad_norm": 2.0672178268432617, "learning_rate": 6.80821039127646e-07, "loss": 0.8572, "step": 71440 }, { "epoch": 0.8707177068480129, "grad_norm": 1.98084557056427, "learning_rate": 6.805003207184093e-07, "loss": 0.7714, "step": 71445 }, { "epoch": 0.8707786430721607, "grad_norm": 2.228609800338745, "learning_rate": 6.801796023091726e-07, "loss": 0.8532, "step": 71450 }, { "epoch": 0.8708395792963085, "grad_norm": 1.9067370891571045, "learning_rate": 6.79858883899936e-07, "loss": 0.834, "step": 71455 }, { "epoch": 0.8709005155204563, "grad_norm": 1.8980998992919922, "learning_rate": 6.795381654906992e-07, "loss": 0.8505, "step": 71460 }, { "epoch": 0.870961451744604, "grad_norm": 2.0445005893707275, "learning_rate": 6.792174470814625e-07, "loss": 0.7916, "step": 71465 }, { "epoch": 0.8710223879687519, "grad_norm": 2.3504750728607178, "learning_rate": 6.788967286722258e-07, "loss": 0.7615, "step": 71470 }, { "epoch": 0.8710833241928997, "grad_norm": 1.893692970275879, "learning_rate": 6.785760102629892e-07, "loss": 0.8049, "step": 71475 }, { "epoch": 0.8711442604170475, "grad_norm": 2.1920485496520996, "learning_rate": 6.782552918537524e-07, "loss": 0.8475, "step": 71480 }, { "epoch": 0.8712051966411953, "grad_norm": 1.9717183113098145, "learning_rate": 6.779345734445157e-07, "loss": 0.8257, "step": 71485 }, { "epoch": 0.8712661328653432, "grad_norm": 1.8262487649917603, "learning_rate": 6.776138550352791e-07, "loss": 0.7976, "step": 71490 }, { "epoch": 0.871327069089491, "grad_norm": 2.1051793098449707, "learning_rate": 6.772931366260424e-07, "loss": 0.7787, "step": 71495 }, { "epoch": 0.8713880053136387, "grad_norm": 2.040644645690918, "learning_rate": 6.769724182168056e-07, "loss": 0.88, "step": 71500 }, { "epoch": 0.8714489415377865, "grad_norm": 2.0322813987731934, "learning_rate": 6.76651699807569e-07, "loss": 0.8385, "step": 71505 }, { "epoch": 0.8715098777619343, "grad_norm": 1.98542320728302, "learning_rate": 6.763309813983324e-07, "loss": 0.848, "step": 71510 }, { "epoch": 0.8715708139860822, "grad_norm": 1.9719895124435425, "learning_rate": 6.760102629890956e-07, "loss": 0.8423, "step": 71515 }, { "epoch": 0.87163175021023, "grad_norm": 2.3835482597351074, "learning_rate": 6.756895445798589e-07, "loss": 0.8109, "step": 71520 }, { "epoch": 0.8716926864343778, "grad_norm": 1.6504592895507812, "learning_rate": 6.753688261706222e-07, "loss": 0.7529, "step": 71525 }, { "epoch": 0.8717536226585256, "grad_norm": 2.6284000873565674, "learning_rate": 6.750481077613856e-07, "loss": 0.8232, "step": 71530 }, { "epoch": 0.8718145588826733, "grad_norm": 2.0536227226257324, "learning_rate": 6.747273893521489e-07, "loss": 0.8358, "step": 71535 }, { "epoch": 0.8718754951068212, "grad_norm": 1.7737270593643188, "learning_rate": 6.744066709429121e-07, "loss": 0.7865, "step": 71540 }, { "epoch": 0.871936431330969, "grad_norm": 1.8123644590377808, "learning_rate": 6.740859525336754e-07, "loss": 0.8031, "step": 71545 }, { "epoch": 0.8719973675551168, "grad_norm": 1.925856113433838, "learning_rate": 6.737652341244389e-07, "loss": 0.7723, "step": 71550 }, { "epoch": 0.8720583037792646, "grad_norm": 1.9125064611434937, "learning_rate": 6.734445157152021e-07, "loss": 0.8813, "step": 71555 }, { "epoch": 0.8721192400034125, "grad_norm": 1.7721140384674072, "learning_rate": 6.731237973059653e-07, "loss": 0.8082, "step": 71560 }, { "epoch": 0.8721801762275603, "grad_norm": 2.031851291656494, "learning_rate": 6.728030788967287e-07, "loss": 0.837, "step": 71565 }, { "epoch": 0.872241112451708, "grad_norm": 1.961037516593933, "learning_rate": 6.724823604874921e-07, "loss": 0.7456, "step": 71570 }, { "epoch": 0.8723020486758558, "grad_norm": 1.9201234579086304, "learning_rate": 6.721616420782553e-07, "loss": 0.7413, "step": 71575 }, { "epoch": 0.8723629849000036, "grad_norm": 1.930322289466858, "learning_rate": 6.718409236690187e-07, "loss": 0.8043, "step": 71580 }, { "epoch": 0.8724239211241515, "grad_norm": 1.8833463191986084, "learning_rate": 6.715202052597819e-07, "loss": 0.8381, "step": 71585 }, { "epoch": 0.8724848573482993, "grad_norm": 2.0502302646636963, "learning_rate": 6.711994868505453e-07, "loss": 0.793, "step": 71590 }, { "epoch": 0.8725457935724471, "grad_norm": 1.7587624788284302, "learning_rate": 6.708787684413086e-07, "loss": 0.7672, "step": 71595 }, { "epoch": 0.8726067297965949, "grad_norm": 1.9496935606002808, "learning_rate": 6.705580500320719e-07, "loss": 0.8011, "step": 71600 }, { "epoch": 0.8726676660207426, "grad_norm": 2.0768685340881348, "learning_rate": 6.702373316228352e-07, "loss": 0.8121, "step": 71605 }, { "epoch": 0.8727286022448905, "grad_norm": 1.8991632461547852, "learning_rate": 6.699166132135986e-07, "loss": 0.7888, "step": 71610 }, { "epoch": 0.8727895384690383, "grad_norm": 2.059610605239868, "learning_rate": 6.695958948043618e-07, "loss": 0.7975, "step": 71615 }, { "epoch": 0.8728504746931861, "grad_norm": 2.468435049057007, "learning_rate": 6.692751763951251e-07, "loss": 0.8101, "step": 71620 }, { "epoch": 0.8729114109173339, "grad_norm": 1.9121025800704956, "learning_rate": 6.689544579858885e-07, "loss": 0.8535, "step": 71625 }, { "epoch": 0.8729723471414818, "grad_norm": 1.951924443244934, "learning_rate": 6.686337395766518e-07, "loss": 0.8222, "step": 71630 }, { "epoch": 0.8730332833656296, "grad_norm": 2.221604824066162, "learning_rate": 6.68313021167415e-07, "loss": 0.7682, "step": 71635 }, { "epoch": 0.8730942195897773, "grad_norm": 2.019246816635132, "learning_rate": 6.679923027581784e-07, "loss": 0.7895, "step": 71640 }, { "epoch": 0.8731551558139251, "grad_norm": 1.7911596298217773, "learning_rate": 6.676715843489417e-07, "loss": 0.8145, "step": 71645 }, { "epoch": 0.8732160920380729, "grad_norm": 1.7811893224716187, "learning_rate": 6.67350865939705e-07, "loss": 0.7804, "step": 71650 }, { "epoch": 0.8732770282622208, "grad_norm": 2.8579797744750977, "learning_rate": 6.670301475304683e-07, "loss": 0.787, "step": 71655 }, { "epoch": 0.8733379644863686, "grad_norm": 2.081594228744507, "learning_rate": 6.667094291212316e-07, "loss": 0.7463, "step": 71660 }, { "epoch": 0.8733989007105164, "grad_norm": 2.0511789321899414, "learning_rate": 6.663887107119949e-07, "loss": 0.8228, "step": 71665 }, { "epoch": 0.8734598369346642, "grad_norm": 1.8175994157791138, "learning_rate": 6.660679923027583e-07, "loss": 0.7672, "step": 71670 }, { "epoch": 0.8735207731588119, "grad_norm": 1.7906723022460938, "learning_rate": 6.657472738935215e-07, "loss": 0.838, "step": 71675 }, { "epoch": 0.8735817093829598, "grad_norm": 1.5976741313934326, "learning_rate": 6.654265554842848e-07, "loss": 0.7688, "step": 71680 }, { "epoch": 0.8736426456071076, "grad_norm": 2.016883373260498, "learning_rate": 6.651058370750482e-07, "loss": 0.7711, "step": 71685 }, { "epoch": 0.8737035818312554, "grad_norm": 1.7711338996887207, "learning_rate": 6.647851186658115e-07, "loss": 0.7848, "step": 71690 }, { "epoch": 0.8737645180554032, "grad_norm": 1.9499468803405762, "learning_rate": 6.644644002565747e-07, "loss": 0.827, "step": 71695 }, { "epoch": 0.873825454279551, "grad_norm": 3.4516866207122803, "learning_rate": 6.641436818473381e-07, "loss": 0.8339, "step": 71700 }, { "epoch": 0.8738863905036989, "grad_norm": 1.7434710264205933, "learning_rate": 6.638229634381015e-07, "loss": 0.8248, "step": 71705 }, { "epoch": 0.8739473267278466, "grad_norm": 1.9260896444320679, "learning_rate": 6.635022450288647e-07, "loss": 0.9114, "step": 71710 }, { "epoch": 0.8740082629519944, "grad_norm": 1.743446707725525, "learning_rate": 6.63181526619628e-07, "loss": 0.807, "step": 71715 }, { "epoch": 0.8740691991761422, "grad_norm": 1.9916011095046997, "learning_rate": 6.628608082103913e-07, "loss": 0.8051, "step": 71720 }, { "epoch": 0.87413013540029, "grad_norm": 2.0126664638519287, "learning_rate": 6.625400898011547e-07, "loss": 0.7995, "step": 71725 }, { "epoch": 0.8741910716244379, "grad_norm": 1.9524483680725098, "learning_rate": 6.62219371391918e-07, "loss": 0.8504, "step": 71730 }, { "epoch": 0.8742520078485857, "grad_norm": 2.047147512435913, "learning_rate": 6.618986529826812e-07, "loss": 0.875, "step": 71735 }, { "epoch": 0.8743129440727335, "grad_norm": 1.995693564414978, "learning_rate": 6.615779345734445e-07, "loss": 0.8892, "step": 71740 }, { "epoch": 0.8743738802968812, "grad_norm": 1.8966649770736694, "learning_rate": 6.61257216164208e-07, "loss": 0.7877, "step": 71745 }, { "epoch": 0.8744348165210291, "grad_norm": 1.9109909534454346, "learning_rate": 6.609364977549712e-07, "loss": 0.7973, "step": 71750 }, { "epoch": 0.8744957527451769, "grad_norm": 1.9232631921768188, "learning_rate": 6.606157793457345e-07, "loss": 0.8218, "step": 71755 }, { "epoch": 0.8745566889693247, "grad_norm": 1.982970952987671, "learning_rate": 6.602950609364978e-07, "loss": 0.8879, "step": 71760 }, { "epoch": 0.8746176251934725, "grad_norm": 1.9823230504989624, "learning_rate": 6.599743425272612e-07, "loss": 0.8735, "step": 71765 }, { "epoch": 0.8746785614176203, "grad_norm": 1.781274676322937, "learning_rate": 6.596536241180244e-07, "loss": 0.803, "step": 71770 }, { "epoch": 0.8747394976417682, "grad_norm": 2.298433542251587, "learning_rate": 6.593329057087878e-07, "loss": 0.7862, "step": 71775 }, { "epoch": 0.8748004338659159, "grad_norm": 1.9723337888717651, "learning_rate": 6.59012187299551e-07, "loss": 0.8005, "step": 71780 }, { "epoch": 0.8748613700900637, "grad_norm": 1.9398161172866821, "learning_rate": 6.586914688903144e-07, "loss": 0.7621, "step": 71785 }, { "epoch": 0.8749223063142115, "grad_norm": 2.074127197265625, "learning_rate": 6.583707504810777e-07, "loss": 0.8421, "step": 71790 }, { "epoch": 0.8749832425383594, "grad_norm": 1.585099697113037, "learning_rate": 6.58050032071841e-07, "loss": 0.8056, "step": 71795 }, { "epoch": 0.8750441787625072, "grad_norm": 1.68211829662323, "learning_rate": 6.577293136626042e-07, "loss": 0.8522, "step": 71800 }, { "epoch": 0.875105114986655, "grad_norm": 2.05084490776062, "learning_rate": 6.574085952533677e-07, "loss": 0.8189, "step": 71805 }, { "epoch": 0.8751660512108028, "grad_norm": 2.0514702796936035, "learning_rate": 6.570878768441309e-07, "loss": 0.8251, "step": 71810 }, { "epoch": 0.8752269874349505, "grad_norm": 2.174302101135254, "learning_rate": 6.567671584348942e-07, "loss": 0.8158, "step": 71815 }, { "epoch": 0.8752879236590984, "grad_norm": 2.1042447090148926, "learning_rate": 6.564464400256575e-07, "loss": 0.836, "step": 71820 }, { "epoch": 0.8753488598832462, "grad_norm": 2.0704588890075684, "learning_rate": 6.561257216164209e-07, "loss": 0.8114, "step": 71825 }, { "epoch": 0.875409796107394, "grad_norm": 2.0584564208984375, "learning_rate": 6.558050032071841e-07, "loss": 0.7676, "step": 71830 }, { "epoch": 0.8754707323315418, "grad_norm": 2.1145994663238525, "learning_rate": 6.554842847979475e-07, "loss": 0.8569, "step": 71835 }, { "epoch": 0.8755316685556896, "grad_norm": 1.9578138589859009, "learning_rate": 6.551635663887107e-07, "loss": 0.8128, "step": 71840 }, { "epoch": 0.8755926047798375, "grad_norm": 1.9181731939315796, "learning_rate": 6.548428479794741e-07, "loss": 0.7836, "step": 71845 }, { "epoch": 0.8756535410039852, "grad_norm": 1.6059834957122803, "learning_rate": 6.545221295702374e-07, "loss": 0.7142, "step": 71850 }, { "epoch": 0.875714477228133, "grad_norm": 1.775281310081482, "learning_rate": 6.542014111610007e-07, "loss": 0.7969, "step": 71855 }, { "epoch": 0.8757754134522808, "grad_norm": 2.205756187438965, "learning_rate": 6.538806927517639e-07, "loss": 0.7751, "step": 71860 }, { "epoch": 0.8758363496764286, "grad_norm": 1.8516526222229004, "learning_rate": 6.535599743425274e-07, "loss": 0.7788, "step": 71865 }, { "epoch": 0.8758972859005765, "grad_norm": 1.8779624700546265, "learning_rate": 6.532392559332906e-07, "loss": 0.7855, "step": 71870 }, { "epoch": 0.8759582221247243, "grad_norm": 1.9476019144058228, "learning_rate": 6.529185375240539e-07, "loss": 0.7874, "step": 71875 }, { "epoch": 0.8760191583488721, "grad_norm": 1.8040879964828491, "learning_rate": 6.525978191148172e-07, "loss": 0.7842, "step": 71880 }, { "epoch": 0.8760800945730198, "grad_norm": 1.9456312656402588, "learning_rate": 6.522771007055806e-07, "loss": 0.8145, "step": 71885 }, { "epoch": 0.8761410307971677, "grad_norm": 1.8232324123382568, "learning_rate": 6.519563822963438e-07, "loss": 0.6965, "step": 71890 }, { "epoch": 0.8762019670213155, "grad_norm": 1.7580456733703613, "learning_rate": 6.516356638871072e-07, "loss": 0.864, "step": 71895 }, { "epoch": 0.8762629032454633, "grad_norm": 2.064572334289551, "learning_rate": 6.513149454778704e-07, "loss": 0.9131, "step": 71900 }, { "epoch": 0.8763238394696111, "grad_norm": 1.983298420906067, "learning_rate": 6.509942270686338e-07, "loss": 0.8008, "step": 71905 }, { "epoch": 0.8763847756937589, "grad_norm": 1.662291407585144, "learning_rate": 6.506735086593971e-07, "loss": 0.8255, "step": 71910 }, { "epoch": 0.8764457119179068, "grad_norm": 1.7417594194412231, "learning_rate": 6.503527902501604e-07, "loss": 0.8505, "step": 71915 }, { "epoch": 0.8765066481420545, "grad_norm": 1.8343428373336792, "learning_rate": 6.500320718409238e-07, "loss": 0.8302, "step": 71920 }, { "epoch": 0.8765675843662023, "grad_norm": 1.8942683935165405, "learning_rate": 6.497113534316871e-07, "loss": 0.8489, "step": 71925 }, { "epoch": 0.8766285205903501, "grad_norm": 2.089341640472412, "learning_rate": 6.493906350224504e-07, "loss": 0.8377, "step": 71930 }, { "epoch": 0.876689456814498, "grad_norm": 1.8848048448562622, "learning_rate": 6.490699166132136e-07, "loss": 0.8328, "step": 71935 }, { "epoch": 0.8767503930386458, "grad_norm": 2.082538366317749, "learning_rate": 6.487491982039771e-07, "loss": 0.8213, "step": 71940 }, { "epoch": 0.8768113292627936, "grad_norm": 1.7838468551635742, "learning_rate": 6.484284797947403e-07, "loss": 0.7127, "step": 71945 }, { "epoch": 0.8768722654869414, "grad_norm": 1.9808580875396729, "learning_rate": 6.481077613855036e-07, "loss": 0.8117, "step": 71950 }, { "epoch": 0.8769332017110891, "grad_norm": 2.076685667037964, "learning_rate": 6.477870429762669e-07, "loss": 0.8203, "step": 71955 }, { "epoch": 0.876994137935237, "grad_norm": 1.9763376712799072, "learning_rate": 6.474663245670303e-07, "loss": 0.7817, "step": 71960 }, { "epoch": 0.8770550741593848, "grad_norm": 1.9410990476608276, "learning_rate": 6.471456061577935e-07, "loss": 0.8126, "step": 71965 }, { "epoch": 0.8771160103835326, "grad_norm": 3.0949389934539795, "learning_rate": 6.468248877485569e-07, "loss": 0.8198, "step": 71970 }, { "epoch": 0.8771769466076804, "grad_norm": 2.0211234092712402, "learning_rate": 6.465041693393201e-07, "loss": 0.7949, "step": 71975 }, { "epoch": 0.8772378828318282, "grad_norm": 1.9380733966827393, "learning_rate": 6.461834509300835e-07, "loss": 0.8348, "step": 71980 }, { "epoch": 0.8772988190559761, "grad_norm": 2.0130372047424316, "learning_rate": 6.458627325208468e-07, "loss": 0.8291, "step": 71985 }, { "epoch": 0.8773597552801238, "grad_norm": 1.864450454711914, "learning_rate": 6.455420141116101e-07, "loss": 0.8247, "step": 71990 }, { "epoch": 0.8774206915042716, "grad_norm": 1.7222968339920044, "learning_rate": 6.452212957023733e-07, "loss": 0.8581, "step": 71995 }, { "epoch": 0.8774816277284194, "grad_norm": 1.8387550115585327, "learning_rate": 6.449005772931368e-07, "loss": 0.8097, "step": 72000 }, { "epoch": 0.8775425639525672, "grad_norm": 1.9026767015457153, "learning_rate": 6.445798588839e-07, "loss": 0.7512, "step": 72005 }, { "epoch": 0.8776035001767151, "grad_norm": 1.6673310995101929, "learning_rate": 6.442591404746633e-07, "loss": 0.7909, "step": 72010 }, { "epoch": 0.8776644364008629, "grad_norm": 2.0557234287261963, "learning_rate": 6.439384220654265e-07, "loss": 0.7781, "step": 72015 }, { "epoch": 0.8777253726250107, "grad_norm": 1.9388915300369263, "learning_rate": 6.4361770365619e-07, "loss": 0.7637, "step": 72020 }, { "epoch": 0.8777863088491584, "grad_norm": 1.8895708322525024, "learning_rate": 6.432969852469532e-07, "loss": 0.8872, "step": 72025 }, { "epoch": 0.8778472450733062, "grad_norm": 1.79702889919281, "learning_rate": 6.429762668377165e-07, "loss": 0.8378, "step": 72030 }, { "epoch": 0.8779081812974541, "grad_norm": 2.30745792388916, "learning_rate": 6.426555484284798e-07, "loss": 0.8156, "step": 72035 }, { "epoch": 0.8779691175216019, "grad_norm": 1.7619255781173706, "learning_rate": 6.423348300192432e-07, "loss": 0.7501, "step": 72040 }, { "epoch": 0.8780300537457497, "grad_norm": 1.95161771774292, "learning_rate": 6.420141116100064e-07, "loss": 0.8301, "step": 72045 }, { "epoch": 0.8780909899698975, "grad_norm": 1.9508616924285889, "learning_rate": 6.416933932007698e-07, "loss": 0.8482, "step": 72050 }, { "epoch": 0.8781519261940454, "grad_norm": 2.1297590732574463, "learning_rate": 6.41372674791533e-07, "loss": 0.7942, "step": 72055 }, { "epoch": 0.8782128624181931, "grad_norm": 1.8068938255310059, "learning_rate": 6.410519563822965e-07, "loss": 0.8153, "step": 72060 }, { "epoch": 0.8782737986423409, "grad_norm": 2.039652109146118, "learning_rate": 6.407312379730597e-07, "loss": 0.7643, "step": 72065 }, { "epoch": 0.8783347348664887, "grad_norm": 1.7753055095672607, "learning_rate": 6.40410519563823e-07, "loss": 0.6963, "step": 72070 }, { "epoch": 0.8783956710906365, "grad_norm": 1.91044282913208, "learning_rate": 6.400898011545862e-07, "loss": 0.8126, "step": 72075 }, { "epoch": 0.8784566073147844, "grad_norm": 1.9729564189910889, "learning_rate": 6.397690827453497e-07, "loss": 0.849, "step": 72080 }, { "epoch": 0.8785175435389322, "grad_norm": 1.9575003385543823, "learning_rate": 6.394483643361129e-07, "loss": 0.8163, "step": 72085 }, { "epoch": 0.87857847976308, "grad_norm": 2.9081432819366455, "learning_rate": 6.391276459268762e-07, "loss": 0.8208, "step": 72090 }, { "epoch": 0.8786394159872277, "grad_norm": 2.06466007232666, "learning_rate": 6.388069275176395e-07, "loss": 0.8425, "step": 72095 }, { "epoch": 0.8787003522113755, "grad_norm": 1.7710081338882446, "learning_rate": 6.384862091084029e-07, "loss": 0.8112, "step": 72100 }, { "epoch": 0.8787612884355234, "grad_norm": 2.072294235229492, "learning_rate": 6.381654906991662e-07, "loss": 0.7603, "step": 72105 }, { "epoch": 0.8788222246596712, "grad_norm": 1.834588885307312, "learning_rate": 6.378447722899295e-07, "loss": 0.7699, "step": 72110 }, { "epoch": 0.878883160883819, "grad_norm": 1.9302603006362915, "learning_rate": 6.375240538806927e-07, "loss": 0.8192, "step": 72115 }, { "epoch": 0.8789440971079668, "grad_norm": 2.3068504333496094, "learning_rate": 6.372033354714561e-07, "loss": 0.8494, "step": 72120 }, { "epoch": 0.8790050333321147, "grad_norm": 1.6787716150283813, "learning_rate": 6.368826170622195e-07, "loss": 0.8806, "step": 72125 }, { "epoch": 0.8790659695562624, "grad_norm": 2.0525639057159424, "learning_rate": 6.365618986529827e-07, "loss": 0.782, "step": 72130 }, { "epoch": 0.8791269057804102, "grad_norm": 2.3372879028320312, "learning_rate": 6.362411802437459e-07, "loss": 0.8281, "step": 72135 }, { "epoch": 0.879187842004558, "grad_norm": 2.037787675857544, "learning_rate": 6.359204618345094e-07, "loss": 0.8511, "step": 72140 }, { "epoch": 0.8792487782287058, "grad_norm": 1.9346390962600708, "learning_rate": 6.355997434252727e-07, "loss": 0.7525, "step": 72145 }, { "epoch": 0.8793097144528537, "grad_norm": 1.9456981420516968, "learning_rate": 6.352790250160359e-07, "loss": 0.8155, "step": 72150 }, { "epoch": 0.8793706506770015, "grad_norm": 1.8532534837722778, "learning_rate": 6.349583066067992e-07, "loss": 0.8343, "step": 72155 }, { "epoch": 0.8794315869011493, "grad_norm": 1.9220681190490723, "learning_rate": 6.346375881975626e-07, "loss": 0.8169, "step": 72160 }, { "epoch": 0.879492523125297, "grad_norm": 2.393488883972168, "learning_rate": 6.343168697883259e-07, "loss": 0.834, "step": 72165 }, { "epoch": 0.8795534593494448, "grad_norm": 2.8095624446868896, "learning_rate": 6.339961513790892e-07, "loss": 0.8384, "step": 72170 }, { "epoch": 0.8796143955735927, "grad_norm": 1.839954137802124, "learning_rate": 6.336754329698525e-07, "loss": 0.7843, "step": 72175 }, { "epoch": 0.8796753317977405, "grad_norm": 1.8973294496536255, "learning_rate": 6.333547145606158e-07, "loss": 0.731, "step": 72180 }, { "epoch": 0.8797362680218883, "grad_norm": 1.7321816682815552, "learning_rate": 6.330339961513792e-07, "loss": 0.77, "step": 72185 }, { "epoch": 0.8797972042460361, "grad_norm": 1.9857815504074097, "learning_rate": 6.327132777421424e-07, "loss": 0.8435, "step": 72190 }, { "epoch": 0.879858140470184, "grad_norm": 1.8938453197479248, "learning_rate": 6.323925593329057e-07, "loss": 0.7698, "step": 72195 }, { "epoch": 0.8799190766943317, "grad_norm": 1.899349570274353, "learning_rate": 6.320718409236691e-07, "loss": 0.7942, "step": 72200 }, { "epoch": 0.8799800129184795, "grad_norm": 2.4606895446777344, "learning_rate": 6.317511225144324e-07, "loss": 0.8043, "step": 72205 }, { "epoch": 0.8800409491426273, "grad_norm": 1.9749513864517212, "learning_rate": 6.314304041051956e-07, "loss": 0.8673, "step": 72210 }, { "epoch": 0.8801018853667751, "grad_norm": 1.8742473125457764, "learning_rate": 6.311096856959591e-07, "loss": 0.8203, "step": 72215 }, { "epoch": 0.880162821590923, "grad_norm": 1.9848238229751587, "learning_rate": 6.307889672867223e-07, "loss": 0.7772, "step": 72220 }, { "epoch": 0.8802237578150708, "grad_norm": 2.0578253269195557, "learning_rate": 6.304682488774856e-07, "loss": 0.7975, "step": 72225 }, { "epoch": 0.8802846940392186, "grad_norm": 2.2813711166381836, "learning_rate": 6.301475304682489e-07, "loss": 0.8625, "step": 72230 }, { "epoch": 0.8803456302633663, "grad_norm": 2.497899293899536, "learning_rate": 6.298268120590123e-07, "loss": 0.8481, "step": 72235 }, { "epoch": 0.8804065664875141, "grad_norm": 1.8676921129226685, "learning_rate": 6.295060936497755e-07, "loss": 0.8351, "step": 72240 }, { "epoch": 0.880467502711662, "grad_norm": 2.0693724155426025, "learning_rate": 6.291853752405389e-07, "loss": 0.7477, "step": 72245 }, { "epoch": 0.8805284389358098, "grad_norm": 2.0183019638061523, "learning_rate": 6.288646568313021e-07, "loss": 0.8283, "step": 72250 }, { "epoch": 0.8805893751599576, "grad_norm": 1.899459719657898, "learning_rate": 6.285439384220655e-07, "loss": 0.9229, "step": 72255 }, { "epoch": 0.8806503113841054, "grad_norm": 1.780676245689392, "learning_rate": 6.282232200128288e-07, "loss": 0.797, "step": 72260 }, { "epoch": 0.8807112476082533, "grad_norm": 2.0580339431762695, "learning_rate": 6.279025016035921e-07, "loss": 0.7406, "step": 72265 }, { "epoch": 0.880772183832401, "grad_norm": 2.019519329071045, "learning_rate": 6.275817831943553e-07, "loss": 0.7563, "step": 72270 }, { "epoch": 0.8808331200565488, "grad_norm": 2.6009581089019775, "learning_rate": 6.272610647851188e-07, "loss": 0.825, "step": 72275 }, { "epoch": 0.8808940562806966, "grad_norm": 1.7895379066467285, "learning_rate": 6.269403463758821e-07, "loss": 0.8061, "step": 72280 }, { "epoch": 0.8809549925048444, "grad_norm": 2.148109197616577, "learning_rate": 6.266196279666453e-07, "loss": 0.8733, "step": 72285 }, { "epoch": 0.8810159287289923, "grad_norm": 1.9543753862380981, "learning_rate": 6.262989095574086e-07, "loss": 0.7364, "step": 72290 }, { "epoch": 0.8810768649531401, "grad_norm": 2.114238977432251, "learning_rate": 6.25978191148172e-07, "loss": 0.8053, "step": 72295 }, { "epoch": 0.8811378011772879, "grad_norm": 2.2069969177246094, "learning_rate": 6.256574727389353e-07, "loss": 0.7428, "step": 72300 }, { "epoch": 0.8811987374014356, "grad_norm": 2.172745704650879, "learning_rate": 6.253367543296986e-07, "loss": 0.8012, "step": 72305 }, { "epoch": 0.8812596736255834, "grad_norm": 2.2548940181732178, "learning_rate": 6.250160359204618e-07, "loss": 0.8861, "step": 72310 }, { "epoch": 0.8813206098497313, "grad_norm": 2.264164924621582, "learning_rate": 6.246953175112252e-07, "loss": 0.7836, "step": 72315 }, { "epoch": 0.8813815460738791, "grad_norm": 1.7963614463806152, "learning_rate": 6.243745991019886e-07, "loss": 0.8391, "step": 72320 }, { "epoch": 0.8814424822980269, "grad_norm": 2.1201858520507812, "learning_rate": 6.240538806927518e-07, "loss": 0.8267, "step": 72325 }, { "epoch": 0.8815034185221747, "grad_norm": 2.260042905807495, "learning_rate": 6.237331622835151e-07, "loss": 0.8245, "step": 72330 }, { "epoch": 0.8815643547463226, "grad_norm": 2.1809353828430176, "learning_rate": 6.234124438742785e-07, "loss": 0.8205, "step": 72335 }, { "epoch": 0.8816252909704703, "grad_norm": 1.9622976779937744, "learning_rate": 6.230917254650418e-07, "loss": 0.8026, "step": 72340 }, { "epoch": 0.8816862271946181, "grad_norm": 2.069826602935791, "learning_rate": 6.22771007055805e-07, "loss": 0.7808, "step": 72345 }, { "epoch": 0.8817471634187659, "grad_norm": 1.9612642526626587, "learning_rate": 6.224502886465684e-07, "loss": 0.718, "step": 72350 }, { "epoch": 0.8818080996429137, "grad_norm": 1.9358038902282715, "learning_rate": 6.221295702373317e-07, "loss": 0.8066, "step": 72355 }, { "epoch": 0.8818690358670616, "grad_norm": 2.0267086029052734, "learning_rate": 6.21808851828095e-07, "loss": 0.7832, "step": 72360 }, { "epoch": 0.8819299720912094, "grad_norm": 1.9316902160644531, "learning_rate": 6.214881334188583e-07, "loss": 0.8346, "step": 72365 }, { "epoch": 0.8819909083153572, "grad_norm": 1.9636436700820923, "learning_rate": 6.211674150096216e-07, "loss": 0.8142, "step": 72370 }, { "epoch": 0.8820518445395049, "grad_norm": 1.5781376361846924, "learning_rate": 6.208466966003849e-07, "loss": 0.7906, "step": 72375 }, { "epoch": 0.8821127807636527, "grad_norm": 1.9197536706924438, "learning_rate": 6.205259781911483e-07, "loss": 0.7753, "step": 72380 }, { "epoch": 0.8821737169878006, "grad_norm": 1.8128443956375122, "learning_rate": 6.202052597819115e-07, "loss": 0.7941, "step": 72385 }, { "epoch": 0.8822346532119484, "grad_norm": 2.0785748958587646, "learning_rate": 6.198845413726748e-07, "loss": 0.7957, "step": 72390 }, { "epoch": 0.8822955894360962, "grad_norm": 1.960324764251709, "learning_rate": 6.195638229634382e-07, "loss": 0.8266, "step": 72395 }, { "epoch": 0.882356525660244, "grad_norm": 1.9607137441635132, "learning_rate": 6.192431045542015e-07, "loss": 0.7879, "step": 72400 }, { "epoch": 0.8824174618843917, "grad_norm": 2.276341676712036, "learning_rate": 6.189223861449647e-07, "loss": 0.8042, "step": 72405 }, { "epoch": 0.8824783981085396, "grad_norm": 1.9721189737319946, "learning_rate": 6.186016677357281e-07, "loss": 0.8633, "step": 72410 }, { "epoch": 0.8825393343326874, "grad_norm": 2.1583943367004395, "learning_rate": 6.182809493264914e-07, "loss": 0.7725, "step": 72415 }, { "epoch": 0.8826002705568352, "grad_norm": 1.9261592626571655, "learning_rate": 6.179602309172547e-07, "loss": 0.7327, "step": 72420 }, { "epoch": 0.882661206780983, "grad_norm": 1.9345000982284546, "learning_rate": 6.17639512508018e-07, "loss": 0.8349, "step": 72425 }, { "epoch": 0.8827221430051309, "grad_norm": 1.8402061462402344, "learning_rate": 6.173187940987813e-07, "loss": 0.7642, "step": 72430 }, { "epoch": 0.8827830792292787, "grad_norm": 1.7662482261657715, "learning_rate": 6.169980756895446e-07, "loss": 0.7792, "step": 72435 }, { "epoch": 0.8828440154534264, "grad_norm": 1.912208080291748, "learning_rate": 6.16677357280308e-07, "loss": 0.769, "step": 72440 }, { "epoch": 0.8829049516775742, "grad_norm": 1.9537373781204224, "learning_rate": 6.163566388710712e-07, "loss": 0.77, "step": 72445 }, { "epoch": 0.882965887901722, "grad_norm": 1.9433565139770508, "learning_rate": 6.160359204618345e-07, "loss": 0.8137, "step": 72450 }, { "epoch": 0.8830268241258699, "grad_norm": 2.1259918212890625, "learning_rate": 6.157152020525979e-07, "loss": 0.8279, "step": 72455 }, { "epoch": 0.8830877603500177, "grad_norm": 1.8387470245361328, "learning_rate": 6.153944836433612e-07, "loss": 0.8365, "step": 72460 }, { "epoch": 0.8831486965741655, "grad_norm": 1.9774267673492432, "learning_rate": 6.150737652341244e-07, "loss": 0.8215, "step": 72465 }, { "epoch": 0.8832096327983133, "grad_norm": 2.363619804382324, "learning_rate": 6.147530468248878e-07, "loss": 0.7836, "step": 72470 }, { "epoch": 0.883270569022461, "grad_norm": 1.613864779472351, "learning_rate": 6.144323284156511e-07, "loss": 0.8264, "step": 72475 }, { "epoch": 0.8833315052466089, "grad_norm": 2.2273013591766357, "learning_rate": 6.141116100064144e-07, "loss": 0.8404, "step": 72480 }, { "epoch": 0.8833924414707567, "grad_norm": 2.1964073181152344, "learning_rate": 6.137908915971777e-07, "loss": 0.7883, "step": 72485 }, { "epoch": 0.8834533776949045, "grad_norm": 1.9096615314483643, "learning_rate": 6.134701731879411e-07, "loss": 0.7769, "step": 72490 }, { "epoch": 0.8835143139190523, "grad_norm": 2.641315460205078, "learning_rate": 6.131494547787043e-07, "loss": 0.8351, "step": 72495 }, { "epoch": 0.8835752501432002, "grad_norm": 2.0531880855560303, "learning_rate": 6.128287363694677e-07, "loss": 0.7826, "step": 72500 }, { "epoch": 0.883636186367348, "grad_norm": 1.913243055343628, "learning_rate": 6.125080179602309e-07, "loss": 0.7745, "step": 72505 }, { "epoch": 0.8836971225914957, "grad_norm": 1.71001136302948, "learning_rate": 6.121872995509943e-07, "loss": 0.7807, "step": 72510 }, { "epoch": 0.8837580588156435, "grad_norm": 1.9071534872055054, "learning_rate": 6.118665811417577e-07, "loss": 0.8385, "step": 72515 }, { "epoch": 0.8838189950397913, "grad_norm": 1.8838862180709839, "learning_rate": 6.115458627325209e-07, "loss": 0.8865, "step": 72520 }, { "epoch": 0.8838799312639392, "grad_norm": 1.8949495553970337, "learning_rate": 6.112251443232842e-07, "loss": 0.8012, "step": 72525 }, { "epoch": 0.883940867488087, "grad_norm": 2.0846376419067383, "learning_rate": 6.109044259140476e-07, "loss": 0.906, "step": 72530 }, { "epoch": 0.8840018037122348, "grad_norm": 1.8544665575027466, "learning_rate": 6.105837075048109e-07, "loss": 0.7927, "step": 72535 }, { "epoch": 0.8840627399363826, "grad_norm": 1.6751576662063599, "learning_rate": 6.102629890955741e-07, "loss": 0.8777, "step": 72540 }, { "epoch": 0.8841236761605303, "grad_norm": 2.6249098777770996, "learning_rate": 6.099422706863375e-07, "loss": 0.7827, "step": 72545 }, { "epoch": 0.8841846123846782, "grad_norm": 2.302229404449463, "learning_rate": 6.096215522771008e-07, "loss": 0.8177, "step": 72550 }, { "epoch": 0.884245548608826, "grad_norm": 2.16983962059021, "learning_rate": 6.093008338678641e-07, "loss": 0.8354, "step": 72555 }, { "epoch": 0.8843064848329738, "grad_norm": 1.8810791969299316, "learning_rate": 6.089801154586274e-07, "loss": 0.8261, "step": 72560 }, { "epoch": 0.8843674210571216, "grad_norm": 2.0111706256866455, "learning_rate": 6.086593970493907e-07, "loss": 0.8505, "step": 72565 }, { "epoch": 0.8844283572812695, "grad_norm": 1.8863978385925293, "learning_rate": 6.08338678640154e-07, "loss": 0.8236, "step": 72570 }, { "epoch": 0.8844892935054173, "grad_norm": 1.9204367399215698, "learning_rate": 6.080179602309174e-07, "loss": 0.8291, "step": 72575 }, { "epoch": 0.884550229729565, "grad_norm": 1.8712208271026611, "learning_rate": 6.076972418216806e-07, "loss": 0.8228, "step": 72580 }, { "epoch": 0.8846111659537128, "grad_norm": 2.112410306930542, "learning_rate": 6.073765234124439e-07, "loss": 0.8528, "step": 72585 }, { "epoch": 0.8846721021778606, "grad_norm": 2.139940023422241, "learning_rate": 6.070558050032073e-07, "loss": 0.7716, "step": 72590 }, { "epoch": 0.8847330384020085, "grad_norm": 1.903439998626709, "learning_rate": 6.067350865939706e-07, "loss": 0.7872, "step": 72595 }, { "epoch": 0.8847939746261563, "grad_norm": 1.7967756986618042, "learning_rate": 6.064143681847338e-07, "loss": 0.7608, "step": 72600 }, { "epoch": 0.8848549108503041, "grad_norm": 2.186744451522827, "learning_rate": 6.060936497754971e-07, "loss": 0.7762, "step": 72605 }, { "epoch": 0.8849158470744519, "grad_norm": 1.728749394416809, "learning_rate": 6.057729313662605e-07, "loss": 0.8237, "step": 72610 }, { "epoch": 0.8849767832985996, "grad_norm": 1.863913893699646, "learning_rate": 6.054522129570238e-07, "loss": 0.7718, "step": 72615 }, { "epoch": 0.8850377195227475, "grad_norm": 1.9369953870773315, "learning_rate": 6.05131494547787e-07, "loss": 0.7859, "step": 72620 }, { "epoch": 0.8850986557468953, "grad_norm": 1.8860588073730469, "learning_rate": 6.048107761385504e-07, "loss": 0.8286, "step": 72625 }, { "epoch": 0.8851595919710431, "grad_norm": 1.7501164674758911, "learning_rate": 6.044900577293137e-07, "loss": 0.7461, "step": 72630 }, { "epoch": 0.8852205281951909, "grad_norm": 2.330601692199707, "learning_rate": 6.04169339320077e-07, "loss": 0.8334, "step": 72635 }, { "epoch": 0.8852814644193387, "grad_norm": 2.0083165168762207, "learning_rate": 6.038486209108403e-07, "loss": 0.8116, "step": 72640 }, { "epoch": 0.8853424006434866, "grad_norm": 1.8431535959243774, "learning_rate": 6.035279025016036e-07, "loss": 0.8658, "step": 72645 }, { "epoch": 0.8854033368676343, "grad_norm": 1.9617700576782227, "learning_rate": 6.032071840923669e-07, "loss": 0.9049, "step": 72650 }, { "epoch": 0.8854642730917821, "grad_norm": 1.9125688076019287, "learning_rate": 6.028864656831303e-07, "loss": 0.7824, "step": 72655 }, { "epoch": 0.8855252093159299, "grad_norm": 2.144763708114624, "learning_rate": 6.025657472738935e-07, "loss": 0.8596, "step": 72660 }, { "epoch": 0.8855861455400778, "grad_norm": 1.9929877519607544, "learning_rate": 6.02245028864657e-07, "loss": 0.8635, "step": 72665 }, { "epoch": 0.8856470817642256, "grad_norm": 1.9471527338027954, "learning_rate": 6.019243104554202e-07, "loss": 0.8392, "step": 72670 }, { "epoch": 0.8857080179883734, "grad_norm": 1.8425650596618652, "learning_rate": 6.016035920461835e-07, "loss": 0.8611, "step": 72675 }, { "epoch": 0.8857689542125212, "grad_norm": 1.9942604303359985, "learning_rate": 6.012828736369467e-07, "loss": 0.8406, "step": 72680 }, { "epoch": 0.8858298904366689, "grad_norm": 2.012756824493408, "learning_rate": 6.009621552277102e-07, "loss": 0.8209, "step": 72685 }, { "epoch": 0.8858908266608168, "grad_norm": 1.8512500524520874, "learning_rate": 6.006414368184734e-07, "loss": 0.8254, "step": 72690 }, { "epoch": 0.8859517628849646, "grad_norm": 2.0671770572662354, "learning_rate": 6.003207184092367e-07, "loss": 0.7938, "step": 72695 }, { "epoch": 0.8860126991091124, "grad_norm": 1.8312952518463135, "learning_rate": 6.000000000000001e-07, "loss": 0.798, "step": 72700 }, { "epoch": 0.8860736353332602, "grad_norm": 1.8386540412902832, "learning_rate": 5.996792815907634e-07, "loss": 0.8196, "step": 72705 }, { "epoch": 0.886134571557408, "grad_norm": 1.6938002109527588, "learning_rate": 5.993585631815266e-07, "loss": 0.7804, "step": 72710 }, { "epoch": 0.8861955077815559, "grad_norm": 2.599579095840454, "learning_rate": 5.9903784477229e-07, "loss": 0.828, "step": 72715 }, { "epoch": 0.8862564440057036, "grad_norm": 1.9244869947433472, "learning_rate": 5.987171263630533e-07, "loss": 0.835, "step": 72720 }, { "epoch": 0.8863173802298514, "grad_norm": 2.4257285594940186, "learning_rate": 5.983964079538166e-07, "loss": 0.8049, "step": 72725 }, { "epoch": 0.8863783164539992, "grad_norm": 2.1651065349578857, "learning_rate": 5.980756895445799e-07, "loss": 0.7334, "step": 72730 }, { "epoch": 0.886439252678147, "grad_norm": 1.7210599184036255, "learning_rate": 5.977549711353432e-07, "loss": 0.7954, "step": 72735 }, { "epoch": 0.8865001889022949, "grad_norm": 2.214280366897583, "learning_rate": 5.974342527261065e-07, "loss": 0.7304, "step": 72740 }, { "epoch": 0.8865611251264427, "grad_norm": 3.2758021354675293, "learning_rate": 5.971135343168699e-07, "loss": 0.8256, "step": 72745 }, { "epoch": 0.8866220613505905, "grad_norm": 2.2365729808807373, "learning_rate": 5.967928159076331e-07, "loss": 0.7304, "step": 72750 }, { "epoch": 0.8866829975747382, "grad_norm": 1.7974811792373657, "learning_rate": 5.964720974983964e-07, "loss": 0.849, "step": 72755 }, { "epoch": 0.886743933798886, "grad_norm": 2.1150224208831787, "learning_rate": 5.961513790891598e-07, "loss": 0.8529, "step": 72760 }, { "epoch": 0.8868048700230339, "grad_norm": 1.9646722078323364, "learning_rate": 5.958306606799231e-07, "loss": 0.8238, "step": 72765 }, { "epoch": 0.8868658062471817, "grad_norm": 1.9398576021194458, "learning_rate": 5.955099422706863e-07, "loss": 0.7824, "step": 72770 }, { "epoch": 0.8869267424713295, "grad_norm": 1.8733158111572266, "learning_rate": 5.951892238614497e-07, "loss": 0.8609, "step": 72775 }, { "epoch": 0.8869876786954773, "grad_norm": 1.9508434534072876, "learning_rate": 5.94868505452213e-07, "loss": 0.8142, "step": 72780 }, { "epoch": 0.8870486149196252, "grad_norm": 1.8734803199768066, "learning_rate": 5.945477870429763e-07, "loss": 0.8016, "step": 72785 }, { "epoch": 0.8871095511437729, "grad_norm": 2.1317062377929688, "learning_rate": 5.942270686337396e-07, "loss": 0.7018, "step": 72790 }, { "epoch": 0.8871704873679207, "grad_norm": 1.9045442342758179, "learning_rate": 5.939063502245029e-07, "loss": 0.8478, "step": 72795 }, { "epoch": 0.8872314235920685, "grad_norm": 2.5174591541290283, "learning_rate": 5.935856318152662e-07, "loss": 0.8424, "step": 72800 }, { "epoch": 0.8872923598162163, "grad_norm": 1.9520775079727173, "learning_rate": 5.932649134060296e-07, "loss": 0.8407, "step": 72805 }, { "epoch": 0.8873532960403642, "grad_norm": 2.0678815841674805, "learning_rate": 5.929441949967929e-07, "loss": 0.8516, "step": 72810 }, { "epoch": 0.887414232264512, "grad_norm": 2.0608158111572266, "learning_rate": 5.926234765875561e-07, "loss": 0.8343, "step": 72815 }, { "epoch": 0.8874751684886598, "grad_norm": 1.7859582901000977, "learning_rate": 5.923027581783195e-07, "loss": 0.818, "step": 72820 }, { "epoch": 0.8875361047128075, "grad_norm": 2.0598092079162598, "learning_rate": 5.919820397690828e-07, "loss": 0.8098, "step": 72825 }, { "epoch": 0.8875970409369554, "grad_norm": 1.7760361433029175, "learning_rate": 5.916613213598461e-07, "loss": 0.8366, "step": 72830 }, { "epoch": 0.8876579771611032, "grad_norm": 2.0843214988708496, "learning_rate": 5.913406029506094e-07, "loss": 0.7547, "step": 72835 }, { "epoch": 0.887718913385251, "grad_norm": 1.6175639629364014, "learning_rate": 5.910198845413728e-07, "loss": 0.7756, "step": 72840 }, { "epoch": 0.8877798496093988, "grad_norm": 1.9557979106903076, "learning_rate": 5.90699166132136e-07, "loss": 0.7758, "step": 72845 }, { "epoch": 0.8878407858335466, "grad_norm": 1.844849944114685, "learning_rate": 5.903784477228994e-07, "loss": 0.8133, "step": 72850 }, { "epoch": 0.8879017220576945, "grad_norm": 2.287259578704834, "learning_rate": 5.900577293136626e-07, "loss": 0.7918, "step": 72855 }, { "epoch": 0.8879626582818422, "grad_norm": 2.3866913318634033, "learning_rate": 5.89737010904426e-07, "loss": 0.8178, "step": 72860 }, { "epoch": 0.88802359450599, "grad_norm": 2.1398277282714844, "learning_rate": 5.894162924951893e-07, "loss": 0.8413, "step": 72865 }, { "epoch": 0.8880845307301378, "grad_norm": 1.887554407119751, "learning_rate": 5.890955740859526e-07, "loss": 0.7814, "step": 72870 }, { "epoch": 0.8881454669542856, "grad_norm": 1.923253059387207, "learning_rate": 5.887748556767159e-07, "loss": 0.8357, "step": 72875 }, { "epoch": 0.8882064031784335, "grad_norm": 2.1091983318328857, "learning_rate": 5.884541372674793e-07, "loss": 0.7906, "step": 72880 }, { "epoch": 0.8882673394025813, "grad_norm": 1.835794448852539, "learning_rate": 5.881334188582425e-07, "loss": 0.7636, "step": 72885 }, { "epoch": 0.8883282756267291, "grad_norm": 2.113161087036133, "learning_rate": 5.878127004490058e-07, "loss": 0.8209, "step": 72890 }, { "epoch": 0.8883892118508768, "grad_norm": 2.403029203414917, "learning_rate": 5.874919820397692e-07, "loss": 0.7922, "step": 72895 }, { "epoch": 0.8884501480750246, "grad_norm": 1.9901262521743774, "learning_rate": 5.871712636305325e-07, "loss": 0.7811, "step": 72900 }, { "epoch": 0.8885110842991725, "grad_norm": 2.284022092819214, "learning_rate": 5.868505452212957e-07, "loss": 0.7807, "step": 72905 }, { "epoch": 0.8885720205233203, "grad_norm": 2.107729434967041, "learning_rate": 5.865298268120591e-07, "loss": 0.7724, "step": 72910 }, { "epoch": 0.8886329567474681, "grad_norm": 1.8677573204040527, "learning_rate": 5.862091084028224e-07, "loss": 0.7817, "step": 72915 }, { "epoch": 0.8886938929716159, "grad_norm": 2.4244258403778076, "learning_rate": 5.858883899935857e-07, "loss": 0.8118, "step": 72920 }, { "epoch": 0.8887548291957638, "grad_norm": 1.9872970581054688, "learning_rate": 5.85567671584349e-07, "loss": 0.783, "step": 72925 }, { "epoch": 0.8888157654199115, "grad_norm": 1.9201436042785645, "learning_rate": 5.852469531751123e-07, "loss": 0.7837, "step": 72930 }, { "epoch": 0.8888767016440593, "grad_norm": 1.9560097455978394, "learning_rate": 5.849262347658756e-07, "loss": 0.865, "step": 72935 }, { "epoch": 0.8889376378682071, "grad_norm": 1.8169302940368652, "learning_rate": 5.84605516356639e-07, "loss": 0.799, "step": 72940 }, { "epoch": 0.8889985740923549, "grad_norm": 1.8678758144378662, "learning_rate": 5.842847979474022e-07, "loss": 0.8251, "step": 72945 }, { "epoch": 0.8890595103165028, "grad_norm": 2.3228189945220947, "learning_rate": 5.839640795381655e-07, "loss": 0.8343, "step": 72950 }, { "epoch": 0.8891204465406506, "grad_norm": 1.6567715406417847, "learning_rate": 5.836433611289289e-07, "loss": 0.887, "step": 72955 }, { "epoch": 0.8891813827647984, "grad_norm": 1.9358532428741455, "learning_rate": 5.833226427196922e-07, "loss": 0.7928, "step": 72960 }, { "epoch": 0.8892423189889461, "grad_norm": 2.1847171783447266, "learning_rate": 5.830019243104554e-07, "loss": 0.7569, "step": 72965 }, { "epoch": 0.889303255213094, "grad_norm": 1.8806381225585938, "learning_rate": 5.826812059012188e-07, "loss": 0.8119, "step": 72970 }, { "epoch": 0.8893641914372418, "grad_norm": 2.0952696800231934, "learning_rate": 5.823604874919821e-07, "loss": 0.823, "step": 72975 }, { "epoch": 0.8894251276613896, "grad_norm": 1.7805808782577515, "learning_rate": 5.820397690827454e-07, "loss": 0.8522, "step": 72980 }, { "epoch": 0.8894860638855374, "grad_norm": 1.900212287902832, "learning_rate": 5.817190506735087e-07, "loss": 0.7727, "step": 72985 }, { "epoch": 0.8895470001096852, "grad_norm": 1.8868027925491333, "learning_rate": 5.81398332264272e-07, "loss": 0.7661, "step": 72990 }, { "epoch": 0.8896079363338331, "grad_norm": 2.6929585933685303, "learning_rate": 5.810776138550353e-07, "loss": 0.8144, "step": 72995 }, { "epoch": 0.8896688725579808, "grad_norm": 2.0622756481170654, "learning_rate": 5.807568954457987e-07, "loss": 0.8452, "step": 73000 }, { "epoch": 0.8897298087821286, "grad_norm": 1.8752809762954712, "learning_rate": 5.804361770365619e-07, "loss": 0.7974, "step": 73005 }, { "epoch": 0.8897907450062764, "grad_norm": 2.1491968631744385, "learning_rate": 5.801154586273252e-07, "loss": 0.8004, "step": 73010 }, { "epoch": 0.8898516812304242, "grad_norm": 2.1625254154205322, "learning_rate": 5.797947402180886e-07, "loss": 0.8451, "step": 73015 }, { "epoch": 0.8899126174545721, "grad_norm": 1.7046482563018799, "learning_rate": 5.794740218088519e-07, "loss": 0.7275, "step": 73020 }, { "epoch": 0.8899735536787199, "grad_norm": 2.231098175048828, "learning_rate": 5.791533033996151e-07, "loss": 0.855, "step": 73025 }, { "epoch": 0.8900344899028677, "grad_norm": 2.3359644412994385, "learning_rate": 5.788325849903785e-07, "loss": 0.7851, "step": 73030 }, { "epoch": 0.8900954261270154, "grad_norm": 2.1507716178894043, "learning_rate": 5.785118665811418e-07, "loss": 0.844, "step": 73035 }, { "epoch": 0.8901563623511632, "grad_norm": 1.964407205581665, "learning_rate": 5.781911481719051e-07, "loss": 0.8457, "step": 73040 }, { "epoch": 0.8902172985753111, "grad_norm": 1.8482261896133423, "learning_rate": 5.778704297626684e-07, "loss": 0.7796, "step": 73045 }, { "epoch": 0.8902782347994589, "grad_norm": 2.186249256134033, "learning_rate": 5.775497113534318e-07, "loss": 0.7759, "step": 73050 }, { "epoch": 0.8903391710236067, "grad_norm": 2.0919713973999023, "learning_rate": 5.77228992944195e-07, "loss": 0.8237, "step": 73055 }, { "epoch": 0.8904001072477545, "grad_norm": 2.0447916984558105, "learning_rate": 5.769082745349584e-07, "loss": 0.8009, "step": 73060 }, { "epoch": 0.8904610434719024, "grad_norm": 1.802598476409912, "learning_rate": 5.765875561257216e-07, "loss": 0.7939, "step": 73065 }, { "epoch": 0.8905219796960501, "grad_norm": 2.138885021209717, "learning_rate": 5.76266837716485e-07, "loss": 0.857, "step": 73070 }, { "epoch": 0.8905829159201979, "grad_norm": 1.9844787120819092, "learning_rate": 5.759461193072483e-07, "loss": 0.7587, "step": 73075 }, { "epoch": 0.8906438521443457, "grad_norm": 1.9935848712921143, "learning_rate": 5.756254008980116e-07, "loss": 0.8015, "step": 73080 }, { "epoch": 0.8907047883684935, "grad_norm": 1.8594578504562378, "learning_rate": 5.753046824887749e-07, "loss": 0.7897, "step": 73085 }, { "epoch": 0.8907657245926414, "grad_norm": 1.8973699808120728, "learning_rate": 5.749839640795383e-07, "loss": 0.8428, "step": 73090 }, { "epoch": 0.8908266608167892, "grad_norm": 1.8931183815002441, "learning_rate": 5.746632456703015e-07, "loss": 0.8164, "step": 73095 }, { "epoch": 0.890887597040937, "grad_norm": 2.0078792572021484, "learning_rate": 5.743425272610648e-07, "loss": 0.7467, "step": 73100 }, { "epoch": 0.8909485332650847, "grad_norm": 1.836172103881836, "learning_rate": 5.740218088518282e-07, "loss": 0.7983, "step": 73105 }, { "epoch": 0.8910094694892325, "grad_norm": 1.9887224435806274, "learning_rate": 5.737010904425915e-07, "loss": 0.8144, "step": 73110 }, { "epoch": 0.8910704057133804, "grad_norm": 1.8749756813049316, "learning_rate": 5.733803720333548e-07, "loss": 0.7175, "step": 73115 }, { "epoch": 0.8911313419375282, "grad_norm": 1.7558034658432007, "learning_rate": 5.73059653624118e-07, "loss": 0.7464, "step": 73120 }, { "epoch": 0.891192278161676, "grad_norm": 1.7422960996627808, "learning_rate": 5.727389352148814e-07, "loss": 0.7969, "step": 73125 }, { "epoch": 0.8912532143858238, "grad_norm": 1.8785475492477417, "learning_rate": 5.724182168056447e-07, "loss": 0.8347, "step": 73130 }, { "epoch": 0.8913141506099717, "grad_norm": 1.990335464477539, "learning_rate": 5.720974983964081e-07, "loss": 0.8221, "step": 73135 }, { "epoch": 0.8913750868341194, "grad_norm": 2.212296724319458, "learning_rate": 5.717767799871713e-07, "loss": 0.7807, "step": 73140 }, { "epoch": 0.8914360230582672, "grad_norm": 1.7455476522445679, "learning_rate": 5.714560615779346e-07, "loss": 0.7974, "step": 73145 }, { "epoch": 0.891496959282415, "grad_norm": 2.006110906600952, "learning_rate": 5.71135343168698e-07, "loss": 0.8858, "step": 73150 }, { "epoch": 0.8915578955065628, "grad_norm": 2.2548036575317383, "learning_rate": 5.708146247594613e-07, "loss": 0.8363, "step": 73155 }, { "epoch": 0.8916188317307107, "grad_norm": 1.7462245225906372, "learning_rate": 5.704939063502245e-07, "loss": 0.7642, "step": 73160 }, { "epoch": 0.8916797679548585, "grad_norm": 2.0274810791015625, "learning_rate": 5.701731879409878e-07, "loss": 0.8985, "step": 73165 }, { "epoch": 0.8917407041790063, "grad_norm": 1.8106356859207153, "learning_rate": 5.698524695317512e-07, "loss": 0.7827, "step": 73170 }, { "epoch": 0.891801640403154, "grad_norm": 2.2139339447021484, "learning_rate": 5.695317511225145e-07, "loss": 0.7095, "step": 73175 }, { "epoch": 0.8918625766273018, "grad_norm": 2.4101905822753906, "learning_rate": 5.692110327132777e-07, "loss": 0.809, "step": 73180 }, { "epoch": 0.8919235128514497, "grad_norm": 1.8006287813186646, "learning_rate": 5.688903143040411e-07, "loss": 0.8256, "step": 73185 }, { "epoch": 0.8919844490755975, "grad_norm": 2.292896270751953, "learning_rate": 5.685695958948044e-07, "loss": 0.8176, "step": 73190 }, { "epoch": 0.8920453852997453, "grad_norm": 1.5944814682006836, "learning_rate": 5.682488774855678e-07, "loss": 0.7848, "step": 73195 }, { "epoch": 0.8921063215238931, "grad_norm": 1.6712433099746704, "learning_rate": 5.67928159076331e-07, "loss": 0.7835, "step": 73200 }, { "epoch": 0.892167257748041, "grad_norm": 2.2068021297454834, "learning_rate": 5.676074406670943e-07, "loss": 0.8204, "step": 73205 }, { "epoch": 0.8922281939721887, "grad_norm": 1.934643268585205, "learning_rate": 5.672867222578576e-07, "loss": 0.8064, "step": 73210 }, { "epoch": 0.8922891301963365, "grad_norm": 1.7809606790542603, "learning_rate": 5.66966003848621e-07, "loss": 0.7671, "step": 73215 }, { "epoch": 0.8923500664204843, "grad_norm": 2.572373628616333, "learning_rate": 5.666452854393842e-07, "loss": 0.7349, "step": 73220 }, { "epoch": 0.8924110026446321, "grad_norm": 2.1824851036071777, "learning_rate": 5.663245670301477e-07, "loss": 0.8299, "step": 73225 }, { "epoch": 0.89247193886878, "grad_norm": 2.133030891418457, "learning_rate": 5.660038486209109e-07, "loss": 0.8063, "step": 73230 }, { "epoch": 0.8925328750929278, "grad_norm": 1.6769611835479736, "learning_rate": 5.656831302116742e-07, "loss": 0.7807, "step": 73235 }, { "epoch": 0.8925938113170756, "grad_norm": 1.9779549837112427, "learning_rate": 5.653624118024374e-07, "loss": 0.8637, "step": 73240 }, { "epoch": 0.8926547475412233, "grad_norm": 2.035893201828003, "learning_rate": 5.650416933932009e-07, "loss": 0.7596, "step": 73245 }, { "epoch": 0.8927156837653711, "grad_norm": 1.7201391458511353, "learning_rate": 5.647209749839641e-07, "loss": 0.8703, "step": 73250 }, { "epoch": 0.892776619989519, "grad_norm": 1.6280503273010254, "learning_rate": 5.644002565747274e-07, "loss": 0.8237, "step": 73255 }, { "epoch": 0.8928375562136668, "grad_norm": 2.354557991027832, "learning_rate": 5.640795381654908e-07, "loss": 0.8076, "step": 73260 }, { "epoch": 0.8928984924378146, "grad_norm": 2.2738356590270996, "learning_rate": 5.637588197562541e-07, "loss": 0.8276, "step": 73265 }, { "epoch": 0.8929594286619624, "grad_norm": 1.8008075952529907, "learning_rate": 5.634381013470173e-07, "loss": 0.838, "step": 73270 }, { "epoch": 0.8930203648861103, "grad_norm": 1.8825956583023071, "learning_rate": 5.631173829377807e-07, "loss": 0.7912, "step": 73275 }, { "epoch": 0.893081301110258, "grad_norm": 1.9902396202087402, "learning_rate": 5.62796664528544e-07, "loss": 0.8236, "step": 73280 }, { "epoch": 0.8931422373344058, "grad_norm": 2.16884446144104, "learning_rate": 5.624759461193073e-07, "loss": 0.7982, "step": 73285 }, { "epoch": 0.8932031735585536, "grad_norm": 1.7868002653121948, "learning_rate": 5.621552277100706e-07, "loss": 0.7085, "step": 73290 }, { "epoch": 0.8932641097827014, "grad_norm": 1.7966951131820679, "learning_rate": 5.618345093008339e-07, "loss": 0.7659, "step": 73295 }, { "epoch": 0.8933250460068493, "grad_norm": 2.23327898979187, "learning_rate": 5.615137908915972e-07, "loss": 0.7873, "step": 73300 }, { "epoch": 0.8933859822309971, "grad_norm": 2.1087729930877686, "learning_rate": 5.611930724823606e-07, "loss": 0.7713, "step": 73305 }, { "epoch": 0.8934469184551449, "grad_norm": 2.05299711227417, "learning_rate": 5.608723540731238e-07, "loss": 0.8453, "step": 73310 }, { "epoch": 0.8935078546792926, "grad_norm": 1.9450302124023438, "learning_rate": 5.605516356638871e-07, "loss": 0.8491, "step": 73315 }, { "epoch": 0.8935687909034404, "grad_norm": 2.1143205165863037, "learning_rate": 5.602309172546505e-07, "loss": 0.8482, "step": 73320 }, { "epoch": 0.8936297271275883, "grad_norm": 1.9385874271392822, "learning_rate": 5.599101988454138e-07, "loss": 0.8228, "step": 73325 }, { "epoch": 0.8936906633517361, "grad_norm": 1.963870882987976, "learning_rate": 5.59589480436177e-07, "loss": 0.8131, "step": 73330 }, { "epoch": 0.8937515995758839, "grad_norm": 1.9315115213394165, "learning_rate": 5.592687620269404e-07, "loss": 0.8272, "step": 73335 }, { "epoch": 0.8938125358000317, "grad_norm": 1.789259433746338, "learning_rate": 5.589480436177037e-07, "loss": 0.7883, "step": 73340 }, { "epoch": 0.8938734720241794, "grad_norm": 2.2358455657958984, "learning_rate": 5.58627325208467e-07, "loss": 0.802, "step": 73345 }, { "epoch": 0.8939344082483273, "grad_norm": 2.1156413555145264, "learning_rate": 5.583066067992303e-07, "loss": 0.8396, "step": 73350 }, { "epoch": 0.8939953444724751, "grad_norm": 1.8358345031738281, "learning_rate": 5.579858883899936e-07, "loss": 0.7495, "step": 73355 }, { "epoch": 0.8940562806966229, "grad_norm": 1.6468844413757324, "learning_rate": 5.576651699807569e-07, "loss": 0.8318, "step": 73360 }, { "epoch": 0.8941172169207707, "grad_norm": 2.1897637844085693, "learning_rate": 5.573444515715203e-07, "loss": 0.8306, "step": 73365 }, { "epoch": 0.8941781531449186, "grad_norm": 1.9533298015594482, "learning_rate": 5.570237331622835e-07, "loss": 0.8508, "step": 73370 }, { "epoch": 0.8942390893690664, "grad_norm": 1.6038388013839722, "learning_rate": 5.567030147530468e-07, "loss": 0.774, "step": 73375 }, { "epoch": 0.8943000255932141, "grad_norm": 1.6376898288726807, "learning_rate": 5.563822963438102e-07, "loss": 0.7577, "step": 73380 }, { "epoch": 0.8943609618173619, "grad_norm": 2.3676466941833496, "learning_rate": 5.560615779345735e-07, "loss": 0.7609, "step": 73385 }, { "epoch": 0.8944218980415097, "grad_norm": 2.3323822021484375, "learning_rate": 5.557408595253367e-07, "loss": 0.7743, "step": 73390 }, { "epoch": 0.8944828342656576, "grad_norm": 1.8133882284164429, "learning_rate": 5.554201411161001e-07, "loss": 0.7971, "step": 73395 }, { "epoch": 0.8945437704898054, "grad_norm": 1.9310319423675537, "learning_rate": 5.550994227068634e-07, "loss": 0.7633, "step": 73400 }, { "epoch": 0.8946047067139532, "grad_norm": 2.1580419540405273, "learning_rate": 5.547787042976267e-07, "loss": 0.8315, "step": 73405 }, { "epoch": 0.894665642938101, "grad_norm": 2.136509895324707, "learning_rate": 5.5445798588839e-07, "loss": 0.8364, "step": 73410 }, { "epoch": 0.8947265791622487, "grad_norm": 1.6964466571807861, "learning_rate": 5.541372674791533e-07, "loss": 0.7946, "step": 73415 }, { "epoch": 0.8947875153863966, "grad_norm": 1.9560202360153198, "learning_rate": 5.538165490699167e-07, "loss": 0.8119, "step": 73420 }, { "epoch": 0.8948484516105444, "grad_norm": 1.9253135919570923, "learning_rate": 5.5349583066068e-07, "loss": 0.8372, "step": 73425 }, { "epoch": 0.8949093878346922, "grad_norm": 1.8204630613327026, "learning_rate": 5.531751122514433e-07, "loss": 0.832, "step": 73430 }, { "epoch": 0.89497032405884, "grad_norm": 1.993808388710022, "learning_rate": 5.528543938422066e-07, "loss": 0.852, "step": 73435 }, { "epoch": 0.8950312602829879, "grad_norm": 1.8660074472427368, "learning_rate": 5.5253367543297e-07, "loss": 0.89, "step": 73440 }, { "epoch": 0.8950921965071357, "grad_norm": 1.6952108144760132, "learning_rate": 5.522129570237332e-07, "loss": 0.7959, "step": 73445 }, { "epoch": 0.8951531327312834, "grad_norm": 1.8690770864486694, "learning_rate": 5.518922386144965e-07, "loss": 0.7493, "step": 73450 }, { "epoch": 0.8952140689554312, "grad_norm": 1.8120628595352173, "learning_rate": 5.515715202052599e-07, "loss": 0.7825, "step": 73455 }, { "epoch": 0.895275005179579, "grad_norm": 2.1741814613342285, "learning_rate": 5.512508017960232e-07, "loss": 0.852, "step": 73460 }, { "epoch": 0.8953359414037269, "grad_norm": 2.2762534618377686, "learning_rate": 5.509300833867864e-07, "loss": 0.7676, "step": 73465 }, { "epoch": 0.8953968776278747, "grad_norm": 1.8983865976333618, "learning_rate": 5.506093649775498e-07, "loss": 0.8337, "step": 73470 }, { "epoch": 0.8954578138520225, "grad_norm": 1.7446571588516235, "learning_rate": 5.502886465683131e-07, "loss": 0.8073, "step": 73475 }, { "epoch": 0.8955187500761703, "grad_norm": 1.9026670455932617, "learning_rate": 5.499679281590764e-07, "loss": 0.7813, "step": 73480 }, { "epoch": 0.895579686300318, "grad_norm": 1.8996069431304932, "learning_rate": 5.496472097498397e-07, "loss": 0.8053, "step": 73485 }, { "epoch": 0.8956406225244659, "grad_norm": 1.9705086946487427, "learning_rate": 5.49326491340603e-07, "loss": 0.8222, "step": 73490 }, { "epoch": 0.8957015587486137, "grad_norm": 2.032999277114868, "learning_rate": 5.490057729313663e-07, "loss": 0.8403, "step": 73495 }, { "epoch": 0.8957624949727615, "grad_norm": 1.799707055091858, "learning_rate": 5.486850545221297e-07, "loss": 0.8341, "step": 73500 }, { "epoch": 0.8958234311969093, "grad_norm": 1.8075382709503174, "learning_rate": 5.483643361128929e-07, "loss": 0.7535, "step": 73505 }, { "epoch": 0.8958843674210571, "grad_norm": 1.6201030015945435, "learning_rate": 5.480436177036562e-07, "loss": 0.7792, "step": 73510 }, { "epoch": 0.895945303645205, "grad_norm": 2.074301242828369, "learning_rate": 5.477228992944196e-07, "loss": 0.8049, "step": 73515 }, { "epoch": 0.8960062398693527, "grad_norm": 1.7445764541625977, "learning_rate": 5.474021808851829e-07, "loss": 0.8556, "step": 73520 }, { "epoch": 0.8960671760935005, "grad_norm": 1.851446509361267, "learning_rate": 5.470814624759461e-07, "loss": 0.8747, "step": 73525 }, { "epoch": 0.8961281123176483, "grad_norm": 1.8841440677642822, "learning_rate": 5.467607440667095e-07, "loss": 0.7746, "step": 73530 }, { "epoch": 0.8961890485417962, "grad_norm": 2.2058541774749756, "learning_rate": 5.464400256574728e-07, "loss": 0.777, "step": 73535 }, { "epoch": 0.896249984765944, "grad_norm": 2.148926019668579, "learning_rate": 5.461193072482361e-07, "loss": 0.7132, "step": 73540 }, { "epoch": 0.8963109209900918, "grad_norm": 2.0640764236450195, "learning_rate": 5.457985888389994e-07, "loss": 0.8245, "step": 73545 }, { "epoch": 0.8963718572142396, "grad_norm": 2.2804954051971436, "learning_rate": 5.454778704297627e-07, "loss": 0.7427, "step": 73550 }, { "epoch": 0.8964327934383873, "grad_norm": 1.890217900276184, "learning_rate": 5.45157152020526e-07, "loss": 0.8345, "step": 73555 }, { "epoch": 0.8964937296625352, "grad_norm": 2.004441976547241, "learning_rate": 5.448364336112894e-07, "loss": 0.7214, "step": 73560 }, { "epoch": 0.896554665886683, "grad_norm": 2.443129301071167, "learning_rate": 5.445157152020526e-07, "loss": 0.8181, "step": 73565 }, { "epoch": 0.8966156021108308, "grad_norm": 1.7483197450637817, "learning_rate": 5.441949967928159e-07, "loss": 0.8093, "step": 73570 }, { "epoch": 0.8966765383349786, "grad_norm": 1.8766433000564575, "learning_rate": 5.438742783835793e-07, "loss": 0.7865, "step": 73575 }, { "epoch": 0.8967374745591264, "grad_norm": 1.8198497295379639, "learning_rate": 5.435535599743426e-07, "loss": 0.7588, "step": 73580 }, { "epoch": 0.8967984107832743, "grad_norm": 1.8545805215835571, "learning_rate": 5.432328415651058e-07, "loss": 0.7426, "step": 73585 }, { "epoch": 0.896859347007422, "grad_norm": 1.8531365394592285, "learning_rate": 5.429121231558692e-07, "loss": 0.7943, "step": 73590 }, { "epoch": 0.8969202832315698, "grad_norm": 2.0651791095733643, "learning_rate": 5.425914047466325e-07, "loss": 0.7639, "step": 73595 }, { "epoch": 0.8969812194557176, "grad_norm": 2.1224288940429688, "learning_rate": 5.422706863373958e-07, "loss": 0.7806, "step": 73600 }, { "epoch": 0.8970421556798654, "grad_norm": 1.8471972942352295, "learning_rate": 5.419499679281591e-07, "loss": 0.7669, "step": 73605 }, { "epoch": 0.8971030919040133, "grad_norm": 2.1807804107666016, "learning_rate": 5.416292495189225e-07, "loss": 0.7532, "step": 73610 }, { "epoch": 0.8971640281281611, "grad_norm": 2.075620412826538, "learning_rate": 5.413085311096857e-07, "loss": 0.8535, "step": 73615 }, { "epoch": 0.8972249643523089, "grad_norm": 2.276031732559204, "learning_rate": 5.409878127004491e-07, "loss": 0.808, "step": 73620 }, { "epoch": 0.8972859005764566, "grad_norm": 2.365870475769043, "learning_rate": 5.406670942912123e-07, "loss": 0.7701, "step": 73625 }, { "epoch": 0.8973468368006045, "grad_norm": 2.251286506652832, "learning_rate": 5.403463758819757e-07, "loss": 0.8806, "step": 73630 }, { "epoch": 0.8974077730247523, "grad_norm": 2.212167263031006, "learning_rate": 5.40025657472739e-07, "loss": 0.8533, "step": 73635 }, { "epoch": 0.8974687092489001, "grad_norm": 2.2327916622161865, "learning_rate": 5.397049390635023e-07, "loss": 0.7464, "step": 73640 }, { "epoch": 0.8975296454730479, "grad_norm": 1.8194348812103271, "learning_rate": 5.393842206542656e-07, "loss": 0.7447, "step": 73645 }, { "epoch": 0.8975905816971957, "grad_norm": 2.0667898654937744, "learning_rate": 5.39063502245029e-07, "loss": 0.8178, "step": 73650 }, { "epoch": 0.8976515179213436, "grad_norm": 1.9761462211608887, "learning_rate": 5.387427838357922e-07, "loss": 0.7874, "step": 73655 }, { "epoch": 0.8977124541454913, "grad_norm": 2.337143898010254, "learning_rate": 5.384220654265555e-07, "loss": 0.8376, "step": 73660 }, { "epoch": 0.8977733903696391, "grad_norm": 2.127816677093506, "learning_rate": 5.381013470173189e-07, "loss": 0.7827, "step": 73665 }, { "epoch": 0.8978343265937869, "grad_norm": 1.9230397939682007, "learning_rate": 5.377806286080822e-07, "loss": 0.7691, "step": 73670 }, { "epoch": 0.8978952628179347, "grad_norm": 2.3871943950653076, "learning_rate": 5.374599101988454e-07, "loss": 0.7989, "step": 73675 }, { "epoch": 0.8979561990420826, "grad_norm": 2.1827805042266846, "learning_rate": 5.371391917896088e-07, "loss": 0.8607, "step": 73680 }, { "epoch": 0.8980171352662304, "grad_norm": 1.8081303834915161, "learning_rate": 5.368184733803721e-07, "loss": 0.8276, "step": 73685 }, { "epoch": 0.8980780714903782, "grad_norm": 1.847135066986084, "learning_rate": 5.364977549711354e-07, "loss": 0.7989, "step": 73690 }, { "epoch": 0.8981390077145259, "grad_norm": 1.973780870437622, "learning_rate": 5.361770365618987e-07, "loss": 0.7959, "step": 73695 }, { "epoch": 0.8981999439386738, "grad_norm": 1.8412120342254639, "learning_rate": 5.35856318152662e-07, "loss": 0.8326, "step": 73700 }, { "epoch": 0.8982608801628216, "grad_norm": 1.905024528503418, "learning_rate": 5.355355997434253e-07, "loss": 0.8601, "step": 73705 }, { "epoch": 0.8983218163869694, "grad_norm": 1.7365435361862183, "learning_rate": 5.352148813341887e-07, "loss": 0.804, "step": 73710 }, { "epoch": 0.8983827526111172, "grad_norm": 1.9797253608703613, "learning_rate": 5.348941629249519e-07, "loss": 0.7888, "step": 73715 }, { "epoch": 0.898443688835265, "grad_norm": 2.2045934200286865, "learning_rate": 5.345734445157152e-07, "loss": 0.8429, "step": 73720 }, { "epoch": 0.8985046250594129, "grad_norm": 2.423903703689575, "learning_rate": 5.342527261064786e-07, "loss": 0.7809, "step": 73725 }, { "epoch": 0.8985655612835606, "grad_norm": 2.410875082015991, "learning_rate": 5.339320076972419e-07, "loss": 0.853, "step": 73730 }, { "epoch": 0.8986264975077084, "grad_norm": 1.964717149734497, "learning_rate": 5.336112892880052e-07, "loss": 0.8668, "step": 73735 }, { "epoch": 0.8986874337318562, "grad_norm": 2.2613580226898193, "learning_rate": 5.332905708787684e-07, "loss": 0.8016, "step": 73740 }, { "epoch": 0.898748369956004, "grad_norm": 2.1756296157836914, "learning_rate": 5.329698524695318e-07, "loss": 0.8068, "step": 73745 }, { "epoch": 0.8988093061801519, "grad_norm": 1.9572371244430542, "learning_rate": 5.326491340602951e-07, "loss": 0.7687, "step": 73750 }, { "epoch": 0.8988702424042997, "grad_norm": 2.3274271488189697, "learning_rate": 5.323284156510585e-07, "loss": 0.8486, "step": 73755 }, { "epoch": 0.8989311786284475, "grad_norm": 1.9241547584533691, "learning_rate": 5.320076972418217e-07, "loss": 0.8034, "step": 73760 }, { "epoch": 0.8989921148525952, "grad_norm": 1.9170295000076294, "learning_rate": 5.31686978832585e-07, "loss": 0.753, "step": 73765 }, { "epoch": 0.899053051076743, "grad_norm": 2.0617713928222656, "learning_rate": 5.313662604233483e-07, "loss": 0.8526, "step": 73770 }, { "epoch": 0.8991139873008909, "grad_norm": 1.7743589878082275, "learning_rate": 5.310455420141117e-07, "loss": 0.7978, "step": 73775 }, { "epoch": 0.8991749235250387, "grad_norm": 2.172694206237793, "learning_rate": 5.307248236048749e-07, "loss": 0.828, "step": 73780 }, { "epoch": 0.8992358597491865, "grad_norm": 1.879430890083313, "learning_rate": 5.304041051956384e-07, "loss": 0.7243, "step": 73785 }, { "epoch": 0.8992967959733343, "grad_norm": 1.6311256885528564, "learning_rate": 5.300833867864016e-07, "loss": 0.7713, "step": 73790 }, { "epoch": 0.8993577321974822, "grad_norm": 2.0312323570251465, "learning_rate": 5.297626683771649e-07, "loss": 0.7605, "step": 73795 }, { "epoch": 0.8994186684216299, "grad_norm": 2.1886258125305176, "learning_rate": 5.294419499679281e-07, "loss": 0.846, "step": 73800 }, { "epoch": 0.8994796046457777, "grad_norm": 1.8517177104949951, "learning_rate": 5.291212315586916e-07, "loss": 0.7656, "step": 73805 }, { "epoch": 0.8995405408699255, "grad_norm": 1.95193350315094, "learning_rate": 5.288005131494548e-07, "loss": 0.7706, "step": 73810 }, { "epoch": 0.8996014770940733, "grad_norm": 1.7853813171386719, "learning_rate": 5.284797947402181e-07, "loss": 0.7708, "step": 73815 }, { "epoch": 0.8996624133182212, "grad_norm": 1.8633207082748413, "learning_rate": 5.281590763309815e-07, "loss": 0.8706, "step": 73820 }, { "epoch": 0.899723349542369, "grad_norm": 2.299635887145996, "learning_rate": 5.278383579217448e-07, "loss": 0.8293, "step": 73825 }, { "epoch": 0.8997842857665168, "grad_norm": 1.7590789794921875, "learning_rate": 5.27517639512508e-07, "loss": 0.7793, "step": 73830 }, { "epoch": 0.8998452219906645, "grad_norm": 2.2917466163635254, "learning_rate": 5.271969211032714e-07, "loss": 0.8612, "step": 73835 }, { "epoch": 0.8999061582148123, "grad_norm": 1.8157062530517578, "learning_rate": 5.268762026940347e-07, "loss": 0.8856, "step": 73840 }, { "epoch": 0.8999670944389602, "grad_norm": 1.8782634735107422, "learning_rate": 5.26555484284798e-07, "loss": 0.8225, "step": 73845 }, { "epoch": 0.900028030663108, "grad_norm": 2.075427770614624, "learning_rate": 5.262347658755613e-07, "loss": 0.8348, "step": 73850 }, { "epoch": 0.9000889668872558, "grad_norm": 2.118701696395874, "learning_rate": 5.259140474663246e-07, "loss": 0.7765, "step": 73855 }, { "epoch": 0.9001499031114036, "grad_norm": 1.6948118209838867, "learning_rate": 5.255933290570879e-07, "loss": 0.7763, "step": 73860 }, { "epoch": 0.9002108393355515, "grad_norm": 2.032489061355591, "learning_rate": 5.252726106478513e-07, "loss": 0.8141, "step": 73865 }, { "epoch": 0.9002717755596992, "grad_norm": 2.1704351902008057, "learning_rate": 5.249518922386145e-07, "loss": 0.8497, "step": 73870 }, { "epoch": 0.900332711783847, "grad_norm": 1.9301624298095703, "learning_rate": 5.246311738293778e-07, "loss": 0.8072, "step": 73875 }, { "epoch": 0.9003936480079948, "grad_norm": 2.116241931915283, "learning_rate": 5.243104554201412e-07, "loss": 0.8342, "step": 73880 }, { "epoch": 0.9004545842321426, "grad_norm": 2.0763354301452637, "learning_rate": 5.239897370109045e-07, "loss": 0.7559, "step": 73885 }, { "epoch": 0.9005155204562905, "grad_norm": 1.9821200370788574, "learning_rate": 5.236690186016677e-07, "loss": 0.8735, "step": 73890 }, { "epoch": 0.9005764566804383, "grad_norm": 1.8534023761749268, "learning_rate": 5.233483001924311e-07, "loss": 0.8201, "step": 73895 }, { "epoch": 0.9006373929045861, "grad_norm": 2.49881911277771, "learning_rate": 5.230275817831944e-07, "loss": 0.7838, "step": 73900 }, { "epoch": 0.9006983291287338, "grad_norm": 1.9224318265914917, "learning_rate": 5.227068633739577e-07, "loss": 0.7972, "step": 73905 }, { "epoch": 0.9007592653528816, "grad_norm": 1.9657336473464966, "learning_rate": 5.22386144964721e-07, "loss": 0.7925, "step": 73910 }, { "epoch": 0.9008202015770295, "grad_norm": 2.1816086769104004, "learning_rate": 5.220654265554843e-07, "loss": 0.8341, "step": 73915 }, { "epoch": 0.9008811378011773, "grad_norm": 1.7237974405288696, "learning_rate": 5.217447081462476e-07, "loss": 0.8062, "step": 73920 }, { "epoch": 0.9009420740253251, "grad_norm": 1.8974759578704834, "learning_rate": 5.21423989737011e-07, "loss": 0.7766, "step": 73925 }, { "epoch": 0.9010030102494729, "grad_norm": 1.9047027826309204, "learning_rate": 5.211032713277742e-07, "loss": 0.7608, "step": 73930 }, { "epoch": 0.9010639464736208, "grad_norm": 1.8319036960601807, "learning_rate": 5.207825529185375e-07, "loss": 0.8231, "step": 73935 }, { "epoch": 0.9011248826977685, "grad_norm": 1.734179139137268, "learning_rate": 5.204618345093009e-07, "loss": 0.8202, "step": 73940 }, { "epoch": 0.9011858189219163, "grad_norm": 1.7993203401565552, "learning_rate": 5.201411161000642e-07, "loss": 0.7036, "step": 73945 }, { "epoch": 0.9012467551460641, "grad_norm": 1.9368990659713745, "learning_rate": 5.198203976908274e-07, "loss": 0.7447, "step": 73950 }, { "epoch": 0.9013076913702119, "grad_norm": 2.214986562728882, "learning_rate": 5.194996792815908e-07, "loss": 0.8153, "step": 73955 }, { "epoch": 0.9013686275943598, "grad_norm": 2.119659900665283, "learning_rate": 5.191789608723541e-07, "loss": 0.8734, "step": 73960 }, { "epoch": 0.9014295638185076, "grad_norm": 1.7495250701904297, "learning_rate": 5.188582424631174e-07, "loss": 0.8196, "step": 73965 }, { "epoch": 0.9014905000426554, "grad_norm": 2.0420918464660645, "learning_rate": 5.185375240538807e-07, "loss": 0.8599, "step": 73970 }, { "epoch": 0.9015514362668031, "grad_norm": 2.6353304386138916, "learning_rate": 5.18216805644644e-07, "loss": 0.7846, "step": 73975 }, { "epoch": 0.9016123724909509, "grad_norm": 1.656790018081665, "learning_rate": 5.178960872354073e-07, "loss": 0.7349, "step": 73980 }, { "epoch": 0.9016733087150988, "grad_norm": 1.7529464960098267, "learning_rate": 5.175753688261707e-07, "loss": 0.7943, "step": 73985 }, { "epoch": 0.9017342449392466, "grad_norm": 2.1367685794830322, "learning_rate": 5.172546504169339e-07, "loss": 0.7963, "step": 73990 }, { "epoch": 0.9017951811633944, "grad_norm": 2.0116958618164062, "learning_rate": 5.169339320076973e-07, "loss": 0.8102, "step": 73995 }, { "epoch": 0.9018561173875422, "grad_norm": 1.6117011308670044, "learning_rate": 5.166132135984606e-07, "loss": 0.8078, "step": 74000 }, { "epoch": 0.9019170536116901, "grad_norm": 2.9693737030029297, "learning_rate": 5.162924951892239e-07, "loss": 0.9063, "step": 74005 }, { "epoch": 0.9019779898358378, "grad_norm": 1.9628609418869019, "learning_rate": 5.159717767799871e-07, "loss": 0.7861, "step": 74010 }, { "epoch": 0.9020389260599856, "grad_norm": 1.922402024269104, "learning_rate": 5.156510583707506e-07, "loss": 0.8152, "step": 74015 }, { "epoch": 0.9020998622841334, "grad_norm": 2.2395150661468506, "learning_rate": 5.153303399615139e-07, "loss": 0.7898, "step": 74020 }, { "epoch": 0.9021607985082812, "grad_norm": 2.2887277603149414, "learning_rate": 5.150096215522771e-07, "loss": 0.8508, "step": 74025 }, { "epoch": 0.9022217347324291, "grad_norm": 1.6470121145248413, "learning_rate": 5.146889031430405e-07, "loss": 0.7955, "step": 74030 }, { "epoch": 0.9022826709565769, "grad_norm": 2.3348612785339355, "learning_rate": 5.143681847338038e-07, "loss": 0.8083, "step": 74035 }, { "epoch": 0.9023436071807247, "grad_norm": 2.7023696899414062, "learning_rate": 5.140474663245671e-07, "loss": 0.7683, "step": 74040 }, { "epoch": 0.9024045434048724, "grad_norm": 2.708103895187378, "learning_rate": 5.137267479153304e-07, "loss": 0.8372, "step": 74045 }, { "epoch": 0.9024654796290202, "grad_norm": 1.9278364181518555, "learning_rate": 5.134060295060937e-07, "loss": 0.8753, "step": 74050 }, { "epoch": 0.9025264158531681, "grad_norm": 1.833001732826233, "learning_rate": 5.13085311096857e-07, "loss": 0.7596, "step": 74055 }, { "epoch": 0.9025873520773159, "grad_norm": 1.741561770439148, "learning_rate": 5.127645926876204e-07, "loss": 0.8475, "step": 74060 }, { "epoch": 0.9026482883014637, "grad_norm": 1.7509299516677856, "learning_rate": 5.124438742783836e-07, "loss": 0.7939, "step": 74065 }, { "epoch": 0.9027092245256115, "grad_norm": 2.2014718055725098, "learning_rate": 5.121231558691469e-07, "loss": 0.8626, "step": 74070 }, { "epoch": 0.9027701607497594, "grad_norm": 3.492626428604126, "learning_rate": 5.118024374599103e-07, "loss": 0.8257, "step": 74075 }, { "epoch": 0.9028310969739071, "grad_norm": 2.074357509613037, "learning_rate": 5.114817190506736e-07, "loss": 0.8375, "step": 74080 }, { "epoch": 0.9028920331980549, "grad_norm": 1.7254475355148315, "learning_rate": 5.111610006414368e-07, "loss": 0.8374, "step": 74085 }, { "epoch": 0.9029529694222027, "grad_norm": 2.04689359664917, "learning_rate": 5.108402822322002e-07, "loss": 0.8215, "step": 74090 }, { "epoch": 0.9030139056463505, "grad_norm": 2.0174007415771484, "learning_rate": 5.105195638229635e-07, "loss": 0.7664, "step": 74095 }, { "epoch": 0.9030748418704984, "grad_norm": 1.9999037981033325, "learning_rate": 5.101988454137268e-07, "loss": 0.7748, "step": 74100 }, { "epoch": 0.9031357780946462, "grad_norm": 1.8137203454971313, "learning_rate": 5.098781270044901e-07, "loss": 0.8083, "step": 74105 }, { "epoch": 0.903196714318794, "grad_norm": 2.128617525100708, "learning_rate": 5.095574085952534e-07, "loss": 0.8424, "step": 74110 }, { "epoch": 0.9032576505429417, "grad_norm": 1.7433745861053467, "learning_rate": 5.092366901860167e-07, "loss": 0.7577, "step": 74115 }, { "epoch": 0.9033185867670895, "grad_norm": 1.7280755043029785, "learning_rate": 5.089159717767801e-07, "loss": 0.8167, "step": 74120 }, { "epoch": 0.9033795229912374, "grad_norm": 1.978182077407837, "learning_rate": 5.085952533675433e-07, "loss": 0.7932, "step": 74125 }, { "epoch": 0.9034404592153852, "grad_norm": 2.046635150909424, "learning_rate": 5.082745349583066e-07, "loss": 0.8201, "step": 74130 }, { "epoch": 0.903501395439533, "grad_norm": 1.6473619937896729, "learning_rate": 5.0795381654907e-07, "loss": 0.8125, "step": 74135 }, { "epoch": 0.9035623316636808, "grad_norm": 1.9224345684051514, "learning_rate": 5.076330981398333e-07, "loss": 0.8918, "step": 74140 }, { "epoch": 0.9036232678878287, "grad_norm": 1.883208155632019, "learning_rate": 5.073123797305965e-07, "loss": 0.765, "step": 74145 }, { "epoch": 0.9036842041119764, "grad_norm": 1.9419151544570923, "learning_rate": 5.069916613213599e-07, "loss": 0.7818, "step": 74150 }, { "epoch": 0.9037451403361242, "grad_norm": 1.727829098701477, "learning_rate": 5.066709429121232e-07, "loss": 0.7867, "step": 74155 }, { "epoch": 0.903806076560272, "grad_norm": 1.6277085542678833, "learning_rate": 5.063502245028865e-07, "loss": 0.857, "step": 74160 }, { "epoch": 0.9038670127844198, "grad_norm": 1.9885190725326538, "learning_rate": 5.060295060936498e-07, "loss": 0.7827, "step": 74165 }, { "epoch": 0.9039279490085677, "grad_norm": 2.0495266914367676, "learning_rate": 5.057087876844132e-07, "loss": 0.8319, "step": 74170 }, { "epoch": 0.9039888852327155, "grad_norm": 2.164870023727417, "learning_rate": 5.053880692751764e-07, "loss": 0.8067, "step": 74175 }, { "epoch": 0.9040498214568633, "grad_norm": 2.032040596008301, "learning_rate": 5.050673508659398e-07, "loss": 0.8029, "step": 74180 }, { "epoch": 0.904110757681011, "grad_norm": 2.269559621810913, "learning_rate": 5.04746632456703e-07, "loss": 0.793, "step": 74185 }, { "epoch": 0.9041716939051588, "grad_norm": 2.0206542015075684, "learning_rate": 5.044259140474664e-07, "loss": 0.7535, "step": 74190 }, { "epoch": 0.9042326301293067, "grad_norm": 2.0923585891723633, "learning_rate": 5.041051956382297e-07, "loss": 0.8895, "step": 74195 }, { "epoch": 0.9042935663534545, "grad_norm": 2.055694818496704, "learning_rate": 5.03784477228993e-07, "loss": 0.809, "step": 74200 }, { "epoch": 0.9043545025776023, "grad_norm": 2.0199286937713623, "learning_rate": 5.034637588197563e-07, "loss": 0.8301, "step": 74205 }, { "epoch": 0.9044154388017501, "grad_norm": 1.9200161695480347, "learning_rate": 5.031430404105197e-07, "loss": 0.8023, "step": 74210 }, { "epoch": 0.904476375025898, "grad_norm": 2.19948148727417, "learning_rate": 5.028223220012829e-07, "loss": 0.7562, "step": 74215 }, { "epoch": 0.9045373112500457, "grad_norm": 1.758435606956482, "learning_rate": 5.025016035920462e-07, "loss": 0.7503, "step": 74220 }, { "epoch": 0.9045982474741935, "grad_norm": 1.7078666687011719, "learning_rate": 5.021808851828096e-07, "loss": 0.7901, "step": 74225 }, { "epoch": 0.9046591836983413, "grad_norm": 1.614620327949524, "learning_rate": 5.018601667735729e-07, "loss": 0.8055, "step": 74230 }, { "epoch": 0.9047201199224891, "grad_norm": 1.88389253616333, "learning_rate": 5.015394483643361e-07, "loss": 0.7289, "step": 74235 }, { "epoch": 0.904781056146637, "grad_norm": 2.139613389968872, "learning_rate": 5.012187299550995e-07, "loss": 0.7769, "step": 74240 }, { "epoch": 0.9048419923707848, "grad_norm": 1.751625418663025, "learning_rate": 5.008980115458628e-07, "loss": 0.8468, "step": 74245 }, { "epoch": 0.9049029285949326, "grad_norm": 1.9000993967056274, "learning_rate": 5.005772931366261e-07, "loss": 0.8339, "step": 74250 }, { "epoch": 0.9049638648190803, "grad_norm": 1.7882213592529297, "learning_rate": 5.002565747273894e-07, "loss": 0.8105, "step": 74255 }, { "epoch": 0.9050248010432281, "grad_norm": 1.881089210510254, "learning_rate": 4.999358563181527e-07, "loss": 0.8132, "step": 74260 }, { "epoch": 0.905085737267376, "grad_norm": 1.8761918544769287, "learning_rate": 4.99615137908916e-07, "loss": 0.7737, "step": 74265 }, { "epoch": 0.9051466734915238, "grad_norm": 2.128373384475708, "learning_rate": 4.992944194996794e-07, "loss": 0.7431, "step": 74270 }, { "epoch": 0.9052076097156716, "grad_norm": 1.7061069011688232, "learning_rate": 4.989737010904426e-07, "loss": 0.7976, "step": 74275 }, { "epoch": 0.9052685459398194, "grad_norm": 2.0235273838043213, "learning_rate": 4.986529826812059e-07, "loss": 0.7865, "step": 74280 }, { "epoch": 0.9053294821639672, "grad_norm": 1.8484671115875244, "learning_rate": 4.983322642719693e-07, "loss": 0.8456, "step": 74285 }, { "epoch": 0.905390418388115, "grad_norm": 2.1336026191711426, "learning_rate": 4.980115458627326e-07, "loss": 0.7476, "step": 74290 }, { "epoch": 0.9054513546122628, "grad_norm": 2.0768651962280273, "learning_rate": 4.976908274534958e-07, "loss": 0.8028, "step": 74295 }, { "epoch": 0.9055122908364106, "grad_norm": 2.549367904663086, "learning_rate": 4.973701090442592e-07, "loss": 0.91, "step": 74300 }, { "epoch": 0.9055732270605584, "grad_norm": 2.1862754821777344, "learning_rate": 4.970493906350225e-07, "loss": 0.7642, "step": 74305 }, { "epoch": 0.9056341632847063, "grad_norm": 1.9671965837478638, "learning_rate": 4.967286722257858e-07, "loss": 0.8125, "step": 74310 }, { "epoch": 0.9056950995088541, "grad_norm": 1.88018798828125, "learning_rate": 4.96407953816549e-07, "loss": 0.8185, "step": 74315 }, { "epoch": 0.9057560357330018, "grad_norm": 1.909020185470581, "learning_rate": 4.960872354073124e-07, "loss": 0.783, "step": 74320 }, { "epoch": 0.9058169719571496, "grad_norm": 1.966836929321289, "learning_rate": 4.957665169980757e-07, "loss": 0.8781, "step": 74325 }, { "epoch": 0.9058779081812974, "grad_norm": 1.7337112426757812, "learning_rate": 4.95445798588839e-07, "loss": 0.8174, "step": 74330 }, { "epoch": 0.9059388444054453, "grad_norm": 2.081791877746582, "learning_rate": 4.951250801796024e-07, "loss": 0.8578, "step": 74335 }, { "epoch": 0.9059997806295931, "grad_norm": 1.722887635231018, "learning_rate": 4.948043617703656e-07, "loss": 0.8348, "step": 74340 }, { "epoch": 0.9060607168537409, "grad_norm": 1.941023826599121, "learning_rate": 4.94483643361129e-07, "loss": 0.7611, "step": 74345 }, { "epoch": 0.9061216530778887, "grad_norm": 2.413494348526001, "learning_rate": 4.941629249518923e-07, "loss": 0.8053, "step": 74350 }, { "epoch": 0.9061825893020364, "grad_norm": 2.233271360397339, "learning_rate": 4.938422065426556e-07, "loss": 0.747, "step": 74355 }, { "epoch": 0.9062435255261843, "grad_norm": 1.9720063209533691, "learning_rate": 4.935214881334188e-07, "loss": 0.7566, "step": 74360 }, { "epoch": 0.9063044617503321, "grad_norm": 1.8412771224975586, "learning_rate": 4.932007697241823e-07, "loss": 0.8, "step": 74365 }, { "epoch": 0.9063653979744799, "grad_norm": 2.2177248001098633, "learning_rate": 4.928800513149455e-07, "loss": 0.8155, "step": 74370 }, { "epoch": 0.9064263341986277, "grad_norm": 2.2694437503814697, "learning_rate": 4.925593329057088e-07, "loss": 0.8327, "step": 74375 }, { "epoch": 0.9064872704227755, "grad_norm": 2.1519458293914795, "learning_rate": 4.922386144964722e-07, "loss": 0.8155, "step": 74380 }, { "epoch": 0.9065482066469234, "grad_norm": 2.0649571418762207, "learning_rate": 4.919178960872355e-07, "loss": 0.8026, "step": 74385 }, { "epoch": 0.9066091428710711, "grad_norm": 2.2737507820129395, "learning_rate": 4.915971776779987e-07, "loss": 0.8174, "step": 74390 }, { "epoch": 0.9066700790952189, "grad_norm": 2.5344367027282715, "learning_rate": 4.912764592687621e-07, "loss": 0.828, "step": 74395 }, { "epoch": 0.9067310153193667, "grad_norm": 2.1570346355438232, "learning_rate": 4.909557408595254e-07, "loss": 0.7651, "step": 74400 }, { "epoch": 0.9067919515435146, "grad_norm": 2.0653343200683594, "learning_rate": 4.906350224502887e-07, "loss": 0.812, "step": 74405 }, { "epoch": 0.9068528877676624, "grad_norm": 2.314289093017578, "learning_rate": 4.90314304041052e-07, "loss": 0.7652, "step": 74410 }, { "epoch": 0.9069138239918102, "grad_norm": 2.728372812271118, "learning_rate": 4.899935856318153e-07, "loss": 0.8093, "step": 74415 }, { "epoch": 0.906974760215958, "grad_norm": 2.6758062839508057, "learning_rate": 4.896728672225786e-07, "loss": 0.7981, "step": 74420 }, { "epoch": 0.9070356964401057, "grad_norm": 1.97357177734375, "learning_rate": 4.89352148813342e-07, "loss": 0.7605, "step": 74425 }, { "epoch": 0.9070966326642536, "grad_norm": 2.189229965209961, "learning_rate": 4.890314304041052e-07, "loss": 0.8482, "step": 74430 }, { "epoch": 0.9071575688884014, "grad_norm": 2.028061866760254, "learning_rate": 4.887107119948685e-07, "loss": 0.7871, "step": 74435 }, { "epoch": 0.9072185051125492, "grad_norm": 2.261106014251709, "learning_rate": 4.883899935856319e-07, "loss": 0.7983, "step": 74440 }, { "epoch": 0.907279441336697, "grad_norm": 2.3156898021698, "learning_rate": 4.880692751763952e-07, "loss": 0.7618, "step": 74445 }, { "epoch": 0.9073403775608448, "grad_norm": 2.0060606002807617, "learning_rate": 4.877485567671584e-07, "loss": 0.7492, "step": 74450 }, { "epoch": 0.9074013137849927, "grad_norm": 2.0961754322052, "learning_rate": 4.874278383579218e-07, "loss": 0.8447, "step": 74455 }, { "epoch": 0.9074622500091404, "grad_norm": 2.0397939682006836, "learning_rate": 4.871071199486851e-07, "loss": 0.8167, "step": 74460 }, { "epoch": 0.9075231862332882, "grad_norm": 1.8914469480514526, "learning_rate": 4.867864015394484e-07, "loss": 0.7951, "step": 74465 }, { "epoch": 0.907584122457436, "grad_norm": 1.8976839780807495, "learning_rate": 4.864656831302117e-07, "loss": 0.7338, "step": 74470 }, { "epoch": 0.9076450586815838, "grad_norm": 1.6886183023452759, "learning_rate": 4.86144964720975e-07, "loss": 0.8627, "step": 74475 }, { "epoch": 0.9077059949057317, "grad_norm": 2.387573719024658, "learning_rate": 4.858242463117383e-07, "loss": 0.8558, "step": 74480 }, { "epoch": 0.9077669311298795, "grad_norm": 2.248485803604126, "learning_rate": 4.855035279025017e-07, "loss": 0.8388, "step": 74485 }, { "epoch": 0.9078278673540273, "grad_norm": 1.9855684041976929, "learning_rate": 4.851828094932649e-07, "loss": 0.8171, "step": 74490 }, { "epoch": 0.907888803578175, "grad_norm": 2.2778778076171875, "learning_rate": 4.848620910840282e-07, "loss": 0.8277, "step": 74495 }, { "epoch": 0.9079497398023229, "grad_norm": 1.7695362567901611, "learning_rate": 4.845413726747916e-07, "loss": 0.7786, "step": 74500 }, { "epoch": 0.9080106760264707, "grad_norm": 1.6147186756134033, "learning_rate": 4.842206542655549e-07, "loss": 0.8069, "step": 74505 }, { "epoch": 0.9080716122506185, "grad_norm": 2.1841084957122803, "learning_rate": 4.838999358563181e-07, "loss": 0.8613, "step": 74510 }, { "epoch": 0.9081325484747663, "grad_norm": 2.117036819458008, "learning_rate": 4.835792174470815e-07, "loss": 0.87, "step": 74515 }, { "epoch": 0.9081934846989141, "grad_norm": 1.8932135105133057, "learning_rate": 4.832584990378448e-07, "loss": 0.7646, "step": 74520 }, { "epoch": 0.908254420923062, "grad_norm": 2.1530981063842773, "learning_rate": 4.829377806286081e-07, "loss": 0.7802, "step": 74525 }, { "epoch": 0.9083153571472097, "grad_norm": 2.42988657951355, "learning_rate": 4.826170622193714e-07, "loss": 0.8936, "step": 74530 }, { "epoch": 0.9083762933713575, "grad_norm": 2.439826488494873, "learning_rate": 4.822963438101347e-07, "loss": 0.7422, "step": 74535 }, { "epoch": 0.9084372295955053, "grad_norm": 1.8835082054138184, "learning_rate": 4.81975625400898e-07, "loss": 0.7967, "step": 74540 }, { "epoch": 0.9084981658196531, "grad_norm": 2.0983171463012695, "learning_rate": 4.816549069916614e-07, "loss": 0.8121, "step": 74545 }, { "epoch": 0.908559102043801, "grad_norm": 1.8810033798217773, "learning_rate": 4.813341885824246e-07, "loss": 0.8437, "step": 74550 }, { "epoch": 0.9086200382679488, "grad_norm": 1.7492419481277466, "learning_rate": 4.81013470173188e-07, "loss": 0.7908, "step": 74555 }, { "epoch": 0.9086809744920966, "grad_norm": 2.0806119441986084, "learning_rate": 4.806927517639513e-07, "loss": 0.8135, "step": 74560 }, { "epoch": 0.9087419107162443, "grad_norm": 1.971935749053955, "learning_rate": 4.803720333547146e-07, "loss": 0.7822, "step": 74565 }, { "epoch": 0.9088028469403922, "grad_norm": 2.0203676223754883, "learning_rate": 4.800513149454778e-07, "loss": 0.7934, "step": 74570 }, { "epoch": 0.90886378316454, "grad_norm": 1.8290293216705322, "learning_rate": 4.797305965362413e-07, "loss": 0.8462, "step": 74575 }, { "epoch": 0.9089247193886878, "grad_norm": 1.9038794040679932, "learning_rate": 4.794098781270045e-07, "loss": 0.8285, "step": 74580 }, { "epoch": 0.9089856556128356, "grad_norm": 2.2235219478607178, "learning_rate": 4.790891597177678e-07, "loss": 0.7994, "step": 74585 }, { "epoch": 0.9090465918369834, "grad_norm": 1.979223608970642, "learning_rate": 4.787684413085312e-07, "loss": 0.766, "step": 74590 }, { "epoch": 0.9091075280611313, "grad_norm": 2.009060859680176, "learning_rate": 4.784477228992945e-07, "loss": 0.8097, "step": 74595 }, { "epoch": 0.909168464285279, "grad_norm": 2.019030809402466, "learning_rate": 4.781270044900577e-07, "loss": 0.7744, "step": 74600 }, { "epoch": 0.9092294005094268, "grad_norm": 1.8038032054901123, "learning_rate": 4.778062860808211e-07, "loss": 0.7511, "step": 74605 }, { "epoch": 0.9092903367335746, "grad_norm": 1.8850018978118896, "learning_rate": 4.774855676715844e-07, "loss": 0.7997, "step": 74610 }, { "epoch": 0.9093512729577224, "grad_norm": 1.795901894569397, "learning_rate": 4.771648492623477e-07, "loss": 0.827, "step": 74615 }, { "epoch": 0.9094122091818703, "grad_norm": 2.023448944091797, "learning_rate": 4.76844130853111e-07, "loss": 0.7673, "step": 74620 }, { "epoch": 0.9094731454060181, "grad_norm": 2.098875045776367, "learning_rate": 4.765234124438743e-07, "loss": 0.8082, "step": 74625 }, { "epoch": 0.9095340816301659, "grad_norm": 1.6288121938705444, "learning_rate": 4.762026940346377e-07, "loss": 0.7907, "step": 74630 }, { "epoch": 0.9095950178543136, "grad_norm": 2.2880845069885254, "learning_rate": 4.758819756254009e-07, "loss": 0.833, "step": 74635 }, { "epoch": 0.9096559540784614, "grad_norm": 1.8549407720565796, "learning_rate": 4.755612572161643e-07, "loss": 0.7205, "step": 74640 }, { "epoch": 0.9097168903026093, "grad_norm": 2.0075032711029053, "learning_rate": 4.7524053880692753e-07, "loss": 0.7192, "step": 74645 }, { "epoch": 0.9097778265267571, "grad_norm": 1.8814582824707031, "learning_rate": 4.749198203976909e-07, "loss": 0.8817, "step": 74650 }, { "epoch": 0.9098387627509049, "grad_norm": 1.8733689785003662, "learning_rate": 4.7459910198845415e-07, "loss": 0.8087, "step": 74655 }, { "epoch": 0.9098996989750527, "grad_norm": 1.8382025957107544, "learning_rate": 4.7427838357921753e-07, "loss": 0.8457, "step": 74660 }, { "epoch": 0.9099606351992006, "grad_norm": 2.0829436779022217, "learning_rate": 4.739576651699808e-07, "loss": 0.818, "step": 74665 }, { "epoch": 0.9100215714233483, "grad_norm": 1.7980430126190186, "learning_rate": 4.7363694676074415e-07, "loss": 0.7989, "step": 74670 }, { "epoch": 0.9100825076474961, "grad_norm": 2.9632084369659424, "learning_rate": 4.7331622835150743e-07, "loss": 0.8557, "step": 74675 }, { "epoch": 0.9101434438716439, "grad_norm": 1.8743112087249756, "learning_rate": 4.7299550994227077e-07, "loss": 0.864, "step": 74680 }, { "epoch": 0.9102043800957917, "grad_norm": 1.8507022857666016, "learning_rate": 4.7267479153303405e-07, "loss": 0.844, "step": 74685 }, { "epoch": 0.9102653163199396, "grad_norm": 2.4927761554718018, "learning_rate": 4.723540731237974e-07, "loss": 0.8416, "step": 74690 }, { "epoch": 0.9103262525440874, "grad_norm": 1.8977525234222412, "learning_rate": 4.7203335471456066e-07, "loss": 0.864, "step": 74695 }, { "epoch": 0.9103871887682352, "grad_norm": 1.5905590057373047, "learning_rate": 4.71712636305324e-07, "loss": 0.7396, "step": 74700 }, { "epoch": 0.9104481249923829, "grad_norm": 2.049560070037842, "learning_rate": 4.713919178960873e-07, "loss": 0.8245, "step": 74705 }, { "epoch": 0.9105090612165307, "grad_norm": 2.265406847000122, "learning_rate": 4.710711994868506e-07, "loss": 0.829, "step": 74710 }, { "epoch": 0.9105699974406786, "grad_norm": 1.7090789079666138, "learning_rate": 4.707504810776139e-07, "loss": 0.7452, "step": 74715 }, { "epoch": 0.9106309336648264, "grad_norm": 1.601236343383789, "learning_rate": 4.7042976266837723e-07, "loss": 0.7745, "step": 74720 }, { "epoch": 0.9106918698889742, "grad_norm": 1.5910308361053467, "learning_rate": 4.701090442591405e-07, "loss": 0.8244, "step": 74725 }, { "epoch": 0.910752806113122, "grad_norm": 1.5722030401229858, "learning_rate": 4.6978832584990385e-07, "loss": 0.8292, "step": 74730 }, { "epoch": 0.9108137423372699, "grad_norm": 2.1863441467285156, "learning_rate": 4.6946760744066713e-07, "loss": 0.837, "step": 74735 }, { "epoch": 0.9108746785614176, "grad_norm": 1.8417807817459106, "learning_rate": 4.6914688903143046e-07, "loss": 0.7683, "step": 74740 }, { "epoch": 0.9109356147855654, "grad_norm": 1.9563941955566406, "learning_rate": 4.6882617062219374e-07, "loss": 0.8, "step": 74745 }, { "epoch": 0.9109965510097132, "grad_norm": 1.7464288473129272, "learning_rate": 4.685054522129571e-07, "loss": 0.8645, "step": 74750 }, { "epoch": 0.911057487233861, "grad_norm": 2.2411420345306396, "learning_rate": 4.6818473380372036e-07, "loss": 0.8148, "step": 74755 }, { "epoch": 0.9111184234580089, "grad_norm": 2.2081634998321533, "learning_rate": 4.678640153944837e-07, "loss": 0.8601, "step": 74760 }, { "epoch": 0.9111793596821567, "grad_norm": 1.8586523532867432, "learning_rate": 4.67543296985247e-07, "loss": 0.8429, "step": 74765 }, { "epoch": 0.9112402959063045, "grad_norm": 2.0095434188842773, "learning_rate": 4.672225785760103e-07, "loss": 0.7934, "step": 74770 }, { "epoch": 0.9113012321304522, "grad_norm": 1.8934478759765625, "learning_rate": 4.669018601667736e-07, "loss": 0.8016, "step": 74775 }, { "epoch": 0.9113621683546, "grad_norm": 2.0015625953674316, "learning_rate": 4.665811417575369e-07, "loss": 0.7995, "step": 74780 }, { "epoch": 0.9114231045787479, "grad_norm": 1.9303600788116455, "learning_rate": 4.662604233483002e-07, "loss": 0.8209, "step": 74785 }, { "epoch": 0.9114840408028957, "grad_norm": 2.2622339725494385, "learning_rate": 4.6593970493906354e-07, "loss": 0.8624, "step": 74790 }, { "epoch": 0.9115449770270435, "grad_norm": 2.0951015949249268, "learning_rate": 4.656189865298268e-07, "loss": 0.7922, "step": 74795 }, { "epoch": 0.9116059132511913, "grad_norm": 1.743992805480957, "learning_rate": 4.6529826812059016e-07, "loss": 0.7676, "step": 74800 }, { "epoch": 0.9116668494753392, "grad_norm": 2.0477497577667236, "learning_rate": 4.6497754971135344e-07, "loss": 0.6974, "step": 74805 }, { "epoch": 0.9117277856994869, "grad_norm": 1.873533010482788, "learning_rate": 4.6465683130211677e-07, "loss": 0.8316, "step": 74810 }, { "epoch": 0.9117887219236347, "grad_norm": 2.051776170730591, "learning_rate": 4.6433611289288005e-07, "loss": 0.8054, "step": 74815 }, { "epoch": 0.9118496581477825, "grad_norm": 1.9527556896209717, "learning_rate": 4.640153944836434e-07, "loss": 0.8341, "step": 74820 }, { "epoch": 0.9119105943719303, "grad_norm": 2.6314170360565186, "learning_rate": 4.6369467607440667e-07, "loss": 0.7825, "step": 74825 }, { "epoch": 0.9119715305960782, "grad_norm": 2.2845284938812256, "learning_rate": 4.6337395766517e-07, "loss": 0.773, "step": 74830 }, { "epoch": 0.912032466820226, "grad_norm": 2.019853353500366, "learning_rate": 4.630532392559333e-07, "loss": 0.7385, "step": 74835 }, { "epoch": 0.9120934030443738, "grad_norm": 1.862176537513733, "learning_rate": 4.6273252084669667e-07, "loss": 0.8448, "step": 74840 }, { "epoch": 0.9121543392685215, "grad_norm": 2.090240240097046, "learning_rate": 4.624118024374599e-07, "loss": 0.8185, "step": 74845 }, { "epoch": 0.9122152754926693, "grad_norm": 1.893620491027832, "learning_rate": 4.620910840282233e-07, "loss": 0.7642, "step": 74850 }, { "epoch": 0.9122762117168172, "grad_norm": 1.7379844188690186, "learning_rate": 4.617703656189865e-07, "loss": 0.777, "step": 74855 }, { "epoch": 0.912337147940965, "grad_norm": 2.286102294921875, "learning_rate": 4.614496472097499e-07, "loss": 0.7846, "step": 74860 }, { "epoch": 0.9123980841651128, "grad_norm": 1.6635360717773438, "learning_rate": 4.6112892880051313e-07, "loss": 0.7865, "step": 74865 }, { "epoch": 0.9124590203892606, "grad_norm": 2.3050127029418945, "learning_rate": 4.608082103912765e-07, "loss": 0.8027, "step": 74870 }, { "epoch": 0.9125199566134085, "grad_norm": 1.8163273334503174, "learning_rate": 4.604874919820398e-07, "loss": 0.7891, "step": 74875 }, { "epoch": 0.9125808928375562, "grad_norm": 1.8503921031951904, "learning_rate": 4.6016677357280314e-07, "loss": 0.8601, "step": 74880 }, { "epoch": 0.912641829061704, "grad_norm": 1.8426364660263062, "learning_rate": 4.598460551635664e-07, "loss": 0.8968, "step": 74885 }, { "epoch": 0.9127027652858518, "grad_norm": 1.9849783182144165, "learning_rate": 4.5952533675432975e-07, "loss": 0.7923, "step": 74890 }, { "epoch": 0.9127637015099996, "grad_norm": 2.234656572341919, "learning_rate": 4.5920461834509303e-07, "loss": 0.8038, "step": 74895 }, { "epoch": 0.9128246377341475, "grad_norm": 2.049062967300415, "learning_rate": 4.5888389993585637e-07, "loss": 0.8254, "step": 74900 }, { "epoch": 0.9128855739582953, "grad_norm": 2.07132887840271, "learning_rate": 4.5856318152661965e-07, "loss": 0.762, "step": 74905 }, { "epoch": 0.9129465101824431, "grad_norm": 1.914052963256836, "learning_rate": 4.58242463117383e-07, "loss": 0.8019, "step": 74910 }, { "epoch": 0.9130074464065908, "grad_norm": 1.951717734336853, "learning_rate": 4.5792174470814627e-07, "loss": 0.8685, "step": 74915 }, { "epoch": 0.9130683826307386, "grad_norm": 2.0614404678344727, "learning_rate": 4.576010262989096e-07, "loss": 0.8333, "step": 74920 }, { "epoch": 0.9131293188548865, "grad_norm": 1.7553082704544067, "learning_rate": 4.5728030788967293e-07, "loss": 0.7795, "step": 74925 }, { "epoch": 0.9131902550790343, "grad_norm": 1.9810031652450562, "learning_rate": 4.569595894804362e-07, "loss": 0.7423, "step": 74930 }, { "epoch": 0.9132511913031821, "grad_norm": 1.9569363594055176, "learning_rate": 4.5663887107119955e-07, "loss": 0.8635, "step": 74935 }, { "epoch": 0.9133121275273299, "grad_norm": 1.879256010055542, "learning_rate": 4.5631815266196283e-07, "loss": 0.7883, "step": 74940 }, { "epoch": 0.9133730637514778, "grad_norm": 2.54362416267395, "learning_rate": 4.5599743425272617e-07, "loss": 0.8129, "step": 74945 }, { "epoch": 0.9134339999756255, "grad_norm": 3.1069681644439697, "learning_rate": 4.5567671584348945e-07, "loss": 0.8164, "step": 74950 }, { "epoch": 0.9134949361997733, "grad_norm": 2.526334285736084, "learning_rate": 4.553559974342528e-07, "loss": 0.8211, "step": 74955 }, { "epoch": 0.9135558724239211, "grad_norm": 2.00443959236145, "learning_rate": 4.5503527902501606e-07, "loss": 0.787, "step": 74960 }, { "epoch": 0.9136168086480689, "grad_norm": 1.9606578350067139, "learning_rate": 4.547145606157794e-07, "loss": 0.7547, "step": 74965 }, { "epoch": 0.9136777448722168, "grad_norm": 2.1020021438598633, "learning_rate": 4.543938422065427e-07, "loss": 0.8848, "step": 74970 }, { "epoch": 0.9137386810963646, "grad_norm": 1.7015306949615479, "learning_rate": 4.54073123797306e-07, "loss": 0.8014, "step": 74975 }, { "epoch": 0.9137996173205124, "grad_norm": 1.8507055044174194, "learning_rate": 4.537524053880693e-07, "loss": 0.8614, "step": 74980 }, { "epoch": 0.9138605535446601, "grad_norm": 2.1979873180389404, "learning_rate": 4.5343168697883263e-07, "loss": 0.8533, "step": 74985 }, { "epoch": 0.9139214897688079, "grad_norm": 2.0020015239715576, "learning_rate": 4.531109685695959e-07, "loss": 0.7767, "step": 74990 }, { "epoch": 0.9139824259929558, "grad_norm": 1.7803844213485718, "learning_rate": 4.5279025016035925e-07, "loss": 0.8015, "step": 74995 }, { "epoch": 0.9140433622171036, "grad_norm": 1.898407220840454, "learning_rate": 4.5246953175112253e-07, "loss": 0.7595, "step": 75000 }, { "epoch": 0.9141042984412514, "grad_norm": 1.6647040843963623, "learning_rate": 4.5214881334188586e-07, "loss": 0.8217, "step": 75005 }, { "epoch": 0.9141652346653992, "grad_norm": 1.971892237663269, "learning_rate": 4.5182809493264914e-07, "loss": 0.7619, "step": 75010 }, { "epoch": 0.914226170889547, "grad_norm": 2.1286604404449463, "learning_rate": 4.5150737652341253e-07, "loss": 0.846, "step": 75015 }, { "epoch": 0.9142871071136948, "grad_norm": 1.7551281452178955, "learning_rate": 4.5118665811417576e-07, "loss": 0.7344, "step": 75020 }, { "epoch": 0.9143480433378426, "grad_norm": 2.1359455585479736, "learning_rate": 4.5086593970493915e-07, "loss": 0.799, "step": 75025 }, { "epoch": 0.9144089795619904, "grad_norm": 1.9554094076156616, "learning_rate": 4.505452212957024e-07, "loss": 0.7653, "step": 75030 }, { "epoch": 0.9144699157861382, "grad_norm": 2.008244752883911, "learning_rate": 4.5022450288646576e-07, "loss": 0.7742, "step": 75035 }, { "epoch": 0.914530852010286, "grad_norm": 1.981464147567749, "learning_rate": 4.49903784477229e-07, "loss": 0.741, "step": 75040 }, { "epoch": 0.9145917882344339, "grad_norm": 1.7490252256393433, "learning_rate": 4.495830660679924e-07, "loss": 0.8228, "step": 75045 }, { "epoch": 0.9146527244585817, "grad_norm": 2.4231481552124023, "learning_rate": 4.4926234765875566e-07, "loss": 0.8428, "step": 75050 }, { "epoch": 0.9147136606827294, "grad_norm": 1.9185045957565308, "learning_rate": 4.48941629249519e-07, "loss": 0.9005, "step": 75055 }, { "epoch": 0.9147745969068772, "grad_norm": 1.9613105058670044, "learning_rate": 4.486209108402823e-07, "loss": 0.7725, "step": 75060 }, { "epoch": 0.9148355331310251, "grad_norm": 1.8104138374328613, "learning_rate": 4.483001924310456e-07, "loss": 0.8082, "step": 75065 }, { "epoch": 0.9148964693551729, "grad_norm": 1.7424274682998657, "learning_rate": 4.479794740218089e-07, "loss": 0.748, "step": 75070 }, { "epoch": 0.9149574055793207, "grad_norm": 1.911919116973877, "learning_rate": 4.476587556125722e-07, "loss": 0.7759, "step": 75075 }, { "epoch": 0.9150183418034685, "grad_norm": 1.8688344955444336, "learning_rate": 4.473380372033355e-07, "loss": 0.7317, "step": 75080 }, { "epoch": 0.9150792780276163, "grad_norm": 1.9419115781784058, "learning_rate": 4.4701731879409884e-07, "loss": 0.7593, "step": 75085 }, { "epoch": 0.9151402142517641, "grad_norm": 1.855207085609436, "learning_rate": 4.466966003848621e-07, "loss": 0.7958, "step": 75090 }, { "epoch": 0.9152011504759119, "grad_norm": 1.7617440223693848, "learning_rate": 4.4637588197562546e-07, "loss": 0.7938, "step": 75095 }, { "epoch": 0.9152620867000597, "grad_norm": 2.106299638748169, "learning_rate": 4.4605516356638874e-07, "loss": 0.8667, "step": 75100 }, { "epoch": 0.9153230229242075, "grad_norm": 2.000905990600586, "learning_rate": 4.4573444515715207e-07, "loss": 0.8224, "step": 75105 }, { "epoch": 0.9153839591483554, "grad_norm": 1.9767826795578003, "learning_rate": 4.4541372674791535e-07, "loss": 0.788, "step": 75110 }, { "epoch": 0.9154448953725032, "grad_norm": 1.9552656412124634, "learning_rate": 4.450930083386787e-07, "loss": 0.7399, "step": 75115 }, { "epoch": 0.915505831596651, "grad_norm": 2.098449230194092, "learning_rate": 4.4477228992944197e-07, "loss": 0.8368, "step": 75120 }, { "epoch": 0.9155667678207987, "grad_norm": 2.817953586578369, "learning_rate": 4.444515715202053e-07, "loss": 0.8064, "step": 75125 }, { "epoch": 0.9156277040449465, "grad_norm": 2.070974826812744, "learning_rate": 4.441308531109686e-07, "loss": 0.8531, "step": 75130 }, { "epoch": 0.9156886402690944, "grad_norm": 2.067472219467163, "learning_rate": 4.438101347017319e-07, "loss": 0.8337, "step": 75135 }, { "epoch": 0.9157495764932422, "grad_norm": 1.9761993885040283, "learning_rate": 4.434894162924952e-07, "loss": 0.799, "step": 75140 }, { "epoch": 0.91581051271739, "grad_norm": 1.926507830619812, "learning_rate": 4.4316869788325854e-07, "loss": 0.7549, "step": 75145 }, { "epoch": 0.9158714489415378, "grad_norm": 2.192457675933838, "learning_rate": 4.428479794740218e-07, "loss": 0.8005, "step": 75150 }, { "epoch": 0.9159323851656856, "grad_norm": 2.2292299270629883, "learning_rate": 4.4252726106478515e-07, "loss": 0.7877, "step": 75155 }, { "epoch": 0.9159933213898334, "grad_norm": 2.140550374984741, "learning_rate": 4.4220654265554843e-07, "loss": 0.917, "step": 75160 }, { "epoch": 0.9160542576139812, "grad_norm": 1.9558089971542358, "learning_rate": 4.4188582424631177e-07, "loss": 0.7784, "step": 75165 }, { "epoch": 0.916115193838129, "grad_norm": 2.4149038791656494, "learning_rate": 4.4156510583707505e-07, "loss": 0.7812, "step": 75170 }, { "epoch": 0.9161761300622768, "grad_norm": 1.8295693397521973, "learning_rate": 4.412443874278384e-07, "loss": 0.8153, "step": 75175 }, { "epoch": 0.9162370662864247, "grad_norm": 1.7372958660125732, "learning_rate": 4.4092366901860167e-07, "loss": 0.8039, "step": 75180 }, { "epoch": 0.9162980025105725, "grad_norm": 2.776749610900879, "learning_rate": 4.40602950609365e-07, "loss": 0.8195, "step": 75185 }, { "epoch": 0.9163589387347203, "grad_norm": 2.27972674369812, "learning_rate": 4.402822322001283e-07, "loss": 0.8474, "step": 75190 }, { "epoch": 0.916419874958868, "grad_norm": 2.3569297790527344, "learning_rate": 4.399615137908916e-07, "loss": 0.8792, "step": 75195 }, { "epoch": 0.9164808111830158, "grad_norm": 2.12262225151062, "learning_rate": 4.396407953816549e-07, "loss": 0.8017, "step": 75200 }, { "epoch": 0.9165417474071637, "grad_norm": 2.185009717941284, "learning_rate": 4.3932007697241823e-07, "loss": 0.7648, "step": 75205 }, { "epoch": 0.9166026836313115, "grad_norm": 2.0953476428985596, "learning_rate": 4.389993585631815e-07, "loss": 0.8722, "step": 75210 }, { "epoch": 0.9166636198554593, "grad_norm": 1.7552943229675293, "learning_rate": 4.3867864015394485e-07, "loss": 0.8316, "step": 75215 }, { "epoch": 0.9167245560796071, "grad_norm": 2.4856815338134766, "learning_rate": 4.3835792174470813e-07, "loss": 0.806, "step": 75220 }, { "epoch": 0.916785492303755, "grad_norm": 1.9827957153320312, "learning_rate": 4.380372033354715e-07, "loss": 0.8928, "step": 75225 }, { "epoch": 0.9168464285279027, "grad_norm": 2.2788991928100586, "learning_rate": 4.3771648492623485e-07, "loss": 0.766, "step": 75230 }, { "epoch": 0.9169073647520505, "grad_norm": 1.7551565170288086, "learning_rate": 4.3739576651699813e-07, "loss": 0.7684, "step": 75235 }, { "epoch": 0.9169683009761983, "grad_norm": 1.7102632522583008, "learning_rate": 4.3707504810776147e-07, "loss": 0.798, "step": 75240 }, { "epoch": 0.9170292372003461, "grad_norm": 2.285036325454712, "learning_rate": 4.3675432969852475e-07, "loss": 0.7789, "step": 75245 }, { "epoch": 0.917090173424494, "grad_norm": 1.8062448501586914, "learning_rate": 4.364336112892881e-07, "loss": 0.7943, "step": 75250 }, { "epoch": 0.9171511096486418, "grad_norm": 1.9826645851135254, "learning_rate": 4.3611289288005136e-07, "loss": 0.8486, "step": 75255 }, { "epoch": 0.9172120458727895, "grad_norm": 1.8842837810516357, "learning_rate": 4.357921744708147e-07, "loss": 0.8327, "step": 75260 }, { "epoch": 0.9172729820969373, "grad_norm": 1.999729037284851, "learning_rate": 4.35471456061578e-07, "loss": 0.8465, "step": 75265 }, { "epoch": 0.9173339183210851, "grad_norm": 1.8922110795974731, "learning_rate": 4.351507376523413e-07, "loss": 0.7823, "step": 75270 }, { "epoch": 0.917394854545233, "grad_norm": 2.44901704788208, "learning_rate": 4.348300192431046e-07, "loss": 0.7872, "step": 75275 }, { "epoch": 0.9174557907693808, "grad_norm": 2.118009090423584, "learning_rate": 4.3450930083386793e-07, "loss": 0.7628, "step": 75280 }, { "epoch": 0.9175167269935286, "grad_norm": 1.762506127357483, "learning_rate": 4.341885824246312e-07, "loss": 0.8265, "step": 75285 }, { "epoch": 0.9175776632176764, "grad_norm": 1.5221481323242188, "learning_rate": 4.3386786401539455e-07, "loss": 0.7935, "step": 75290 }, { "epoch": 0.9176385994418241, "grad_norm": 2.0107476711273193, "learning_rate": 4.3354714560615783e-07, "loss": 0.8014, "step": 75295 }, { "epoch": 0.917699535665972, "grad_norm": 1.752729058265686, "learning_rate": 4.3322642719692116e-07, "loss": 0.8172, "step": 75300 }, { "epoch": 0.9177604718901198, "grad_norm": 2.0880165100097656, "learning_rate": 4.3290570878768444e-07, "loss": 0.8193, "step": 75305 }, { "epoch": 0.9178214081142676, "grad_norm": 1.6268144845962524, "learning_rate": 4.325849903784478e-07, "loss": 0.8091, "step": 75310 }, { "epoch": 0.9178823443384154, "grad_norm": 2.0716357231140137, "learning_rate": 4.3226427196921106e-07, "loss": 0.8911, "step": 75315 }, { "epoch": 0.9179432805625632, "grad_norm": 1.823630928993225, "learning_rate": 4.319435535599744e-07, "loss": 0.7319, "step": 75320 }, { "epoch": 0.9180042167867111, "grad_norm": 1.70840585231781, "learning_rate": 4.316228351507377e-07, "loss": 0.7961, "step": 75325 }, { "epoch": 0.9180651530108588, "grad_norm": 2.093345880508423, "learning_rate": 4.31302116741501e-07, "loss": 0.8086, "step": 75330 }, { "epoch": 0.9181260892350066, "grad_norm": 1.9837573766708374, "learning_rate": 4.309813983322643e-07, "loss": 0.8198, "step": 75335 }, { "epoch": 0.9181870254591544, "grad_norm": 2.0725045204162598, "learning_rate": 4.306606799230276e-07, "loss": 0.8852, "step": 75340 }, { "epoch": 0.9182479616833022, "grad_norm": 1.734630823135376, "learning_rate": 4.303399615137909e-07, "loss": 0.8125, "step": 75345 }, { "epoch": 0.9183088979074501, "grad_norm": 1.763044834136963, "learning_rate": 4.3001924310455424e-07, "loss": 0.7846, "step": 75350 }, { "epoch": 0.9183698341315979, "grad_norm": 1.7581560611724854, "learning_rate": 4.296985246953175e-07, "loss": 0.7682, "step": 75355 }, { "epoch": 0.9184307703557457, "grad_norm": 2.0028188228607178, "learning_rate": 4.2937780628608086e-07, "loss": 0.7846, "step": 75360 }, { "epoch": 0.9184917065798934, "grad_norm": 2.0506033897399902, "learning_rate": 4.2905708787684414e-07, "loss": 0.7668, "step": 75365 }, { "epoch": 0.9185526428040413, "grad_norm": 1.833492636680603, "learning_rate": 4.287363694676075e-07, "loss": 0.7895, "step": 75370 }, { "epoch": 0.9186135790281891, "grad_norm": 1.8443341255187988, "learning_rate": 4.2841565105837076e-07, "loss": 0.776, "step": 75375 }, { "epoch": 0.9186745152523369, "grad_norm": 2.042004346847534, "learning_rate": 4.280949326491341e-07, "loss": 0.7826, "step": 75380 }, { "epoch": 0.9187354514764847, "grad_norm": 1.906951904296875, "learning_rate": 4.2777421423989737e-07, "loss": 0.8444, "step": 75385 }, { "epoch": 0.9187963877006325, "grad_norm": 1.6339086294174194, "learning_rate": 4.274534958306607e-07, "loss": 0.8181, "step": 75390 }, { "epoch": 0.9188573239247804, "grad_norm": 2.296549081802368, "learning_rate": 4.27132777421424e-07, "loss": 0.8162, "step": 75395 }, { "epoch": 0.9189182601489281, "grad_norm": 1.9163923263549805, "learning_rate": 4.268120590121874e-07, "loss": 0.816, "step": 75400 }, { "epoch": 0.9189791963730759, "grad_norm": 1.7912276983261108, "learning_rate": 4.264913406029506e-07, "loss": 0.8081, "step": 75405 }, { "epoch": 0.9190401325972237, "grad_norm": 1.6977051496505737, "learning_rate": 4.26170622193714e-07, "loss": 0.8174, "step": 75410 }, { "epoch": 0.9191010688213715, "grad_norm": 2.04655385017395, "learning_rate": 4.258499037844772e-07, "loss": 0.8311, "step": 75415 }, { "epoch": 0.9191620050455194, "grad_norm": 1.9040242433547974, "learning_rate": 4.255291853752406e-07, "loss": 0.8327, "step": 75420 }, { "epoch": 0.9192229412696672, "grad_norm": 2.026319980621338, "learning_rate": 4.2520846696600383e-07, "loss": 0.7933, "step": 75425 }, { "epoch": 0.919283877493815, "grad_norm": 1.955246925354004, "learning_rate": 4.248877485567672e-07, "loss": 0.8002, "step": 75430 }, { "epoch": 0.9193448137179627, "grad_norm": 2.42887806892395, "learning_rate": 4.245670301475305e-07, "loss": 0.7317, "step": 75435 }, { "epoch": 0.9194057499421106, "grad_norm": 1.8311749696731567, "learning_rate": 4.2424631173829384e-07, "loss": 0.8822, "step": 75440 }, { "epoch": 0.9194666861662584, "grad_norm": 1.8998695611953735, "learning_rate": 4.239255933290571e-07, "loss": 0.7851, "step": 75445 }, { "epoch": 0.9195276223904062, "grad_norm": 2.159882068634033, "learning_rate": 4.2360487491982045e-07, "loss": 0.815, "step": 75450 }, { "epoch": 0.919588558614554, "grad_norm": 1.8880447149276733, "learning_rate": 4.2328415651058374e-07, "loss": 0.7755, "step": 75455 }, { "epoch": 0.9196494948387018, "grad_norm": 1.7735999822616577, "learning_rate": 4.2296343810134707e-07, "loss": 0.752, "step": 75460 }, { "epoch": 0.9197104310628497, "grad_norm": 2.042898416519165, "learning_rate": 4.2264271969211035e-07, "loss": 0.7706, "step": 75465 }, { "epoch": 0.9197713672869974, "grad_norm": 2.0426595211029053, "learning_rate": 4.223220012828737e-07, "loss": 0.6986, "step": 75470 }, { "epoch": 0.9198323035111452, "grad_norm": 2.5020363330841064, "learning_rate": 4.2200128287363697e-07, "loss": 0.8552, "step": 75475 }, { "epoch": 0.919893239735293, "grad_norm": 1.8944171667099, "learning_rate": 4.216805644644003e-07, "loss": 0.7398, "step": 75480 }, { "epoch": 0.9199541759594408, "grad_norm": 1.8977676630020142, "learning_rate": 4.213598460551636e-07, "loss": 0.7636, "step": 75485 }, { "epoch": 0.9200151121835887, "grad_norm": 1.9558769464492798, "learning_rate": 4.210391276459269e-07, "loss": 0.79, "step": 75490 }, { "epoch": 0.9200760484077365, "grad_norm": 1.8777363300323486, "learning_rate": 4.207184092366902e-07, "loss": 0.7811, "step": 75495 }, { "epoch": 0.9201369846318843, "grad_norm": 2.1896650791168213, "learning_rate": 4.2039769082745353e-07, "loss": 0.786, "step": 75500 }, { "epoch": 0.920197920856032, "grad_norm": 2.1275746822357178, "learning_rate": 4.200769724182168e-07, "loss": 0.8546, "step": 75505 }, { "epoch": 0.9202588570801798, "grad_norm": 1.803252100944519, "learning_rate": 4.1975625400898015e-07, "loss": 0.7556, "step": 75510 }, { "epoch": 0.9203197933043277, "grad_norm": 1.8611903190612793, "learning_rate": 4.1943553559974343e-07, "loss": 0.858, "step": 75515 }, { "epoch": 0.9203807295284755, "grad_norm": 1.7030456066131592, "learning_rate": 4.1911481719050676e-07, "loss": 0.7999, "step": 75520 }, { "epoch": 0.9204416657526233, "grad_norm": 1.6544002294540405, "learning_rate": 4.1879409878127005e-07, "loss": 0.8213, "step": 75525 }, { "epoch": 0.9205026019767711, "grad_norm": 2.006272554397583, "learning_rate": 4.184733803720334e-07, "loss": 0.8315, "step": 75530 }, { "epoch": 0.920563538200919, "grad_norm": 2.2258141040802, "learning_rate": 4.181526619627967e-07, "loss": 0.7164, "step": 75535 }, { "epoch": 0.9206244744250667, "grad_norm": 1.8404674530029297, "learning_rate": 4.1783194355356e-07, "loss": 0.7861, "step": 75540 }, { "epoch": 0.9206854106492145, "grad_norm": 2.037142038345337, "learning_rate": 4.1751122514432333e-07, "loss": 0.7777, "step": 75545 }, { "epoch": 0.9207463468733623, "grad_norm": 2.0960869789123535, "learning_rate": 4.171905067350866e-07, "loss": 0.759, "step": 75550 }, { "epoch": 0.9208072830975101, "grad_norm": 2.053490400314331, "learning_rate": 4.1686978832584995e-07, "loss": 0.7926, "step": 75555 }, { "epoch": 0.920868219321658, "grad_norm": 1.6795592308044434, "learning_rate": 4.1654906991661323e-07, "loss": 0.7661, "step": 75560 }, { "epoch": 0.9209291555458058, "grad_norm": 1.9289902448654175, "learning_rate": 4.1622835150737656e-07, "loss": 0.7744, "step": 75565 }, { "epoch": 0.9209900917699536, "grad_norm": 1.831398606300354, "learning_rate": 4.1590763309813984e-07, "loss": 0.8446, "step": 75570 }, { "epoch": 0.9210510279941013, "grad_norm": 2.0199384689331055, "learning_rate": 4.1558691468890323e-07, "loss": 0.7634, "step": 75575 }, { "epoch": 0.9211119642182491, "grad_norm": 1.9470274448394775, "learning_rate": 4.1526619627966646e-07, "loss": 0.8134, "step": 75580 }, { "epoch": 0.921172900442397, "grad_norm": 1.8307979106903076, "learning_rate": 4.1494547787042985e-07, "loss": 0.8213, "step": 75585 }, { "epoch": 0.9212338366665448, "grad_norm": 1.6230512857437134, "learning_rate": 4.146247594611931e-07, "loss": 0.8101, "step": 75590 }, { "epoch": 0.9212947728906926, "grad_norm": 2.1807191371917725, "learning_rate": 4.1430404105195646e-07, "loss": 0.8917, "step": 75595 }, { "epoch": 0.9213557091148404, "grad_norm": 2.006925106048584, "learning_rate": 4.139833226427197e-07, "loss": 0.7224, "step": 75600 }, { "epoch": 0.9214166453389883, "grad_norm": 2.078444719314575, "learning_rate": 4.136626042334831e-07, "loss": 0.7569, "step": 75605 }, { "epoch": 0.921477581563136, "grad_norm": 1.8690663576126099, "learning_rate": 4.1334188582424636e-07, "loss": 0.7964, "step": 75610 }, { "epoch": 0.9215385177872838, "grad_norm": 2.1718807220458984, "learning_rate": 4.130211674150097e-07, "loss": 0.9291, "step": 75615 }, { "epoch": 0.9215994540114316, "grad_norm": 2.371713399887085, "learning_rate": 4.12700449005773e-07, "loss": 0.7929, "step": 75620 }, { "epoch": 0.9216603902355794, "grad_norm": 2.179375171661377, "learning_rate": 4.123797305965363e-07, "loss": 0.8004, "step": 75625 }, { "epoch": 0.9217213264597273, "grad_norm": 1.7320401668548584, "learning_rate": 4.120590121872996e-07, "loss": 0.7584, "step": 75630 }, { "epoch": 0.9217822626838751, "grad_norm": 2.000460386276245, "learning_rate": 4.1173829377806293e-07, "loss": 0.8202, "step": 75635 }, { "epoch": 0.9218431989080229, "grad_norm": 1.8661895990371704, "learning_rate": 4.114175753688262e-07, "loss": 0.7547, "step": 75640 }, { "epoch": 0.9219041351321706, "grad_norm": 2.4652957916259766, "learning_rate": 4.1109685695958954e-07, "loss": 0.809, "step": 75645 }, { "epoch": 0.9219650713563184, "grad_norm": 2.024513006210327, "learning_rate": 4.107761385503528e-07, "loss": 0.8213, "step": 75650 }, { "epoch": 0.9220260075804663, "grad_norm": 2.005152702331543, "learning_rate": 4.1045542014111616e-07, "loss": 0.8097, "step": 75655 }, { "epoch": 0.9220869438046141, "grad_norm": 1.911820650100708, "learning_rate": 4.1013470173187944e-07, "loss": 0.856, "step": 75660 }, { "epoch": 0.9221478800287619, "grad_norm": 2.064300537109375, "learning_rate": 4.098139833226428e-07, "loss": 0.8338, "step": 75665 }, { "epoch": 0.9222088162529097, "grad_norm": 2.0474162101745605, "learning_rate": 4.0949326491340606e-07, "loss": 0.9242, "step": 75670 }, { "epoch": 0.9222697524770576, "grad_norm": 1.9873312711715698, "learning_rate": 4.091725465041694e-07, "loss": 0.8518, "step": 75675 }, { "epoch": 0.9223306887012053, "grad_norm": 1.8881120681762695, "learning_rate": 4.0885182809493267e-07, "loss": 0.8274, "step": 75680 }, { "epoch": 0.9223916249253531, "grad_norm": 1.9253205060958862, "learning_rate": 4.08531109685696e-07, "loss": 0.8688, "step": 75685 }, { "epoch": 0.9224525611495009, "grad_norm": 2.0479466915130615, "learning_rate": 4.082103912764593e-07, "loss": 0.8218, "step": 75690 }, { "epoch": 0.9225134973736487, "grad_norm": 1.931795358657837, "learning_rate": 4.078896728672226e-07, "loss": 0.8155, "step": 75695 }, { "epoch": 0.9225744335977966, "grad_norm": 1.9620426893234253, "learning_rate": 4.075689544579859e-07, "loss": 0.7758, "step": 75700 }, { "epoch": 0.9226353698219444, "grad_norm": 2.0391390323638916, "learning_rate": 4.0724823604874924e-07, "loss": 0.8094, "step": 75705 }, { "epoch": 0.9226963060460922, "grad_norm": 2.028193712234497, "learning_rate": 4.069275176395125e-07, "loss": 0.8482, "step": 75710 }, { "epoch": 0.9227572422702399, "grad_norm": 2.1662561893463135, "learning_rate": 4.0660679923027585e-07, "loss": 0.776, "step": 75715 }, { "epoch": 0.9228181784943877, "grad_norm": 1.9990910291671753, "learning_rate": 4.0628608082103914e-07, "loss": 0.8214, "step": 75720 }, { "epoch": 0.9228791147185356, "grad_norm": 2.0805470943450928, "learning_rate": 4.0596536241180247e-07, "loss": 0.7995, "step": 75725 }, { "epoch": 0.9229400509426834, "grad_norm": 2.0191385746002197, "learning_rate": 4.0564464400256575e-07, "loss": 0.7732, "step": 75730 }, { "epoch": 0.9230009871668312, "grad_norm": 1.742059350013733, "learning_rate": 4.053239255933291e-07, "loss": 0.7866, "step": 75735 }, { "epoch": 0.923061923390979, "grad_norm": 2.1208736896514893, "learning_rate": 4.0500320718409237e-07, "loss": 0.8142, "step": 75740 }, { "epoch": 0.9231228596151269, "grad_norm": 1.8579578399658203, "learning_rate": 4.046824887748557e-07, "loss": 0.7788, "step": 75745 }, { "epoch": 0.9231837958392746, "grad_norm": 2.0603387355804443, "learning_rate": 4.04361770365619e-07, "loss": 0.8206, "step": 75750 }, { "epoch": 0.9232447320634224, "grad_norm": 2.4135234355926514, "learning_rate": 4.040410519563823e-07, "loss": 0.778, "step": 75755 }, { "epoch": 0.9233056682875702, "grad_norm": 1.7943731546401978, "learning_rate": 4.037203335471456e-07, "loss": 0.8865, "step": 75760 }, { "epoch": 0.923366604511718, "grad_norm": 2.399775505065918, "learning_rate": 4.0339961513790893e-07, "loss": 0.803, "step": 75765 }, { "epoch": 0.9234275407358659, "grad_norm": 2.084108352661133, "learning_rate": 4.030788967286722e-07, "loss": 0.7814, "step": 75770 }, { "epoch": 0.9234884769600137, "grad_norm": 1.9843746423721313, "learning_rate": 4.0275817831943555e-07, "loss": 0.788, "step": 75775 }, { "epoch": 0.9235494131841615, "grad_norm": 1.7323241233825684, "learning_rate": 4.0243745991019883e-07, "loss": 0.8243, "step": 75780 }, { "epoch": 0.9236103494083092, "grad_norm": 1.7599931955337524, "learning_rate": 4.021167415009622e-07, "loss": 0.7788, "step": 75785 }, { "epoch": 0.923671285632457, "grad_norm": 1.9086610078811646, "learning_rate": 4.0179602309172545e-07, "loss": 0.8725, "step": 75790 }, { "epoch": 0.9237322218566049, "grad_norm": 1.7191836833953857, "learning_rate": 4.0147530468248883e-07, "loss": 0.7822, "step": 75795 }, { "epoch": 0.9237931580807527, "grad_norm": 1.9507068395614624, "learning_rate": 4.0115458627325206e-07, "loss": 0.8217, "step": 75800 }, { "epoch": 0.9238540943049005, "grad_norm": 2.2113800048828125, "learning_rate": 4.0083386786401545e-07, "loss": 0.7725, "step": 75805 }, { "epoch": 0.9239150305290483, "grad_norm": 1.9133201837539673, "learning_rate": 4.005131494547787e-07, "loss": 0.843, "step": 75810 }, { "epoch": 0.9239759667531962, "grad_norm": 1.8076515197753906, "learning_rate": 4.0019243104554207e-07, "loss": 0.8618, "step": 75815 }, { "epoch": 0.9240369029773439, "grad_norm": 2.0494332313537598, "learning_rate": 3.9987171263630535e-07, "loss": 0.7381, "step": 75820 }, { "epoch": 0.9240978392014917, "grad_norm": 2.0400545597076416, "learning_rate": 3.995509942270687e-07, "loss": 0.8675, "step": 75825 }, { "epoch": 0.9241587754256395, "grad_norm": 1.9056172370910645, "learning_rate": 3.9923027581783196e-07, "loss": 0.7403, "step": 75830 }, { "epoch": 0.9242197116497873, "grad_norm": 2.2699692249298096, "learning_rate": 3.989095574085953e-07, "loss": 0.7467, "step": 75835 }, { "epoch": 0.9242806478739352, "grad_norm": 2.440694808959961, "learning_rate": 3.9858883899935863e-07, "loss": 0.7471, "step": 75840 }, { "epoch": 0.924341584098083, "grad_norm": 1.8696335554122925, "learning_rate": 3.982681205901219e-07, "loss": 0.7623, "step": 75845 }, { "epoch": 0.9244025203222308, "grad_norm": 2.030905246734619, "learning_rate": 3.9794740218088525e-07, "loss": 0.7528, "step": 75850 }, { "epoch": 0.9244634565463785, "grad_norm": 1.810594916343689, "learning_rate": 3.9762668377164853e-07, "loss": 0.7937, "step": 75855 }, { "epoch": 0.9245243927705263, "grad_norm": 2.1378021240234375, "learning_rate": 3.9730596536241186e-07, "loss": 0.7679, "step": 75860 }, { "epoch": 0.9245853289946742, "grad_norm": 1.951653003692627, "learning_rate": 3.9698524695317515e-07, "loss": 0.82, "step": 75865 }, { "epoch": 0.924646265218822, "grad_norm": 2.356645345687866, "learning_rate": 3.966645285439385e-07, "loss": 0.8283, "step": 75870 }, { "epoch": 0.9247072014429698, "grad_norm": 2.125145196914673, "learning_rate": 3.9634381013470176e-07, "loss": 0.8012, "step": 75875 }, { "epoch": 0.9247681376671176, "grad_norm": 1.9427703619003296, "learning_rate": 3.960230917254651e-07, "loss": 0.8209, "step": 75880 }, { "epoch": 0.9248290738912655, "grad_norm": 1.8087319135665894, "learning_rate": 3.957023733162284e-07, "loss": 0.8697, "step": 75885 }, { "epoch": 0.9248900101154132, "grad_norm": 1.7850565910339355, "learning_rate": 3.953816549069917e-07, "loss": 0.7742, "step": 75890 }, { "epoch": 0.924950946339561, "grad_norm": 1.8425662517547607, "learning_rate": 3.95060936497755e-07, "loss": 0.8649, "step": 75895 }, { "epoch": 0.9250118825637088, "grad_norm": 1.8603402376174927, "learning_rate": 3.9474021808851833e-07, "loss": 0.7771, "step": 75900 }, { "epoch": 0.9250728187878566, "grad_norm": 2.2936105728149414, "learning_rate": 3.944194996792816e-07, "loss": 0.8753, "step": 75905 }, { "epoch": 0.9251337550120045, "grad_norm": 1.9031161069869995, "learning_rate": 3.9409878127004494e-07, "loss": 0.8219, "step": 75910 }, { "epoch": 0.9251946912361523, "grad_norm": 2.41103458404541, "learning_rate": 3.937780628608082e-07, "loss": 0.7896, "step": 75915 }, { "epoch": 0.9252556274603001, "grad_norm": 1.5982255935668945, "learning_rate": 3.9345734445157156e-07, "loss": 0.7902, "step": 75920 }, { "epoch": 0.9253165636844478, "grad_norm": 1.8007336854934692, "learning_rate": 3.9313662604233484e-07, "loss": 0.8281, "step": 75925 }, { "epoch": 0.9253774999085956, "grad_norm": 1.6441924571990967, "learning_rate": 3.928159076330982e-07, "loss": 0.8558, "step": 75930 }, { "epoch": 0.9254384361327435, "grad_norm": 2.0582616329193115, "learning_rate": 3.9249518922386146e-07, "loss": 0.8154, "step": 75935 }, { "epoch": 0.9254993723568913, "grad_norm": 2.2804267406463623, "learning_rate": 3.921744708146248e-07, "loss": 0.8273, "step": 75940 }, { "epoch": 0.9255603085810391, "grad_norm": 1.9265878200531006, "learning_rate": 3.9185375240538807e-07, "loss": 0.7765, "step": 75945 }, { "epoch": 0.9256212448051869, "grad_norm": 2.801722526550293, "learning_rate": 3.915330339961514e-07, "loss": 0.8292, "step": 75950 }, { "epoch": 0.9256821810293347, "grad_norm": 1.8828009366989136, "learning_rate": 3.912123155869147e-07, "loss": 0.8739, "step": 75955 }, { "epoch": 0.9257431172534825, "grad_norm": 1.8756202459335327, "learning_rate": 3.908915971776781e-07, "loss": 0.8256, "step": 75960 }, { "epoch": 0.9258040534776303, "grad_norm": 1.9834705591201782, "learning_rate": 3.905708787684413e-07, "loss": 0.875, "step": 75965 }, { "epoch": 0.9258649897017781, "grad_norm": 1.8666458129882812, "learning_rate": 3.902501603592047e-07, "loss": 0.7961, "step": 75970 }, { "epoch": 0.9259259259259259, "grad_norm": 1.7528194189071655, "learning_rate": 3.899294419499679e-07, "loss": 0.7321, "step": 75975 }, { "epoch": 0.9259868621500738, "grad_norm": 2.164480209350586, "learning_rate": 3.896087235407313e-07, "loss": 0.824, "step": 75980 }, { "epoch": 0.9260477983742216, "grad_norm": 1.94153892993927, "learning_rate": 3.8928800513149454e-07, "loss": 0.7647, "step": 75985 }, { "epoch": 0.9261087345983694, "grad_norm": 1.9833393096923828, "learning_rate": 3.889672867222579e-07, "loss": 0.7488, "step": 75990 }, { "epoch": 0.9261696708225171, "grad_norm": 2.270984411239624, "learning_rate": 3.886465683130212e-07, "loss": 0.8647, "step": 75995 }, { "epoch": 0.9262306070466649, "grad_norm": 2.1269052028656006, "learning_rate": 3.8832584990378454e-07, "loss": 0.8179, "step": 76000 }, { "epoch": 0.9262915432708128, "grad_norm": 1.8771947622299194, "learning_rate": 3.880051314945478e-07, "loss": 0.7394, "step": 76005 }, { "epoch": 0.9263524794949606, "grad_norm": 1.972182035446167, "learning_rate": 3.8768441308531116e-07, "loss": 0.8075, "step": 76010 }, { "epoch": 0.9264134157191084, "grad_norm": 1.9452316761016846, "learning_rate": 3.8736369467607444e-07, "loss": 0.8005, "step": 76015 }, { "epoch": 0.9264743519432562, "grad_norm": 1.6988874673843384, "learning_rate": 3.8704297626683777e-07, "loss": 0.793, "step": 76020 }, { "epoch": 0.926535288167404, "grad_norm": 2.2523388862609863, "learning_rate": 3.8672225785760105e-07, "loss": 0.767, "step": 76025 }, { "epoch": 0.9265962243915518, "grad_norm": 2.1417248249053955, "learning_rate": 3.864015394483644e-07, "loss": 0.8101, "step": 76030 }, { "epoch": 0.9266571606156996, "grad_norm": 1.7330039739608765, "learning_rate": 3.8608082103912767e-07, "loss": 0.7775, "step": 76035 }, { "epoch": 0.9267180968398474, "grad_norm": 2.0846447944641113, "learning_rate": 3.85760102629891e-07, "loss": 0.7898, "step": 76040 }, { "epoch": 0.9267790330639952, "grad_norm": 2.299330711364746, "learning_rate": 3.854393842206543e-07, "loss": 0.824, "step": 76045 }, { "epoch": 0.926839969288143, "grad_norm": 2.434532642364502, "learning_rate": 3.851186658114176e-07, "loss": 0.8497, "step": 76050 }, { "epoch": 0.9269009055122909, "grad_norm": 2.148089647293091, "learning_rate": 3.847979474021809e-07, "loss": 0.8078, "step": 76055 }, { "epoch": 0.9269618417364387, "grad_norm": 1.8728829622268677, "learning_rate": 3.8447722899294423e-07, "loss": 0.7995, "step": 76060 }, { "epoch": 0.9270227779605864, "grad_norm": 1.7501871585845947, "learning_rate": 3.841565105837075e-07, "loss": 0.859, "step": 76065 }, { "epoch": 0.9270837141847342, "grad_norm": 1.8545900583267212, "learning_rate": 3.8383579217447085e-07, "loss": 0.7993, "step": 76070 }, { "epoch": 0.927144650408882, "grad_norm": 1.806922435760498, "learning_rate": 3.8351507376523413e-07, "loss": 0.7965, "step": 76075 }, { "epoch": 0.9272055866330299, "grad_norm": 2.1665265560150146, "learning_rate": 3.8319435535599747e-07, "loss": 0.7663, "step": 76080 }, { "epoch": 0.9272665228571777, "grad_norm": 1.848757266998291, "learning_rate": 3.8287363694676075e-07, "loss": 0.7785, "step": 76085 }, { "epoch": 0.9273274590813255, "grad_norm": 2.0478222370147705, "learning_rate": 3.825529185375241e-07, "loss": 0.7813, "step": 76090 }, { "epoch": 0.9273883953054733, "grad_norm": 2.0338876247406006, "learning_rate": 3.8223220012828736e-07, "loss": 0.8769, "step": 76095 }, { "epoch": 0.9274493315296211, "grad_norm": 2.0443274974823, "learning_rate": 3.819114817190507e-07, "loss": 0.8101, "step": 76100 }, { "epoch": 0.9275102677537689, "grad_norm": 1.9836937189102173, "learning_rate": 3.81590763309814e-07, "loss": 0.8212, "step": 76105 }, { "epoch": 0.9275712039779167, "grad_norm": 1.9115188121795654, "learning_rate": 3.812700449005773e-07, "loss": 0.7926, "step": 76110 }, { "epoch": 0.9276321402020645, "grad_norm": 2.1771063804626465, "learning_rate": 3.809493264913406e-07, "loss": 0.857, "step": 76115 }, { "epoch": 0.9276930764262123, "grad_norm": 1.6579108238220215, "learning_rate": 3.8062860808210393e-07, "loss": 0.7023, "step": 76120 }, { "epoch": 0.9277540126503602, "grad_norm": 1.96871817111969, "learning_rate": 3.803078896728672e-07, "loss": 0.7268, "step": 76125 }, { "epoch": 0.927814948874508, "grad_norm": 1.9078329801559448, "learning_rate": 3.7998717126363055e-07, "loss": 0.8042, "step": 76130 }, { "epoch": 0.9278758850986557, "grad_norm": 2.0235629081726074, "learning_rate": 3.7966645285439393e-07, "loss": 0.8034, "step": 76135 }, { "epoch": 0.9279368213228035, "grad_norm": 1.6344175338745117, "learning_rate": 3.7934573444515716e-07, "loss": 0.7712, "step": 76140 }, { "epoch": 0.9279977575469514, "grad_norm": 2.0927224159240723, "learning_rate": 3.7902501603592055e-07, "loss": 0.8298, "step": 76145 }, { "epoch": 0.9280586937710992, "grad_norm": 2.0369629859924316, "learning_rate": 3.787042976266838e-07, "loss": 0.8072, "step": 76150 }, { "epoch": 0.928119629995247, "grad_norm": 1.7504541873931885, "learning_rate": 3.7838357921744716e-07, "loss": 0.868, "step": 76155 }, { "epoch": 0.9281805662193948, "grad_norm": 1.9158337116241455, "learning_rate": 3.780628608082104e-07, "loss": 0.8188, "step": 76160 }, { "epoch": 0.9282415024435426, "grad_norm": 1.988693118095398, "learning_rate": 3.777421423989738e-07, "loss": 0.7958, "step": 76165 }, { "epoch": 0.9283024386676904, "grad_norm": 1.860719919204712, "learning_rate": 3.7742142398973706e-07, "loss": 0.7807, "step": 76170 }, { "epoch": 0.9283633748918382, "grad_norm": 2.2833504676818848, "learning_rate": 3.771007055805004e-07, "loss": 0.8309, "step": 76175 }, { "epoch": 0.928424311115986, "grad_norm": 1.755960464477539, "learning_rate": 3.767799871712637e-07, "loss": 0.7369, "step": 76180 }, { "epoch": 0.9284852473401338, "grad_norm": 1.7437986135482788, "learning_rate": 3.76459268762027e-07, "loss": 0.7941, "step": 76185 }, { "epoch": 0.9285461835642816, "grad_norm": 2.0724122524261475, "learning_rate": 3.761385503527903e-07, "loss": 0.8246, "step": 76190 }, { "epoch": 0.9286071197884295, "grad_norm": 2.1004645824432373, "learning_rate": 3.7581783194355363e-07, "loss": 0.8058, "step": 76195 }, { "epoch": 0.9286680560125773, "grad_norm": 1.615864872932434, "learning_rate": 3.754971135343169e-07, "loss": 0.8736, "step": 76200 }, { "epoch": 0.928728992236725, "grad_norm": 2.2696962356567383, "learning_rate": 3.7517639512508024e-07, "loss": 0.8561, "step": 76205 }, { "epoch": 0.9287899284608728, "grad_norm": 2.228341817855835, "learning_rate": 3.748556767158435e-07, "loss": 0.754, "step": 76210 }, { "epoch": 0.9288508646850206, "grad_norm": 1.795390009880066, "learning_rate": 3.7453495830660686e-07, "loss": 0.7616, "step": 76215 }, { "epoch": 0.9289118009091685, "grad_norm": 2.0962116718292236, "learning_rate": 3.7421423989737014e-07, "loss": 0.8275, "step": 76220 }, { "epoch": 0.9289727371333163, "grad_norm": 1.6579076051712036, "learning_rate": 3.738935214881335e-07, "loss": 0.9906, "step": 76225 }, { "epoch": 0.9290336733574641, "grad_norm": 2.0143673419952393, "learning_rate": 3.7357280307889676e-07, "loss": 0.922, "step": 76230 }, { "epoch": 0.9290946095816118, "grad_norm": 1.792310357093811, "learning_rate": 3.732520846696601e-07, "loss": 0.8097, "step": 76235 }, { "epoch": 0.9291555458057597, "grad_norm": 2.3305654525756836, "learning_rate": 3.729313662604234e-07, "loss": 0.834, "step": 76240 }, { "epoch": 0.9292164820299075, "grad_norm": 1.7474030256271362, "learning_rate": 3.726106478511867e-07, "loss": 0.8002, "step": 76245 }, { "epoch": 0.9292774182540553, "grad_norm": 2.088197708129883, "learning_rate": 3.7228992944195e-07, "loss": 0.7623, "step": 76250 }, { "epoch": 0.9293383544782031, "grad_norm": 1.830268144607544, "learning_rate": 3.719692110327133e-07, "loss": 0.8244, "step": 76255 }, { "epoch": 0.9293992907023509, "grad_norm": 1.997010588645935, "learning_rate": 3.716484926234766e-07, "loss": 0.8001, "step": 76260 }, { "epoch": 0.9294602269264988, "grad_norm": 2.122148275375366, "learning_rate": 3.7132777421423994e-07, "loss": 0.7653, "step": 76265 }, { "epoch": 0.9295211631506465, "grad_norm": 2.1337499618530273, "learning_rate": 3.710070558050032e-07, "loss": 0.8145, "step": 76270 }, { "epoch": 0.9295820993747943, "grad_norm": 1.7534918785095215, "learning_rate": 3.7068633739576656e-07, "loss": 0.8538, "step": 76275 }, { "epoch": 0.9296430355989421, "grad_norm": 2.3442935943603516, "learning_rate": 3.7036561898652984e-07, "loss": 0.8343, "step": 76280 }, { "epoch": 0.92970397182309, "grad_norm": 1.7960219383239746, "learning_rate": 3.7004490057729317e-07, "loss": 0.7885, "step": 76285 }, { "epoch": 0.9297649080472378, "grad_norm": 2.3047142028808594, "learning_rate": 3.6972418216805645e-07, "loss": 0.8474, "step": 76290 }, { "epoch": 0.9298258442713856, "grad_norm": 1.9813470840454102, "learning_rate": 3.694034637588198e-07, "loss": 0.7622, "step": 76295 }, { "epoch": 0.9298867804955334, "grad_norm": 2.3894641399383545, "learning_rate": 3.6908274534958307e-07, "loss": 0.8427, "step": 76300 }, { "epoch": 0.9299477167196811, "grad_norm": 2.06846284866333, "learning_rate": 3.687620269403464e-07, "loss": 0.8007, "step": 76305 }, { "epoch": 0.930008652943829, "grad_norm": 1.9095382690429688, "learning_rate": 3.684413085311097e-07, "loss": 0.7988, "step": 76310 }, { "epoch": 0.9300695891679768, "grad_norm": 2.050675868988037, "learning_rate": 3.68120590121873e-07, "loss": 0.7827, "step": 76315 }, { "epoch": 0.9301305253921246, "grad_norm": 1.984241008758545, "learning_rate": 3.677998717126363e-07, "loss": 0.7654, "step": 76320 }, { "epoch": 0.9301914616162724, "grad_norm": 2.3398938179016113, "learning_rate": 3.6747915330339964e-07, "loss": 0.8776, "step": 76325 }, { "epoch": 0.9302523978404202, "grad_norm": 1.7190799713134766, "learning_rate": 3.671584348941629e-07, "loss": 0.7786, "step": 76330 }, { "epoch": 0.9303133340645681, "grad_norm": 1.760830283164978, "learning_rate": 3.6683771648492625e-07, "loss": 0.7661, "step": 76335 }, { "epoch": 0.9303742702887158, "grad_norm": 2.2750730514526367, "learning_rate": 3.6651699807568953e-07, "loss": 0.8532, "step": 76340 }, { "epoch": 0.9304352065128636, "grad_norm": 1.7348843812942505, "learning_rate": 3.661962796664529e-07, "loss": 0.8019, "step": 76345 }, { "epoch": 0.9304961427370114, "grad_norm": 1.9622182846069336, "learning_rate": 3.6587556125721615e-07, "loss": 0.8016, "step": 76350 }, { "epoch": 0.9305570789611592, "grad_norm": 1.731449007987976, "learning_rate": 3.6555484284797954e-07, "loss": 0.7878, "step": 76355 }, { "epoch": 0.9306180151853071, "grad_norm": 1.918312907218933, "learning_rate": 3.6523412443874276e-07, "loss": 0.8081, "step": 76360 }, { "epoch": 0.9306789514094549, "grad_norm": 2.2862558364868164, "learning_rate": 3.6491340602950615e-07, "loss": 0.804, "step": 76365 }, { "epoch": 0.9307398876336027, "grad_norm": 1.7972217798233032, "learning_rate": 3.645926876202694e-07, "loss": 0.8085, "step": 76370 }, { "epoch": 0.9308008238577504, "grad_norm": 1.9309113025665283, "learning_rate": 3.6427196921103277e-07, "loss": 0.8104, "step": 76375 }, { "epoch": 0.9308617600818982, "grad_norm": 1.879325032234192, "learning_rate": 3.6395125080179605e-07, "loss": 0.7997, "step": 76380 }, { "epoch": 0.9309226963060461, "grad_norm": 2.3999125957489014, "learning_rate": 3.636305323925594e-07, "loss": 0.8175, "step": 76385 }, { "epoch": 0.9309836325301939, "grad_norm": 1.828565001487732, "learning_rate": 3.6330981398332266e-07, "loss": 0.8183, "step": 76390 }, { "epoch": 0.9310445687543417, "grad_norm": 2.1869049072265625, "learning_rate": 3.62989095574086e-07, "loss": 0.878, "step": 76395 }, { "epoch": 0.9311055049784895, "grad_norm": 1.773796558380127, "learning_rate": 3.626683771648493e-07, "loss": 0.8217, "step": 76400 }, { "epoch": 0.9311664412026374, "grad_norm": 1.7106879949569702, "learning_rate": 3.623476587556126e-07, "loss": 0.7521, "step": 76405 }, { "epoch": 0.9312273774267851, "grad_norm": 2.891678810119629, "learning_rate": 3.620269403463759e-07, "loss": 0.7844, "step": 76410 }, { "epoch": 0.9312883136509329, "grad_norm": 1.90088951587677, "learning_rate": 3.6170622193713923e-07, "loss": 0.8392, "step": 76415 }, { "epoch": 0.9313492498750807, "grad_norm": 1.7288422584533691, "learning_rate": 3.613855035279025e-07, "loss": 0.8214, "step": 76420 }, { "epoch": 0.9314101860992285, "grad_norm": 1.8351426124572754, "learning_rate": 3.6106478511866585e-07, "loss": 0.8897, "step": 76425 }, { "epoch": 0.9314711223233764, "grad_norm": 1.846834659576416, "learning_rate": 3.6074406670942913e-07, "loss": 0.7997, "step": 76430 }, { "epoch": 0.9315320585475242, "grad_norm": 1.9771785736083984, "learning_rate": 3.6042334830019246e-07, "loss": 0.9109, "step": 76435 }, { "epoch": 0.931592994771672, "grad_norm": 2.058520555496216, "learning_rate": 3.601026298909558e-07, "loss": 0.7675, "step": 76440 }, { "epoch": 0.9316539309958197, "grad_norm": 2.1815359592437744, "learning_rate": 3.597819114817191e-07, "loss": 0.838, "step": 76445 }, { "epoch": 0.9317148672199675, "grad_norm": 1.8731625080108643, "learning_rate": 3.594611930724824e-07, "loss": 0.8784, "step": 76450 }, { "epoch": 0.9317758034441154, "grad_norm": 1.9395456314086914, "learning_rate": 3.591404746632457e-07, "loss": 0.8011, "step": 76455 }, { "epoch": 0.9318367396682632, "grad_norm": 2.1351685523986816, "learning_rate": 3.5881975625400903e-07, "loss": 0.8199, "step": 76460 }, { "epoch": 0.931897675892411, "grad_norm": 2.049577236175537, "learning_rate": 3.584990378447723e-07, "loss": 0.8711, "step": 76465 }, { "epoch": 0.9319586121165588, "grad_norm": 2.325597047805786, "learning_rate": 3.5817831943553564e-07, "loss": 0.8629, "step": 76470 }, { "epoch": 0.9320195483407067, "grad_norm": 1.7292108535766602, "learning_rate": 3.578576010262989e-07, "loss": 0.702, "step": 76475 }, { "epoch": 0.9320804845648544, "grad_norm": 2.334972858428955, "learning_rate": 3.5753688261706226e-07, "loss": 0.7773, "step": 76480 }, { "epoch": 0.9321414207890022, "grad_norm": 2.188910961151123, "learning_rate": 3.5721616420782554e-07, "loss": 0.8343, "step": 76485 }, { "epoch": 0.93220235701315, "grad_norm": 1.863599419593811, "learning_rate": 3.568954457985889e-07, "loss": 0.8072, "step": 76490 }, { "epoch": 0.9322632932372978, "grad_norm": 1.9436264038085938, "learning_rate": 3.5657472738935216e-07, "loss": 0.9041, "step": 76495 }, { "epoch": 0.9323242294614457, "grad_norm": 1.9914318323135376, "learning_rate": 3.562540089801155e-07, "loss": 0.8426, "step": 76500 }, { "epoch": 0.9323851656855935, "grad_norm": 2.095353841781616, "learning_rate": 3.559332905708788e-07, "loss": 0.8393, "step": 76505 }, { "epoch": 0.9324461019097413, "grad_norm": 1.900773525238037, "learning_rate": 3.556125721616421e-07, "loss": 0.7427, "step": 76510 }, { "epoch": 0.932507038133889, "grad_norm": 2.21036434173584, "learning_rate": 3.552918537524054e-07, "loss": 0.863, "step": 76515 }, { "epoch": 0.9325679743580368, "grad_norm": 2.017284870147705, "learning_rate": 3.549711353431688e-07, "loss": 0.7652, "step": 76520 }, { "epoch": 0.9326289105821847, "grad_norm": 1.9083441495895386, "learning_rate": 3.54650416933932e-07, "loss": 0.7654, "step": 76525 }, { "epoch": 0.9326898468063325, "grad_norm": 2.130815267562866, "learning_rate": 3.543296985246954e-07, "loss": 0.8631, "step": 76530 }, { "epoch": 0.9327507830304803, "grad_norm": 1.8779141902923584, "learning_rate": 3.540089801154586e-07, "loss": 0.817, "step": 76535 }, { "epoch": 0.9328117192546281, "grad_norm": 1.8510630130767822, "learning_rate": 3.53688261706222e-07, "loss": 0.8172, "step": 76540 }, { "epoch": 0.932872655478776, "grad_norm": 1.9322118759155273, "learning_rate": 3.5336754329698524e-07, "loss": 0.7938, "step": 76545 }, { "epoch": 0.9329335917029237, "grad_norm": 2.166414737701416, "learning_rate": 3.530468248877486e-07, "loss": 0.8226, "step": 76550 }, { "epoch": 0.9329945279270715, "grad_norm": 1.934618353843689, "learning_rate": 3.527261064785119e-07, "loss": 0.8468, "step": 76555 }, { "epoch": 0.9330554641512193, "grad_norm": 2.0763847827911377, "learning_rate": 3.5240538806927524e-07, "loss": 0.8133, "step": 76560 }, { "epoch": 0.9331164003753671, "grad_norm": 1.9943366050720215, "learning_rate": 3.520846696600385e-07, "loss": 0.779, "step": 76565 }, { "epoch": 0.933177336599515, "grad_norm": 3.1357977390289307, "learning_rate": 3.5176395125080186e-07, "loss": 0.8666, "step": 76570 }, { "epoch": 0.9332382728236628, "grad_norm": 2.198770761489868, "learning_rate": 3.5144323284156514e-07, "loss": 0.8338, "step": 76575 }, { "epoch": 0.9332992090478106, "grad_norm": 1.871566891670227, "learning_rate": 3.5112251443232847e-07, "loss": 0.8171, "step": 76580 }, { "epoch": 0.9333601452719583, "grad_norm": 1.8199496269226074, "learning_rate": 3.5080179602309175e-07, "loss": 0.8063, "step": 76585 }, { "epoch": 0.9334210814961061, "grad_norm": 2.146735429763794, "learning_rate": 3.504810776138551e-07, "loss": 0.7122, "step": 76590 }, { "epoch": 0.933482017720254, "grad_norm": 1.966945767402649, "learning_rate": 3.5016035920461837e-07, "loss": 0.7765, "step": 76595 }, { "epoch": 0.9335429539444018, "grad_norm": 1.625783920288086, "learning_rate": 3.498396407953817e-07, "loss": 0.8362, "step": 76600 }, { "epoch": 0.9336038901685496, "grad_norm": 1.89736807346344, "learning_rate": 3.49518922386145e-07, "loss": 0.89, "step": 76605 }, { "epoch": 0.9336648263926974, "grad_norm": 1.8512014150619507, "learning_rate": 3.491982039769083e-07, "loss": 0.8357, "step": 76610 }, { "epoch": 0.9337257626168453, "grad_norm": 1.9661198854446411, "learning_rate": 3.488774855676716e-07, "loss": 0.8196, "step": 76615 }, { "epoch": 0.933786698840993, "grad_norm": 1.9886119365692139, "learning_rate": 3.4855676715843494e-07, "loss": 0.7733, "step": 76620 }, { "epoch": 0.9338476350651408, "grad_norm": 1.9883402585983276, "learning_rate": 3.482360487491982e-07, "loss": 0.8675, "step": 76625 }, { "epoch": 0.9339085712892886, "grad_norm": 2.0546083450317383, "learning_rate": 3.4791533033996155e-07, "loss": 0.8212, "step": 76630 }, { "epoch": 0.9339695075134364, "grad_norm": 1.7615913152694702, "learning_rate": 3.4759461193072483e-07, "loss": 0.8099, "step": 76635 }, { "epoch": 0.9340304437375843, "grad_norm": 2.295501947402954, "learning_rate": 3.4727389352148817e-07, "loss": 0.8109, "step": 76640 }, { "epoch": 0.9340913799617321, "grad_norm": 2.084052801132202, "learning_rate": 3.4695317511225145e-07, "loss": 0.8563, "step": 76645 }, { "epoch": 0.9341523161858799, "grad_norm": 2.1521189212799072, "learning_rate": 3.466324567030148e-07, "loss": 0.8557, "step": 76650 }, { "epoch": 0.9342132524100276, "grad_norm": 2.187107563018799, "learning_rate": 3.4631173829377807e-07, "loss": 0.7385, "step": 76655 }, { "epoch": 0.9342741886341754, "grad_norm": 3.02571702003479, "learning_rate": 3.459910198845414e-07, "loss": 0.7702, "step": 76660 }, { "epoch": 0.9343351248583233, "grad_norm": 3.118072509765625, "learning_rate": 3.456703014753047e-07, "loss": 0.7899, "step": 76665 }, { "epoch": 0.9343960610824711, "grad_norm": 2.1448376178741455, "learning_rate": 3.45349583066068e-07, "loss": 0.8666, "step": 76670 }, { "epoch": 0.9344569973066189, "grad_norm": 1.858430027961731, "learning_rate": 3.450288646568313e-07, "loss": 0.7453, "step": 76675 }, { "epoch": 0.9345179335307667, "grad_norm": 1.8263593912124634, "learning_rate": 3.4470814624759463e-07, "loss": 0.7812, "step": 76680 }, { "epoch": 0.9345788697549146, "grad_norm": 2.014655590057373, "learning_rate": 3.443874278383579e-07, "loss": 0.7402, "step": 76685 }, { "epoch": 0.9346398059790623, "grad_norm": 2.499540090560913, "learning_rate": 3.4406670942912125e-07, "loss": 0.8672, "step": 76690 }, { "epoch": 0.9347007422032101, "grad_norm": 2.524806022644043, "learning_rate": 3.4374599101988453e-07, "loss": 0.8528, "step": 76695 }, { "epoch": 0.9347616784273579, "grad_norm": 2.042186737060547, "learning_rate": 3.4342527261064786e-07, "loss": 0.8492, "step": 76700 }, { "epoch": 0.9348226146515057, "grad_norm": 2.0587666034698486, "learning_rate": 3.4310455420141114e-07, "loss": 0.8315, "step": 76705 }, { "epoch": 0.9348835508756536, "grad_norm": 1.7763005495071411, "learning_rate": 3.427838357921745e-07, "loss": 0.8205, "step": 76710 }, { "epoch": 0.9349444870998014, "grad_norm": 2.1573450565338135, "learning_rate": 3.4246311738293776e-07, "loss": 0.8347, "step": 76715 }, { "epoch": 0.9350054233239492, "grad_norm": 2.251365900039673, "learning_rate": 3.421423989737011e-07, "loss": 0.766, "step": 76720 }, { "epoch": 0.9350663595480969, "grad_norm": 1.871543288230896, "learning_rate": 3.418216805644644e-07, "loss": 0.8038, "step": 76725 }, { "epoch": 0.9351272957722447, "grad_norm": 2.493738889694214, "learning_rate": 3.4150096215522776e-07, "loss": 0.8427, "step": 76730 }, { "epoch": 0.9351882319963926, "grad_norm": 2.0660319328308105, "learning_rate": 3.41180243745991e-07, "loss": 0.8062, "step": 76735 }, { "epoch": 0.9352491682205404, "grad_norm": 2.1059179306030273, "learning_rate": 3.408595253367544e-07, "loss": 0.7375, "step": 76740 }, { "epoch": 0.9353101044446882, "grad_norm": 1.7756140232086182, "learning_rate": 3.405388069275177e-07, "loss": 0.7885, "step": 76745 }, { "epoch": 0.935371040668836, "grad_norm": 2.2274692058563232, "learning_rate": 3.40218088518281e-07, "loss": 0.8121, "step": 76750 }, { "epoch": 0.9354319768929839, "grad_norm": 1.8769265413284302, "learning_rate": 3.3989737010904433e-07, "loss": 0.7842, "step": 76755 }, { "epoch": 0.9354929131171316, "grad_norm": 1.909977674484253, "learning_rate": 3.395766516998076e-07, "loss": 0.7757, "step": 76760 }, { "epoch": 0.9355538493412794, "grad_norm": 2.3778493404388428, "learning_rate": 3.3925593329057095e-07, "loss": 0.785, "step": 76765 }, { "epoch": 0.9356147855654272, "grad_norm": 1.7439160346984863, "learning_rate": 3.3893521488133423e-07, "loss": 0.7205, "step": 76770 }, { "epoch": 0.935675721789575, "grad_norm": 1.7974889278411865, "learning_rate": 3.3861449647209756e-07, "loss": 0.8399, "step": 76775 }, { "epoch": 0.9357366580137229, "grad_norm": 1.6740771532058716, "learning_rate": 3.3829377806286084e-07, "loss": 0.7969, "step": 76780 }, { "epoch": 0.9357975942378707, "grad_norm": 1.8843441009521484, "learning_rate": 3.379730596536242e-07, "loss": 0.8475, "step": 76785 }, { "epoch": 0.9358585304620185, "grad_norm": 1.9493073225021362, "learning_rate": 3.3765234124438746e-07, "loss": 0.8032, "step": 76790 }, { "epoch": 0.9359194666861662, "grad_norm": 1.7468374967575073, "learning_rate": 3.373316228351508e-07, "loss": 0.8394, "step": 76795 }, { "epoch": 0.935980402910314, "grad_norm": 3.1693716049194336, "learning_rate": 3.370109044259141e-07, "loss": 0.8217, "step": 76800 }, { "epoch": 0.9360413391344619, "grad_norm": 1.946218729019165, "learning_rate": 3.366901860166774e-07, "loss": 0.8697, "step": 76805 }, { "epoch": 0.9361022753586097, "grad_norm": 2.0406367778778076, "learning_rate": 3.363694676074407e-07, "loss": 0.8396, "step": 76810 }, { "epoch": 0.9361632115827575, "grad_norm": 2.3259174823760986, "learning_rate": 3.36048749198204e-07, "loss": 0.7766, "step": 76815 }, { "epoch": 0.9362241478069053, "grad_norm": 2.0140862464904785, "learning_rate": 3.357280307889673e-07, "loss": 0.7878, "step": 76820 }, { "epoch": 0.9362850840310531, "grad_norm": 1.7755558490753174, "learning_rate": 3.3540731237973064e-07, "loss": 0.8079, "step": 76825 }, { "epoch": 0.9363460202552009, "grad_norm": 1.742951512336731, "learning_rate": 3.350865939704939e-07, "loss": 0.8469, "step": 76830 }, { "epoch": 0.9364069564793487, "grad_norm": 2.348022699356079, "learning_rate": 3.3476587556125726e-07, "loss": 0.8661, "step": 76835 }, { "epoch": 0.9364678927034965, "grad_norm": 2.0440914630889893, "learning_rate": 3.3444515715202054e-07, "loss": 0.8535, "step": 76840 }, { "epoch": 0.9365288289276443, "grad_norm": 1.5776394605636597, "learning_rate": 3.3412443874278387e-07, "loss": 0.8615, "step": 76845 }, { "epoch": 0.9365897651517922, "grad_norm": 1.8452203273773193, "learning_rate": 3.3380372033354715e-07, "loss": 0.8011, "step": 76850 }, { "epoch": 0.93665070137594, "grad_norm": 1.8871759176254272, "learning_rate": 3.334830019243105e-07, "loss": 0.8369, "step": 76855 }, { "epoch": 0.9367116376000878, "grad_norm": 1.6261484622955322, "learning_rate": 3.3316228351507377e-07, "loss": 0.7703, "step": 76860 }, { "epoch": 0.9367725738242355, "grad_norm": 1.8995519876480103, "learning_rate": 3.328415651058371e-07, "loss": 0.8125, "step": 76865 }, { "epoch": 0.9368335100483833, "grad_norm": 2.1507723331451416, "learning_rate": 3.325208466966004e-07, "loss": 0.8184, "step": 76870 }, { "epoch": 0.9368944462725312, "grad_norm": 2.0473878383636475, "learning_rate": 3.322001282873637e-07, "loss": 0.7622, "step": 76875 }, { "epoch": 0.936955382496679, "grad_norm": 2.1720852851867676, "learning_rate": 3.31879409878127e-07, "loss": 0.7909, "step": 76880 }, { "epoch": 0.9370163187208268, "grad_norm": 2.089806318283081, "learning_rate": 3.3155869146889034e-07, "loss": 0.7301, "step": 76885 }, { "epoch": 0.9370772549449746, "grad_norm": 1.8728872537612915, "learning_rate": 3.312379730596536e-07, "loss": 0.8201, "step": 76890 }, { "epoch": 0.9371381911691224, "grad_norm": 1.9232224225997925, "learning_rate": 3.3091725465041695e-07, "loss": 0.8222, "step": 76895 }, { "epoch": 0.9371991273932702, "grad_norm": 1.9084476232528687, "learning_rate": 3.3059653624118023e-07, "loss": 0.8025, "step": 76900 }, { "epoch": 0.937260063617418, "grad_norm": 1.778679370880127, "learning_rate": 3.302758178319436e-07, "loss": 0.7781, "step": 76905 }, { "epoch": 0.9373209998415658, "grad_norm": 2.849432945251465, "learning_rate": 3.2995509942270685e-07, "loss": 0.7805, "step": 76910 }, { "epoch": 0.9373819360657136, "grad_norm": 2.06693172454834, "learning_rate": 3.2963438101347024e-07, "loss": 0.8105, "step": 76915 }, { "epoch": 0.9374428722898615, "grad_norm": 1.9048430919647217, "learning_rate": 3.2931366260423347e-07, "loss": 0.7953, "step": 76920 }, { "epoch": 0.9375038085140093, "grad_norm": 2.2121121883392334, "learning_rate": 3.2899294419499685e-07, "loss": 0.8133, "step": 76925 }, { "epoch": 0.9375647447381571, "grad_norm": 1.9481276273727417, "learning_rate": 3.286722257857601e-07, "loss": 0.7473, "step": 76930 }, { "epoch": 0.9376256809623048, "grad_norm": 1.7307862043380737, "learning_rate": 3.2835150737652347e-07, "loss": 0.791, "step": 76935 }, { "epoch": 0.9376866171864526, "grad_norm": 1.923620581626892, "learning_rate": 3.2803078896728675e-07, "loss": 0.8978, "step": 76940 }, { "epoch": 0.9377475534106005, "grad_norm": 1.7937695980072021, "learning_rate": 3.277100705580501e-07, "loss": 0.766, "step": 76945 }, { "epoch": 0.9378084896347483, "grad_norm": 1.9397227764129639, "learning_rate": 3.2738935214881337e-07, "loss": 0.789, "step": 76950 }, { "epoch": 0.9378694258588961, "grad_norm": 1.80814790725708, "learning_rate": 3.270686337395767e-07, "loss": 0.7696, "step": 76955 }, { "epoch": 0.9379303620830439, "grad_norm": 2.1125845909118652, "learning_rate": 3.2674791533034e-07, "loss": 0.7505, "step": 76960 }, { "epoch": 0.9379912983071917, "grad_norm": 1.8526394367218018, "learning_rate": 3.264271969211033e-07, "loss": 0.8298, "step": 76965 }, { "epoch": 0.9380522345313395, "grad_norm": 1.8186225891113281, "learning_rate": 3.261064785118666e-07, "loss": 0.8299, "step": 76970 }, { "epoch": 0.9381131707554873, "grad_norm": 2.004887819290161, "learning_rate": 3.2578576010262993e-07, "loss": 0.7636, "step": 76975 }, { "epoch": 0.9381741069796351, "grad_norm": 1.7809187173843384, "learning_rate": 3.254650416933932e-07, "loss": 0.7747, "step": 76980 }, { "epoch": 0.9382350432037829, "grad_norm": 1.9736199378967285, "learning_rate": 3.2514432328415655e-07, "loss": 0.782, "step": 76985 }, { "epoch": 0.9382959794279307, "grad_norm": 1.8334604501724243, "learning_rate": 3.2482360487491983e-07, "loss": 0.8425, "step": 76990 }, { "epoch": 0.9383569156520786, "grad_norm": 1.8105754852294922, "learning_rate": 3.2450288646568316e-07, "loss": 0.8367, "step": 76995 }, { "epoch": 0.9384178518762264, "grad_norm": 1.8712267875671387, "learning_rate": 3.2418216805644645e-07, "loss": 0.7369, "step": 77000 }, { "epoch": 0.9384787881003741, "grad_norm": 1.9474725723266602, "learning_rate": 3.238614496472098e-07, "loss": 0.8662, "step": 77005 }, { "epoch": 0.9385397243245219, "grad_norm": 1.858728051185608, "learning_rate": 3.2354073123797306e-07, "loss": 0.8582, "step": 77010 }, { "epoch": 0.9386006605486698, "grad_norm": 2.195568084716797, "learning_rate": 3.232200128287364e-07, "loss": 0.8007, "step": 77015 }, { "epoch": 0.9386615967728176, "grad_norm": 2.0249714851379395, "learning_rate": 3.228992944194997e-07, "loss": 0.8504, "step": 77020 }, { "epoch": 0.9387225329969654, "grad_norm": 2.1498706340789795, "learning_rate": 3.22578576010263e-07, "loss": 0.8595, "step": 77025 }, { "epoch": 0.9387834692211132, "grad_norm": 1.9256538152694702, "learning_rate": 3.222578576010263e-07, "loss": 0.7528, "step": 77030 }, { "epoch": 0.938844405445261, "grad_norm": 2.0906336307525635, "learning_rate": 3.2193713919178963e-07, "loss": 0.8169, "step": 77035 }, { "epoch": 0.9389053416694088, "grad_norm": 2.1712491512298584, "learning_rate": 3.2161642078255296e-07, "loss": 0.8045, "step": 77040 }, { "epoch": 0.9389662778935566, "grad_norm": 2.0491127967834473, "learning_rate": 3.2129570237331624e-07, "loss": 0.7415, "step": 77045 }, { "epoch": 0.9390272141177044, "grad_norm": 2.0435497760772705, "learning_rate": 3.209749839640796e-07, "loss": 0.739, "step": 77050 }, { "epoch": 0.9390881503418522, "grad_norm": 1.9886027574539185, "learning_rate": 3.2065426555484286e-07, "loss": 0.7858, "step": 77055 }, { "epoch": 0.939149086566, "grad_norm": 2.0498194694519043, "learning_rate": 3.203335471456062e-07, "loss": 0.8264, "step": 77060 }, { "epoch": 0.9392100227901479, "grad_norm": 1.8442678451538086, "learning_rate": 3.200128287363695e-07, "loss": 0.7705, "step": 77065 }, { "epoch": 0.9392709590142957, "grad_norm": 2.03926944732666, "learning_rate": 3.196921103271328e-07, "loss": 0.796, "step": 77070 }, { "epoch": 0.9393318952384434, "grad_norm": 1.8455036878585815, "learning_rate": 3.193713919178961e-07, "loss": 0.8242, "step": 77075 }, { "epoch": 0.9393928314625912, "grad_norm": 1.9898450374603271, "learning_rate": 3.190506735086595e-07, "loss": 0.8155, "step": 77080 }, { "epoch": 0.939453767686739, "grad_norm": 1.8518620729446411, "learning_rate": 3.187299550994227e-07, "loss": 0.7831, "step": 77085 }, { "epoch": 0.9395147039108869, "grad_norm": 1.9835684299468994, "learning_rate": 3.184092366901861e-07, "loss": 0.8415, "step": 77090 }, { "epoch": 0.9395756401350347, "grad_norm": 1.892918586730957, "learning_rate": 3.180885182809493e-07, "loss": 0.7705, "step": 77095 }, { "epoch": 0.9396365763591825, "grad_norm": 1.9037483930587769, "learning_rate": 3.177677998717127e-07, "loss": 0.8004, "step": 77100 }, { "epoch": 0.9396975125833303, "grad_norm": 1.940479040145874, "learning_rate": 3.1744708146247594e-07, "loss": 0.8147, "step": 77105 }, { "epoch": 0.939758448807478, "grad_norm": 2.2196810245513916, "learning_rate": 3.171263630532393e-07, "loss": 0.8093, "step": 77110 }, { "epoch": 0.9398193850316259, "grad_norm": 1.6931735277175903, "learning_rate": 3.168056446440026e-07, "loss": 0.7823, "step": 77115 }, { "epoch": 0.9398803212557737, "grad_norm": 2.038499116897583, "learning_rate": 3.1648492623476594e-07, "loss": 0.8628, "step": 77120 }, { "epoch": 0.9399412574799215, "grad_norm": 2.1372170448303223, "learning_rate": 3.161642078255292e-07, "loss": 0.8281, "step": 77125 }, { "epoch": 0.9400021937040693, "grad_norm": 2.05470609664917, "learning_rate": 3.1584348941629256e-07, "loss": 0.8085, "step": 77130 }, { "epoch": 0.9400631299282172, "grad_norm": 2.024188756942749, "learning_rate": 3.1552277100705584e-07, "loss": 0.8144, "step": 77135 }, { "epoch": 0.940124066152365, "grad_norm": 2.2806575298309326, "learning_rate": 3.152020525978192e-07, "loss": 0.7818, "step": 77140 }, { "epoch": 0.9401850023765127, "grad_norm": 1.8063205480575562, "learning_rate": 3.1488133418858246e-07, "loss": 0.8482, "step": 77145 }, { "epoch": 0.9402459386006605, "grad_norm": 1.9444029331207275, "learning_rate": 3.145606157793458e-07, "loss": 0.774, "step": 77150 }, { "epoch": 0.9403068748248083, "grad_norm": 2.367143392562866, "learning_rate": 3.1423989737010907e-07, "loss": 0.8567, "step": 77155 }, { "epoch": 0.9403678110489562, "grad_norm": 2.3680922985076904, "learning_rate": 3.139191789608724e-07, "loss": 0.8122, "step": 77160 }, { "epoch": 0.940428747273104, "grad_norm": 1.9717650413513184, "learning_rate": 3.135984605516357e-07, "loss": 0.7702, "step": 77165 }, { "epoch": 0.9404896834972518, "grad_norm": 1.872404932975769, "learning_rate": 3.13277742142399e-07, "loss": 0.7875, "step": 77170 }, { "epoch": 0.9405506197213995, "grad_norm": 1.9987727403640747, "learning_rate": 3.129570237331623e-07, "loss": 0.7447, "step": 77175 }, { "epoch": 0.9406115559455474, "grad_norm": 1.5360252857208252, "learning_rate": 3.1263630532392564e-07, "loss": 0.8195, "step": 77180 }, { "epoch": 0.9406724921696952, "grad_norm": 1.8379236459732056, "learning_rate": 3.123155869146889e-07, "loss": 0.7872, "step": 77185 }, { "epoch": 0.940733428393843, "grad_norm": 2.1205995082855225, "learning_rate": 3.1199486850545225e-07, "loss": 0.819, "step": 77190 }, { "epoch": 0.9407943646179908, "grad_norm": 1.8139276504516602, "learning_rate": 3.116741500962156e-07, "loss": 0.7728, "step": 77195 }, { "epoch": 0.9408553008421386, "grad_norm": 1.8580071926116943, "learning_rate": 3.1135343168697887e-07, "loss": 0.7729, "step": 77200 }, { "epoch": 0.9409162370662865, "grad_norm": 2.1242265701293945, "learning_rate": 3.110327132777422e-07, "loss": 0.8123, "step": 77205 }, { "epoch": 0.9409771732904342, "grad_norm": 2.0764894485473633, "learning_rate": 3.107119948685055e-07, "loss": 0.7666, "step": 77210 }, { "epoch": 0.941038109514582, "grad_norm": 1.8667429685592651, "learning_rate": 3.103912764592688e-07, "loss": 0.8952, "step": 77215 }, { "epoch": 0.9410990457387298, "grad_norm": 2.4911673069000244, "learning_rate": 3.100705580500321e-07, "loss": 0.7844, "step": 77220 }, { "epoch": 0.9411599819628776, "grad_norm": 1.76597261428833, "learning_rate": 3.0974983964079544e-07, "loss": 0.7402, "step": 77225 }, { "epoch": 0.9412209181870255, "grad_norm": 1.8227499723434448, "learning_rate": 3.094291212315587e-07, "loss": 0.7785, "step": 77230 }, { "epoch": 0.9412818544111733, "grad_norm": 2.062108039855957, "learning_rate": 3.0910840282232205e-07, "loss": 0.7595, "step": 77235 }, { "epoch": 0.9413427906353211, "grad_norm": 1.7221869230270386, "learning_rate": 3.0878768441308533e-07, "loss": 0.7771, "step": 77240 }, { "epoch": 0.9414037268594688, "grad_norm": 1.7308638095855713, "learning_rate": 3.0846696600384867e-07, "loss": 0.7966, "step": 77245 }, { "epoch": 0.9414646630836166, "grad_norm": 2.1462113857269287, "learning_rate": 3.0814624759461195e-07, "loss": 0.8268, "step": 77250 }, { "epoch": 0.9415255993077645, "grad_norm": 2.268169641494751, "learning_rate": 3.078255291853753e-07, "loss": 0.875, "step": 77255 }, { "epoch": 0.9415865355319123, "grad_norm": 1.8615291118621826, "learning_rate": 3.0750481077613856e-07, "loss": 0.7965, "step": 77260 }, { "epoch": 0.9416474717560601, "grad_norm": 1.954195261001587, "learning_rate": 3.071840923669019e-07, "loss": 0.7655, "step": 77265 }, { "epoch": 0.9417084079802079, "grad_norm": 1.8782315254211426, "learning_rate": 3.068633739576652e-07, "loss": 0.7728, "step": 77270 }, { "epoch": 0.9417693442043558, "grad_norm": 1.9483190774917603, "learning_rate": 3.065426555484285e-07, "loss": 0.7578, "step": 77275 }, { "epoch": 0.9418302804285035, "grad_norm": 1.928310513496399, "learning_rate": 3.062219371391918e-07, "loss": 0.8133, "step": 77280 }, { "epoch": 0.9418912166526513, "grad_norm": 2.007952928543091, "learning_rate": 3.0590121872995513e-07, "loss": 0.8609, "step": 77285 }, { "epoch": 0.9419521528767991, "grad_norm": 1.7135839462280273, "learning_rate": 3.0558050032071847e-07, "loss": 0.8437, "step": 77290 }, { "epoch": 0.9420130891009469, "grad_norm": 1.9330778121948242, "learning_rate": 3.0525978191148175e-07, "loss": 0.7715, "step": 77295 }, { "epoch": 0.9420740253250948, "grad_norm": 2.0585389137268066, "learning_rate": 3.049390635022451e-07, "loss": 0.7719, "step": 77300 }, { "epoch": 0.9421349615492426, "grad_norm": 1.887389898300171, "learning_rate": 3.0461834509300836e-07, "loss": 0.8238, "step": 77305 }, { "epoch": 0.9421958977733904, "grad_norm": 2.0025293827056885, "learning_rate": 3.042976266837717e-07, "loss": 0.8749, "step": 77310 }, { "epoch": 0.9422568339975381, "grad_norm": 1.744637370109558, "learning_rate": 3.03976908274535e-07, "loss": 0.7801, "step": 77315 }, { "epoch": 0.942317770221686, "grad_norm": 2.328941822052002, "learning_rate": 3.036561898652983e-07, "loss": 0.8536, "step": 77320 }, { "epoch": 0.9423787064458338, "grad_norm": 1.7541319131851196, "learning_rate": 3.033354714560616e-07, "loss": 0.8031, "step": 77325 }, { "epoch": 0.9424396426699816, "grad_norm": 1.82346510887146, "learning_rate": 3.0301475304682493e-07, "loss": 0.8594, "step": 77330 }, { "epoch": 0.9425005788941294, "grad_norm": 1.9906858205795288, "learning_rate": 3.026940346375882e-07, "loss": 0.8365, "step": 77335 }, { "epoch": 0.9425615151182772, "grad_norm": 2.0561330318450928, "learning_rate": 3.0237331622835154e-07, "loss": 0.7613, "step": 77340 }, { "epoch": 0.9426224513424251, "grad_norm": 2.1059162616729736, "learning_rate": 3.020525978191148e-07, "loss": 0.8068, "step": 77345 }, { "epoch": 0.9426833875665728, "grad_norm": 2.2138113975524902, "learning_rate": 3.0173187940987816e-07, "loss": 0.7716, "step": 77350 }, { "epoch": 0.9427443237907206, "grad_norm": 2.522611618041992, "learning_rate": 3.0141116100064144e-07, "loss": 0.8431, "step": 77355 }, { "epoch": 0.9428052600148684, "grad_norm": 1.7593984603881836, "learning_rate": 3.010904425914048e-07, "loss": 0.8175, "step": 77360 }, { "epoch": 0.9428661962390162, "grad_norm": 2.032827615737915, "learning_rate": 3.0076972418216806e-07, "loss": 0.8455, "step": 77365 }, { "epoch": 0.9429271324631641, "grad_norm": 2.0135135650634766, "learning_rate": 3.004490057729314e-07, "loss": 0.8149, "step": 77370 }, { "epoch": 0.9429880686873119, "grad_norm": 2.2185006141662598, "learning_rate": 3.001282873636947e-07, "loss": 0.8223, "step": 77375 }, { "epoch": 0.9430490049114597, "grad_norm": 1.9966390132904053, "learning_rate": 2.99807568954458e-07, "loss": 0.8457, "step": 77380 }, { "epoch": 0.9431099411356074, "grad_norm": 1.9923553466796875, "learning_rate": 2.994868505452213e-07, "loss": 0.7904, "step": 77385 }, { "epoch": 0.9431708773597552, "grad_norm": 1.9970954656600952, "learning_rate": 2.991661321359846e-07, "loss": 0.7372, "step": 77390 }, { "epoch": 0.9432318135839031, "grad_norm": 1.6312227249145508, "learning_rate": 2.9884541372674796e-07, "loss": 0.7988, "step": 77395 }, { "epoch": 0.9432927498080509, "grad_norm": 2.0093095302581787, "learning_rate": 2.9852469531751124e-07, "loss": 0.8966, "step": 77400 }, { "epoch": 0.9433536860321987, "grad_norm": 2.1491482257843018, "learning_rate": 2.982039769082746e-07, "loss": 0.8338, "step": 77405 }, { "epoch": 0.9434146222563465, "grad_norm": 1.7882511615753174, "learning_rate": 2.9788325849903786e-07, "loss": 0.7647, "step": 77410 }, { "epoch": 0.9434755584804944, "grad_norm": 2.0920193195343018, "learning_rate": 2.975625400898012e-07, "loss": 0.786, "step": 77415 }, { "epoch": 0.9435364947046421, "grad_norm": 2.1589550971984863, "learning_rate": 2.9724182168056447e-07, "loss": 0.759, "step": 77420 }, { "epoch": 0.9435974309287899, "grad_norm": 1.9238770008087158, "learning_rate": 2.969211032713278e-07, "loss": 0.7917, "step": 77425 }, { "epoch": 0.9436583671529377, "grad_norm": 2.0155866146087646, "learning_rate": 2.966003848620911e-07, "loss": 0.8544, "step": 77430 }, { "epoch": 0.9437193033770855, "grad_norm": 2.06673526763916, "learning_rate": 2.962796664528544e-07, "loss": 0.7716, "step": 77435 }, { "epoch": 0.9437802396012334, "grad_norm": 1.9524915218353271, "learning_rate": 2.959589480436177e-07, "loss": 0.7546, "step": 77440 }, { "epoch": 0.9438411758253812, "grad_norm": 1.721688151359558, "learning_rate": 2.9563822963438104e-07, "loss": 0.7984, "step": 77445 }, { "epoch": 0.943902112049529, "grad_norm": 1.9221166372299194, "learning_rate": 2.953175112251443e-07, "loss": 0.7254, "step": 77450 }, { "epoch": 0.9439630482736767, "grad_norm": 1.9040980339050293, "learning_rate": 2.9499679281590765e-07, "loss": 0.7518, "step": 77455 }, { "epoch": 0.9440239844978245, "grad_norm": 2.0456693172454834, "learning_rate": 2.9467607440667094e-07, "loss": 0.8142, "step": 77460 }, { "epoch": 0.9440849207219724, "grad_norm": 1.8393912315368652, "learning_rate": 2.9435535599743427e-07, "loss": 0.83, "step": 77465 }, { "epoch": 0.9441458569461202, "grad_norm": 1.668052077293396, "learning_rate": 2.9403463758819755e-07, "loss": 0.71, "step": 77470 }, { "epoch": 0.944206793170268, "grad_norm": 1.8811205625534058, "learning_rate": 2.937139191789609e-07, "loss": 0.8032, "step": 77475 }, { "epoch": 0.9442677293944158, "grad_norm": 1.9268537759780884, "learning_rate": 2.9339320076972417e-07, "loss": 0.7791, "step": 77480 }, { "epoch": 0.9443286656185637, "grad_norm": 2.0619661808013916, "learning_rate": 2.930724823604875e-07, "loss": 0.8025, "step": 77485 }, { "epoch": 0.9443896018427114, "grad_norm": 1.9582719802856445, "learning_rate": 2.927517639512508e-07, "loss": 0.7619, "step": 77490 }, { "epoch": 0.9444505380668592, "grad_norm": 2.1336593627929688, "learning_rate": 2.9243104554201417e-07, "loss": 0.8264, "step": 77495 }, { "epoch": 0.944511474291007, "grad_norm": 2.228020429611206, "learning_rate": 2.9211032713277745e-07, "loss": 0.7971, "step": 77500 }, { "epoch": 0.9445724105151548, "grad_norm": 1.6075223684310913, "learning_rate": 2.917896087235408e-07, "loss": 0.8058, "step": 77505 }, { "epoch": 0.9446333467393027, "grad_norm": 1.6241095066070557, "learning_rate": 2.9146889031430407e-07, "loss": 0.8282, "step": 77510 }, { "epoch": 0.9446942829634505, "grad_norm": 1.9540677070617676, "learning_rate": 2.911481719050674e-07, "loss": 0.8128, "step": 77515 }, { "epoch": 0.9447552191875983, "grad_norm": 2.8335254192352295, "learning_rate": 2.908274534958307e-07, "loss": 0.8133, "step": 77520 }, { "epoch": 0.944816155411746, "grad_norm": 1.6878392696380615, "learning_rate": 2.90506735086594e-07, "loss": 0.7532, "step": 77525 }, { "epoch": 0.9448770916358938, "grad_norm": 2.270623207092285, "learning_rate": 2.901860166773573e-07, "loss": 0.8925, "step": 77530 }, { "epoch": 0.9449380278600417, "grad_norm": 1.9368095397949219, "learning_rate": 2.8986529826812063e-07, "loss": 0.8183, "step": 77535 }, { "epoch": 0.9449989640841895, "grad_norm": 1.8027153015136719, "learning_rate": 2.895445798588839e-07, "loss": 0.7902, "step": 77540 }, { "epoch": 0.9450599003083373, "grad_norm": 2.0491485595703125, "learning_rate": 2.8922386144964725e-07, "loss": 0.8411, "step": 77545 }, { "epoch": 0.9451208365324851, "grad_norm": 1.889041543006897, "learning_rate": 2.8890314304041053e-07, "loss": 0.7655, "step": 77550 }, { "epoch": 0.945181772756633, "grad_norm": 2.2023098468780518, "learning_rate": 2.8858242463117387e-07, "loss": 0.8178, "step": 77555 }, { "epoch": 0.9452427089807807, "grad_norm": 1.9842544794082642, "learning_rate": 2.8826170622193715e-07, "loss": 0.7822, "step": 77560 }, { "epoch": 0.9453036452049285, "grad_norm": 2.0912435054779053, "learning_rate": 2.879409878127005e-07, "loss": 0.7828, "step": 77565 }, { "epoch": 0.9453645814290763, "grad_norm": 1.7505695819854736, "learning_rate": 2.876202694034638e-07, "loss": 0.8015, "step": 77570 }, { "epoch": 0.9454255176532241, "grad_norm": 1.7124394178390503, "learning_rate": 2.872995509942271e-07, "loss": 0.8054, "step": 77575 }, { "epoch": 0.945486453877372, "grad_norm": 1.793617606163025, "learning_rate": 2.8697883258499043e-07, "loss": 0.7932, "step": 77580 }, { "epoch": 0.9455473901015198, "grad_norm": 1.953795075416565, "learning_rate": 2.866581141757537e-07, "loss": 0.8704, "step": 77585 }, { "epoch": 0.9456083263256676, "grad_norm": 1.9487814903259277, "learning_rate": 2.8633739576651705e-07, "loss": 0.8888, "step": 77590 }, { "epoch": 0.9456692625498153, "grad_norm": 2.061133861541748, "learning_rate": 2.8601667735728033e-07, "loss": 0.8324, "step": 77595 }, { "epoch": 0.9457301987739631, "grad_norm": 1.9748562574386597, "learning_rate": 2.8569595894804366e-07, "loss": 0.8879, "step": 77600 }, { "epoch": 0.945791134998111, "grad_norm": 1.7783626317977905, "learning_rate": 2.8537524053880695e-07, "loss": 0.8181, "step": 77605 }, { "epoch": 0.9458520712222588, "grad_norm": 1.8925068378448486, "learning_rate": 2.850545221295703e-07, "loss": 0.7996, "step": 77610 }, { "epoch": 0.9459130074464066, "grad_norm": 2.247637987136841, "learning_rate": 2.8473380372033356e-07, "loss": 0.9075, "step": 77615 }, { "epoch": 0.9459739436705544, "grad_norm": 2.065246820449829, "learning_rate": 2.844130853110969e-07, "loss": 0.8227, "step": 77620 }, { "epoch": 0.9460348798947023, "grad_norm": 2.3162407875061035, "learning_rate": 2.840923669018602e-07, "loss": 0.7987, "step": 77625 }, { "epoch": 0.94609581611885, "grad_norm": 1.9400087594985962, "learning_rate": 2.837716484926235e-07, "loss": 0.9146, "step": 77630 }, { "epoch": 0.9461567523429978, "grad_norm": 2.0038037300109863, "learning_rate": 2.834509300833868e-07, "loss": 0.8051, "step": 77635 }, { "epoch": 0.9462176885671456, "grad_norm": 1.7111647129058838, "learning_rate": 2.8313021167415013e-07, "loss": 0.8169, "step": 77640 }, { "epoch": 0.9462786247912934, "grad_norm": 1.8625576496124268, "learning_rate": 2.828094932649134e-07, "loss": 0.8013, "step": 77645 }, { "epoch": 0.9463395610154413, "grad_norm": 1.9097909927368164, "learning_rate": 2.8248877485567674e-07, "loss": 0.7743, "step": 77650 }, { "epoch": 0.9464004972395891, "grad_norm": 1.9249913692474365, "learning_rate": 2.8216805644644e-07, "loss": 0.7854, "step": 77655 }, { "epoch": 0.9464614334637369, "grad_norm": 1.9011574983596802, "learning_rate": 2.8184733803720336e-07, "loss": 0.8014, "step": 77660 }, { "epoch": 0.9465223696878846, "grad_norm": 1.8869997262954712, "learning_rate": 2.8152661962796664e-07, "loss": 0.7891, "step": 77665 }, { "epoch": 0.9465833059120324, "grad_norm": 1.8754215240478516, "learning_rate": 2.8120590121873e-07, "loss": 0.8671, "step": 77670 }, { "epoch": 0.9466442421361803, "grad_norm": 1.7404663562774658, "learning_rate": 2.808851828094933e-07, "loss": 0.7665, "step": 77675 }, { "epoch": 0.9467051783603281, "grad_norm": 2.5415420532226562, "learning_rate": 2.805644644002566e-07, "loss": 0.7994, "step": 77680 }, { "epoch": 0.9467661145844759, "grad_norm": 1.9444986581802368, "learning_rate": 2.802437459910199e-07, "loss": 0.8496, "step": 77685 }, { "epoch": 0.9468270508086237, "grad_norm": 1.8659049272537231, "learning_rate": 2.799230275817832e-07, "loss": 0.8342, "step": 77690 }, { "epoch": 0.9468879870327715, "grad_norm": 2.0085344314575195, "learning_rate": 2.7960230917254654e-07, "loss": 0.7968, "step": 77695 }, { "epoch": 0.9469489232569193, "grad_norm": 2.2317519187927246, "learning_rate": 2.792815907633098e-07, "loss": 0.8214, "step": 77700 }, { "epoch": 0.9470098594810671, "grad_norm": 2.1846859455108643, "learning_rate": 2.7896087235407316e-07, "loss": 0.9234, "step": 77705 }, { "epoch": 0.9470707957052149, "grad_norm": 1.9303210973739624, "learning_rate": 2.7864015394483644e-07, "loss": 0.8523, "step": 77710 }, { "epoch": 0.9471317319293627, "grad_norm": 1.916250467300415, "learning_rate": 2.7831943553559977e-07, "loss": 0.8648, "step": 77715 }, { "epoch": 0.9471926681535106, "grad_norm": 2.0966150760650635, "learning_rate": 2.7799871712636305e-07, "loss": 0.7361, "step": 77720 }, { "epoch": 0.9472536043776584, "grad_norm": 2.0700201988220215, "learning_rate": 2.776779987171264e-07, "loss": 0.7477, "step": 77725 }, { "epoch": 0.9473145406018062, "grad_norm": 1.9894452095031738, "learning_rate": 2.7735728030788967e-07, "loss": 0.8404, "step": 77730 }, { "epoch": 0.9473754768259539, "grad_norm": 1.805267095565796, "learning_rate": 2.77036561898653e-07, "loss": 0.8255, "step": 77735 }, { "epoch": 0.9474364130501017, "grad_norm": 1.820562720298767, "learning_rate": 2.767158434894163e-07, "loss": 0.8697, "step": 77740 }, { "epoch": 0.9474973492742496, "grad_norm": 2.1080667972564697, "learning_rate": 2.763951250801796e-07, "loss": 0.8166, "step": 77745 }, { "epoch": 0.9475582854983974, "grad_norm": 2.0288748741149902, "learning_rate": 2.760744066709429e-07, "loss": 0.7933, "step": 77750 }, { "epoch": 0.9476192217225452, "grad_norm": 2.1912918090820312, "learning_rate": 2.7575368826170624e-07, "loss": 0.9222, "step": 77755 }, { "epoch": 0.947680157946693, "grad_norm": 2.002102851867676, "learning_rate": 2.754329698524695e-07, "loss": 0.8376, "step": 77760 }, { "epoch": 0.9477410941708408, "grad_norm": 2.0183777809143066, "learning_rate": 2.7511225144323285e-07, "loss": 0.8496, "step": 77765 }, { "epoch": 0.9478020303949886, "grad_norm": 1.7164629697799683, "learning_rate": 2.7479153303399613e-07, "loss": 0.8165, "step": 77770 }, { "epoch": 0.9478629666191364, "grad_norm": 2.1932320594787598, "learning_rate": 2.7447081462475947e-07, "loss": 0.8059, "step": 77775 }, { "epoch": 0.9479239028432842, "grad_norm": 2.036982774734497, "learning_rate": 2.741500962155228e-07, "loss": 0.8089, "step": 77780 }, { "epoch": 0.947984839067432, "grad_norm": 2.134462594985962, "learning_rate": 2.738293778062861e-07, "loss": 0.7867, "step": 77785 }, { "epoch": 0.9480457752915799, "grad_norm": 1.9036283493041992, "learning_rate": 2.735086593970494e-07, "loss": 0.742, "step": 77790 }, { "epoch": 0.9481067115157277, "grad_norm": 1.8873852491378784, "learning_rate": 2.7318794098781275e-07, "loss": 0.7678, "step": 77795 }, { "epoch": 0.9481676477398755, "grad_norm": 2.055891990661621, "learning_rate": 2.7286722257857603e-07, "loss": 0.8162, "step": 77800 }, { "epoch": 0.9482285839640232, "grad_norm": 1.9052062034606934, "learning_rate": 2.7254650416933937e-07, "loss": 0.8732, "step": 77805 }, { "epoch": 0.948289520188171, "grad_norm": 1.9838266372680664, "learning_rate": 2.7222578576010265e-07, "loss": 0.7927, "step": 77810 }, { "epoch": 0.9483504564123189, "grad_norm": 2.0810322761535645, "learning_rate": 2.71905067350866e-07, "loss": 0.8497, "step": 77815 }, { "epoch": 0.9484113926364667, "grad_norm": 2.0119292736053467, "learning_rate": 2.7158434894162927e-07, "loss": 0.8256, "step": 77820 }, { "epoch": 0.9484723288606145, "grad_norm": 2.0934865474700928, "learning_rate": 2.712636305323926e-07, "loss": 0.7512, "step": 77825 }, { "epoch": 0.9485332650847623, "grad_norm": 2.3165035247802734, "learning_rate": 2.709429121231559e-07, "loss": 0.83, "step": 77830 }, { "epoch": 0.9485942013089101, "grad_norm": 1.7984663248062134, "learning_rate": 2.706221937139192e-07, "loss": 0.8408, "step": 77835 }, { "epoch": 0.9486551375330579, "grad_norm": 1.8665399551391602, "learning_rate": 2.703014753046825e-07, "loss": 0.8111, "step": 77840 }, { "epoch": 0.9487160737572057, "grad_norm": 1.9887809753417969, "learning_rate": 2.6998075689544583e-07, "loss": 0.7643, "step": 77845 }, { "epoch": 0.9487770099813535, "grad_norm": 1.8392796516418457, "learning_rate": 2.6966003848620917e-07, "loss": 0.8657, "step": 77850 }, { "epoch": 0.9488379462055013, "grad_norm": 1.9304896593093872, "learning_rate": 2.6933932007697245e-07, "loss": 0.7923, "step": 77855 }, { "epoch": 0.9488988824296491, "grad_norm": 2.257011890411377, "learning_rate": 2.690186016677358e-07, "loss": 0.8822, "step": 77860 }, { "epoch": 0.948959818653797, "grad_norm": 2.0133206844329834, "learning_rate": 2.6869788325849906e-07, "loss": 0.795, "step": 77865 }, { "epoch": 0.9490207548779448, "grad_norm": 2.1952097415924072, "learning_rate": 2.683771648492624e-07, "loss": 0.8258, "step": 77870 }, { "epoch": 0.9490816911020925, "grad_norm": 1.892232060432434, "learning_rate": 2.680564464400257e-07, "loss": 0.7863, "step": 77875 }, { "epoch": 0.9491426273262403, "grad_norm": 2.1565632820129395, "learning_rate": 2.67735728030789e-07, "loss": 0.8364, "step": 77880 }, { "epoch": 0.9492035635503882, "grad_norm": 2.1345300674438477, "learning_rate": 2.674150096215523e-07, "loss": 0.875, "step": 77885 }, { "epoch": 0.949264499774536, "grad_norm": 2.3093714714050293, "learning_rate": 2.6709429121231563e-07, "loss": 0.8121, "step": 77890 }, { "epoch": 0.9493254359986838, "grad_norm": 2.319162368774414, "learning_rate": 2.667735728030789e-07, "loss": 0.8166, "step": 77895 }, { "epoch": 0.9493863722228316, "grad_norm": 2.0887296199798584, "learning_rate": 2.6645285439384225e-07, "loss": 0.8239, "step": 77900 }, { "epoch": 0.9494473084469794, "grad_norm": 3.0916178226470947, "learning_rate": 2.6613213598460553e-07, "loss": 0.8413, "step": 77905 }, { "epoch": 0.9495082446711272, "grad_norm": 2.0523428916931152, "learning_rate": 2.6581141757536886e-07, "loss": 0.8384, "step": 77910 }, { "epoch": 0.949569180895275, "grad_norm": 2.170926332473755, "learning_rate": 2.6549069916613214e-07, "loss": 0.8027, "step": 77915 }, { "epoch": 0.9496301171194228, "grad_norm": 2.1300137042999268, "learning_rate": 2.651699807568955e-07, "loss": 0.8534, "step": 77920 }, { "epoch": 0.9496910533435706, "grad_norm": 1.8835482597351074, "learning_rate": 2.6484926234765876e-07, "loss": 0.8058, "step": 77925 }, { "epoch": 0.9497519895677184, "grad_norm": 2.083211660385132, "learning_rate": 2.645285439384221e-07, "loss": 0.7383, "step": 77930 }, { "epoch": 0.9498129257918663, "grad_norm": 1.801945447921753, "learning_rate": 2.642078255291854e-07, "loss": 0.6823, "step": 77935 }, { "epoch": 0.9498738620160141, "grad_norm": 1.9651600122451782, "learning_rate": 2.638871071199487e-07, "loss": 0.7978, "step": 77940 }, { "epoch": 0.9499347982401618, "grad_norm": 1.8948453664779663, "learning_rate": 2.63566388710712e-07, "loss": 0.9032, "step": 77945 }, { "epoch": 0.9499957344643096, "grad_norm": 2.020718574523926, "learning_rate": 2.632456703014753e-07, "loss": 0.7535, "step": 77950 }, { "epoch": 0.9500566706884574, "grad_norm": 2.3804616928100586, "learning_rate": 2.6292495189223866e-07, "loss": 0.7979, "step": 77955 }, { "epoch": 0.9501176069126053, "grad_norm": 2.0805535316467285, "learning_rate": 2.6260423348300194e-07, "loss": 0.8469, "step": 77960 }, { "epoch": 0.9501785431367531, "grad_norm": 1.9633582830429077, "learning_rate": 2.622835150737653e-07, "loss": 0.8845, "step": 77965 }, { "epoch": 0.9502394793609009, "grad_norm": 2.04066801071167, "learning_rate": 2.6196279666452856e-07, "loss": 0.7901, "step": 77970 }, { "epoch": 0.9503004155850487, "grad_norm": 2.1324639320373535, "learning_rate": 2.616420782552919e-07, "loss": 0.7805, "step": 77975 }, { "epoch": 0.9503613518091965, "grad_norm": 1.772756814956665, "learning_rate": 2.6132135984605517e-07, "loss": 0.8277, "step": 77980 }, { "epoch": 0.9504222880333443, "grad_norm": 2.294874906539917, "learning_rate": 2.610006414368185e-07, "loss": 0.8226, "step": 77985 }, { "epoch": 0.9504832242574921, "grad_norm": 2.104663610458374, "learning_rate": 2.606799230275818e-07, "loss": 0.8509, "step": 77990 }, { "epoch": 0.9505441604816399, "grad_norm": 1.7844403982162476, "learning_rate": 2.603592046183451e-07, "loss": 0.8203, "step": 77995 }, { "epoch": 0.9506050967057877, "grad_norm": 1.8274002075195312, "learning_rate": 2.600384862091084e-07, "loss": 0.7937, "step": 78000 }, { "epoch": 0.9506660329299356, "grad_norm": 2.209686040878296, "learning_rate": 2.5971776779987174e-07, "loss": 0.8812, "step": 78005 }, { "epoch": 0.9507269691540834, "grad_norm": 2.214789628982544, "learning_rate": 2.59397049390635e-07, "loss": 0.8264, "step": 78010 }, { "epoch": 0.9507879053782311, "grad_norm": 2.3863344192504883, "learning_rate": 2.5907633098139836e-07, "loss": 0.7723, "step": 78015 }, { "epoch": 0.9508488416023789, "grad_norm": 1.9457478523254395, "learning_rate": 2.5875561257216164e-07, "loss": 0.8272, "step": 78020 }, { "epoch": 0.9509097778265267, "grad_norm": 2.4144465923309326, "learning_rate": 2.5843489416292497e-07, "loss": 0.7837, "step": 78025 }, { "epoch": 0.9509707140506746, "grad_norm": 1.925438404083252, "learning_rate": 2.5811417575368825e-07, "loss": 0.8307, "step": 78030 }, { "epoch": 0.9510316502748224, "grad_norm": 1.7432447671890259, "learning_rate": 2.577934573444516e-07, "loss": 0.7511, "step": 78035 }, { "epoch": 0.9510925864989702, "grad_norm": 2.126317262649536, "learning_rate": 2.5747273893521487e-07, "loss": 0.7913, "step": 78040 }, { "epoch": 0.951153522723118, "grad_norm": 1.9786937236785889, "learning_rate": 2.571520205259782e-07, "loss": 0.8909, "step": 78045 }, { "epoch": 0.9512144589472658, "grad_norm": 1.9654840230941772, "learning_rate": 2.568313021167415e-07, "loss": 0.8314, "step": 78050 }, { "epoch": 0.9512753951714136, "grad_norm": 1.8448113203048706, "learning_rate": 2.565105837075048e-07, "loss": 0.7925, "step": 78055 }, { "epoch": 0.9513363313955614, "grad_norm": 2.227612018585205, "learning_rate": 2.5618986529826815e-07, "loss": 0.7875, "step": 78060 }, { "epoch": 0.9513972676197092, "grad_norm": 2.1821577548980713, "learning_rate": 2.5586914688903143e-07, "loss": 0.8789, "step": 78065 }, { "epoch": 0.951458203843857, "grad_norm": 1.909205436706543, "learning_rate": 2.5554842847979477e-07, "loss": 0.9247, "step": 78070 }, { "epoch": 0.9515191400680049, "grad_norm": 2.04038667678833, "learning_rate": 2.5522771007055805e-07, "loss": 0.8242, "step": 78075 }, { "epoch": 0.9515800762921527, "grad_norm": 1.7975099086761475, "learning_rate": 2.549069916613214e-07, "loss": 0.8272, "step": 78080 }, { "epoch": 0.9516410125163004, "grad_norm": 1.9153019189834595, "learning_rate": 2.5458627325208467e-07, "loss": 0.7616, "step": 78085 }, { "epoch": 0.9517019487404482, "grad_norm": 2.463547945022583, "learning_rate": 2.54265554842848e-07, "loss": 0.8749, "step": 78090 }, { "epoch": 0.951762884964596, "grad_norm": 1.8111375570297241, "learning_rate": 2.539448364336113e-07, "loss": 0.8034, "step": 78095 }, { "epoch": 0.9518238211887439, "grad_norm": 1.8820937871932983, "learning_rate": 2.536241180243746e-07, "loss": 0.754, "step": 78100 }, { "epoch": 0.9518847574128917, "grad_norm": 2.008340835571289, "learning_rate": 2.5330339961513795e-07, "loss": 0.8315, "step": 78105 }, { "epoch": 0.9519456936370395, "grad_norm": 1.8376911878585815, "learning_rate": 2.5298268120590123e-07, "loss": 0.7436, "step": 78110 }, { "epoch": 0.9520066298611873, "grad_norm": 1.7553361654281616, "learning_rate": 2.5266196279666457e-07, "loss": 0.7677, "step": 78115 }, { "epoch": 0.952067566085335, "grad_norm": 1.9540287256240845, "learning_rate": 2.5234124438742785e-07, "loss": 0.7574, "step": 78120 }, { "epoch": 0.9521285023094829, "grad_norm": 1.894182801246643, "learning_rate": 2.520205259781912e-07, "loss": 0.8248, "step": 78125 }, { "epoch": 0.9521894385336307, "grad_norm": 2.343963146209717, "learning_rate": 2.516998075689545e-07, "loss": 0.8077, "step": 78130 }, { "epoch": 0.9522503747577785, "grad_norm": 1.7818875312805176, "learning_rate": 2.513790891597178e-07, "loss": 0.7651, "step": 78135 }, { "epoch": 0.9523113109819263, "grad_norm": 1.8375967741012573, "learning_rate": 2.5105837075048113e-07, "loss": 0.7787, "step": 78140 }, { "epoch": 0.9523722472060742, "grad_norm": 2.1865031719207764, "learning_rate": 2.507376523412444e-07, "loss": 0.805, "step": 78145 }, { "epoch": 0.9524331834302219, "grad_norm": 2.3316259384155273, "learning_rate": 2.5041693393200775e-07, "loss": 0.8043, "step": 78150 }, { "epoch": 0.9524941196543697, "grad_norm": 1.915130615234375, "learning_rate": 2.5009621552277103e-07, "loss": 0.7398, "step": 78155 }, { "epoch": 0.9525550558785175, "grad_norm": 2.090679407119751, "learning_rate": 2.4977549711353437e-07, "loss": 0.7664, "step": 78160 }, { "epoch": 0.9526159921026653, "grad_norm": 2.165980577468872, "learning_rate": 2.4945477870429765e-07, "loss": 0.8093, "step": 78165 }, { "epoch": 0.9526769283268132, "grad_norm": 1.8582392930984497, "learning_rate": 2.49134060295061e-07, "loss": 0.8222, "step": 78170 }, { "epoch": 0.952737864550961, "grad_norm": 2.3345491886138916, "learning_rate": 2.4881334188582426e-07, "loss": 0.8148, "step": 78175 }, { "epoch": 0.9527988007751088, "grad_norm": 2.3829615116119385, "learning_rate": 2.484926234765876e-07, "loss": 0.8241, "step": 78180 }, { "epoch": 0.9528597369992565, "grad_norm": 2.368072271347046, "learning_rate": 2.481719050673509e-07, "loss": 0.7965, "step": 78185 }, { "epoch": 0.9529206732234043, "grad_norm": 1.9748468399047852, "learning_rate": 2.478511866581142e-07, "loss": 0.7932, "step": 78190 }, { "epoch": 0.9529816094475522, "grad_norm": 1.7046440839767456, "learning_rate": 2.475304682488775e-07, "loss": 0.7684, "step": 78195 }, { "epoch": 0.9530425456717, "grad_norm": 2.0085554122924805, "learning_rate": 2.4720974983964083e-07, "loss": 0.757, "step": 78200 }, { "epoch": 0.9531034818958478, "grad_norm": 2.057377338409424, "learning_rate": 2.468890314304041e-07, "loss": 0.8084, "step": 78205 }, { "epoch": 0.9531644181199956, "grad_norm": 1.8848448991775513, "learning_rate": 2.4656831302116744e-07, "loss": 0.7937, "step": 78210 }, { "epoch": 0.9532253543441435, "grad_norm": 1.7006200551986694, "learning_rate": 2.462475946119307e-07, "loss": 0.7758, "step": 78215 }, { "epoch": 0.9532862905682912, "grad_norm": 1.8164091110229492, "learning_rate": 2.4592687620269406e-07, "loss": 0.7951, "step": 78220 }, { "epoch": 0.953347226792439, "grad_norm": 2.2434797286987305, "learning_rate": 2.4560615779345734e-07, "loss": 0.858, "step": 78225 }, { "epoch": 0.9534081630165868, "grad_norm": 1.8684791326522827, "learning_rate": 2.452854393842207e-07, "loss": 0.8072, "step": 78230 }, { "epoch": 0.9534690992407346, "grad_norm": 2.284848213195801, "learning_rate": 2.44964720974984e-07, "loss": 0.8074, "step": 78235 }, { "epoch": 0.9535300354648825, "grad_norm": 1.6643950939178467, "learning_rate": 2.446440025657473e-07, "loss": 0.7943, "step": 78240 }, { "epoch": 0.9535909716890303, "grad_norm": 1.983405590057373, "learning_rate": 2.4432328415651063e-07, "loss": 0.7997, "step": 78245 }, { "epoch": 0.9536519079131781, "grad_norm": 1.9102277755737305, "learning_rate": 2.440025657472739e-07, "loss": 0.8537, "step": 78250 }, { "epoch": 0.9537128441373258, "grad_norm": 2.221158742904663, "learning_rate": 2.4368184733803724e-07, "loss": 0.836, "step": 78255 }, { "epoch": 0.9537737803614736, "grad_norm": 1.9843688011169434, "learning_rate": 2.433611289288005e-07, "loss": 0.7169, "step": 78260 }, { "epoch": 0.9538347165856215, "grad_norm": 1.9241153001785278, "learning_rate": 2.4304041051956386e-07, "loss": 0.8363, "step": 78265 }, { "epoch": 0.9538956528097693, "grad_norm": 1.8279588222503662, "learning_rate": 2.4271969211032714e-07, "loss": 0.7404, "step": 78270 }, { "epoch": 0.9539565890339171, "grad_norm": 1.8073991537094116, "learning_rate": 2.423989737010905e-07, "loss": 0.7499, "step": 78275 }, { "epoch": 0.9540175252580649, "grad_norm": 1.7960318326950073, "learning_rate": 2.4207825529185376e-07, "loss": 0.883, "step": 78280 }, { "epoch": 0.9540784614822128, "grad_norm": 2.119839906692505, "learning_rate": 2.417575368826171e-07, "loss": 0.7322, "step": 78285 }, { "epoch": 0.9541393977063605, "grad_norm": 1.910395860671997, "learning_rate": 2.4143681847338037e-07, "loss": 0.8204, "step": 78290 }, { "epoch": 0.9542003339305083, "grad_norm": 2.1059041023254395, "learning_rate": 2.411161000641437e-07, "loss": 0.8058, "step": 78295 }, { "epoch": 0.9542612701546561, "grad_norm": 1.6503499746322632, "learning_rate": 2.40795381654907e-07, "loss": 0.8192, "step": 78300 }, { "epoch": 0.9543222063788039, "grad_norm": 2.213599443435669, "learning_rate": 2.404746632456703e-07, "loss": 0.7324, "step": 78305 }, { "epoch": 0.9543831426029518, "grad_norm": 2.05889892578125, "learning_rate": 2.401539448364336e-07, "loss": 0.7726, "step": 78310 }, { "epoch": 0.9544440788270996, "grad_norm": 2.050785779953003, "learning_rate": 2.3983322642719694e-07, "loss": 0.8685, "step": 78315 }, { "epoch": 0.9545050150512474, "grad_norm": 1.9723563194274902, "learning_rate": 2.395125080179602e-07, "loss": 0.7995, "step": 78320 }, { "epoch": 0.9545659512753951, "grad_norm": 2.3320491313934326, "learning_rate": 2.3919178960872355e-07, "loss": 0.8407, "step": 78325 }, { "epoch": 0.9546268874995429, "grad_norm": 1.912293553352356, "learning_rate": 2.3887107119948684e-07, "loss": 0.7599, "step": 78330 }, { "epoch": 0.9546878237236908, "grad_norm": 1.852513313293457, "learning_rate": 2.3855035279025017e-07, "loss": 0.7606, "step": 78335 }, { "epoch": 0.9547487599478386, "grad_norm": 1.694295883178711, "learning_rate": 2.3822963438101348e-07, "loss": 0.7765, "step": 78340 }, { "epoch": 0.9548096961719864, "grad_norm": 1.8554887771606445, "learning_rate": 2.3790891597177679e-07, "loss": 0.8229, "step": 78345 }, { "epoch": 0.9548706323961342, "grad_norm": 2.1536295413970947, "learning_rate": 2.375881975625401e-07, "loss": 0.7687, "step": 78350 }, { "epoch": 0.9549315686202821, "grad_norm": 1.8543201684951782, "learning_rate": 2.372674791533034e-07, "loss": 0.7514, "step": 78355 }, { "epoch": 0.9549925048444298, "grad_norm": 2.047067880630493, "learning_rate": 2.369467607440667e-07, "loss": 0.8749, "step": 78360 }, { "epoch": 0.9550534410685776, "grad_norm": 2.614295721054077, "learning_rate": 2.3662604233483002e-07, "loss": 0.8621, "step": 78365 }, { "epoch": 0.9551143772927254, "grad_norm": 2.1857357025146484, "learning_rate": 2.3630532392559333e-07, "loss": 0.8623, "step": 78370 }, { "epoch": 0.9551753135168732, "grad_norm": 2.049049139022827, "learning_rate": 2.3598460551635663e-07, "loss": 0.7642, "step": 78375 }, { "epoch": 0.9552362497410211, "grad_norm": 1.99518620967865, "learning_rate": 2.3566388710711994e-07, "loss": 0.8369, "step": 78380 }, { "epoch": 0.9552971859651689, "grad_norm": 1.803478717803955, "learning_rate": 2.3534316869788325e-07, "loss": 0.7913, "step": 78385 }, { "epoch": 0.9553581221893167, "grad_norm": 1.9116971492767334, "learning_rate": 2.3502245028864658e-07, "loss": 0.7832, "step": 78390 }, { "epoch": 0.9554190584134644, "grad_norm": 2.188075065612793, "learning_rate": 2.347017318794099e-07, "loss": 0.8122, "step": 78395 }, { "epoch": 0.9554799946376122, "grad_norm": 1.9309190511703491, "learning_rate": 2.3438101347017323e-07, "loss": 0.709, "step": 78400 }, { "epoch": 0.9555409308617601, "grad_norm": 2.2021501064300537, "learning_rate": 2.3406029506093653e-07, "loss": 0.7747, "step": 78405 }, { "epoch": 0.9556018670859079, "grad_norm": 2.4534714221954346, "learning_rate": 2.3373957665169984e-07, "loss": 0.7942, "step": 78410 }, { "epoch": 0.9556628033100557, "grad_norm": 1.9643487930297852, "learning_rate": 2.3341885824246315e-07, "loss": 0.7877, "step": 78415 }, { "epoch": 0.9557237395342035, "grad_norm": 1.9939346313476562, "learning_rate": 2.3309813983322646e-07, "loss": 0.8238, "step": 78420 }, { "epoch": 0.9557846757583514, "grad_norm": 2.1480138301849365, "learning_rate": 2.3277742142398977e-07, "loss": 0.8687, "step": 78425 }, { "epoch": 0.9558456119824991, "grad_norm": 1.9939476251602173, "learning_rate": 2.3245670301475307e-07, "loss": 0.8766, "step": 78430 }, { "epoch": 0.9559065482066469, "grad_norm": 2.3916399478912354, "learning_rate": 2.3213598460551638e-07, "loss": 0.8274, "step": 78435 }, { "epoch": 0.9559674844307947, "grad_norm": 1.6606860160827637, "learning_rate": 2.318152661962797e-07, "loss": 0.7601, "step": 78440 }, { "epoch": 0.9560284206549425, "grad_norm": 2.06380295753479, "learning_rate": 2.31494547787043e-07, "loss": 0.762, "step": 78445 }, { "epoch": 0.9560893568790904, "grad_norm": 3.620009422302246, "learning_rate": 2.311738293778063e-07, "loss": 0.7753, "step": 78450 }, { "epoch": 0.9561502931032382, "grad_norm": 2.2107114791870117, "learning_rate": 2.308531109685696e-07, "loss": 0.75, "step": 78455 }, { "epoch": 0.956211229327386, "grad_norm": 1.8307431936264038, "learning_rate": 2.3053239255933295e-07, "loss": 0.7883, "step": 78460 }, { "epoch": 0.9562721655515337, "grad_norm": 1.9400845766067505, "learning_rate": 2.3021167415009626e-07, "loss": 0.7985, "step": 78465 }, { "epoch": 0.9563331017756815, "grad_norm": 1.9903013706207275, "learning_rate": 2.2989095574085956e-07, "loss": 0.7851, "step": 78470 }, { "epoch": 0.9563940379998294, "grad_norm": 2.3132734298706055, "learning_rate": 2.2957023733162287e-07, "loss": 0.8324, "step": 78475 }, { "epoch": 0.9564549742239772, "grad_norm": 2.0807557106018066, "learning_rate": 2.2924951892238618e-07, "loss": 0.8223, "step": 78480 }, { "epoch": 0.956515910448125, "grad_norm": 1.965561866760254, "learning_rate": 2.289288005131495e-07, "loss": 0.8088, "step": 78485 }, { "epoch": 0.9565768466722728, "grad_norm": 2.1034629344940186, "learning_rate": 2.286080821039128e-07, "loss": 0.7539, "step": 78490 }, { "epoch": 0.9566377828964207, "grad_norm": 1.7343792915344238, "learning_rate": 2.282873636946761e-07, "loss": 0.8038, "step": 78495 }, { "epoch": 0.9566987191205684, "grad_norm": 1.8733887672424316, "learning_rate": 2.279666452854394e-07, "loss": 0.7942, "step": 78500 }, { "epoch": 0.9567596553447162, "grad_norm": 1.8947312831878662, "learning_rate": 2.2764592687620272e-07, "loss": 0.7683, "step": 78505 }, { "epoch": 0.956820591568864, "grad_norm": 2.059677839279175, "learning_rate": 2.2732520846696603e-07, "loss": 0.7521, "step": 78510 }, { "epoch": 0.9568815277930118, "grad_norm": 2.059528350830078, "learning_rate": 2.2700449005772934e-07, "loss": 0.7932, "step": 78515 }, { "epoch": 0.9569424640171597, "grad_norm": 2.389382839202881, "learning_rate": 2.2668377164849264e-07, "loss": 0.7863, "step": 78520 }, { "epoch": 0.9570034002413075, "grad_norm": 1.7083820104599, "learning_rate": 2.2636305323925595e-07, "loss": 0.8136, "step": 78525 }, { "epoch": 0.9570643364654553, "grad_norm": 1.592227578163147, "learning_rate": 2.2604233483001926e-07, "loss": 0.7812, "step": 78530 }, { "epoch": 0.957125272689603, "grad_norm": 1.9927856922149658, "learning_rate": 2.2572161642078257e-07, "loss": 0.8282, "step": 78535 }, { "epoch": 0.9571862089137508, "grad_norm": 2.8380820751190186, "learning_rate": 2.2540089801154587e-07, "loss": 0.8601, "step": 78540 }, { "epoch": 0.9572471451378987, "grad_norm": 2.233090877532959, "learning_rate": 2.2508017960230918e-07, "loss": 0.791, "step": 78545 }, { "epoch": 0.9573080813620465, "grad_norm": 1.9399465322494507, "learning_rate": 2.247594611930725e-07, "loss": 0.7998, "step": 78550 }, { "epoch": 0.9573690175861943, "grad_norm": 2.067359447479248, "learning_rate": 2.244387427838358e-07, "loss": 0.8436, "step": 78555 }, { "epoch": 0.9574299538103421, "grad_norm": 1.8663749694824219, "learning_rate": 2.241180243745991e-07, "loss": 0.8104, "step": 78560 }, { "epoch": 0.95749089003449, "grad_norm": 1.8675189018249512, "learning_rate": 2.2379730596536244e-07, "loss": 0.8335, "step": 78565 }, { "epoch": 0.9575518262586377, "grad_norm": 2.0868194103240967, "learning_rate": 2.2347658755612575e-07, "loss": 0.7622, "step": 78570 }, { "epoch": 0.9576127624827855, "grad_norm": 1.6215134859085083, "learning_rate": 2.2315586914688906e-07, "loss": 0.7205, "step": 78575 }, { "epoch": 0.9576736987069333, "grad_norm": 1.706558346748352, "learning_rate": 2.2283515073765236e-07, "loss": 0.7607, "step": 78580 }, { "epoch": 0.9577346349310811, "grad_norm": 1.9837491512298584, "learning_rate": 2.2251443232841567e-07, "loss": 0.7619, "step": 78585 }, { "epoch": 0.957795571155229, "grad_norm": 1.9093530178070068, "learning_rate": 2.2219371391917898e-07, "loss": 0.7996, "step": 78590 }, { "epoch": 0.9578565073793768, "grad_norm": 2.08150577545166, "learning_rate": 2.218729955099423e-07, "loss": 0.7892, "step": 78595 }, { "epoch": 0.9579174436035246, "grad_norm": 1.6487736701965332, "learning_rate": 2.215522771007056e-07, "loss": 0.7883, "step": 78600 }, { "epoch": 0.9579783798276723, "grad_norm": 1.881141185760498, "learning_rate": 2.212315586914689e-07, "loss": 0.8201, "step": 78605 }, { "epoch": 0.9580393160518201, "grad_norm": 2.1432175636291504, "learning_rate": 2.209108402822322e-07, "loss": 0.8061, "step": 78610 }, { "epoch": 0.958100252275968, "grad_norm": 2.073291778564453, "learning_rate": 2.2059012187299552e-07, "loss": 0.8164, "step": 78615 }, { "epoch": 0.9581611885001158, "grad_norm": 1.9923444986343384, "learning_rate": 2.2026940346375883e-07, "loss": 0.8399, "step": 78620 }, { "epoch": 0.9582221247242636, "grad_norm": 1.6825520992279053, "learning_rate": 2.1994868505452214e-07, "loss": 0.8013, "step": 78625 }, { "epoch": 0.9582830609484114, "grad_norm": 2.22055721282959, "learning_rate": 2.1962796664528544e-07, "loss": 0.8892, "step": 78630 }, { "epoch": 0.9583439971725592, "grad_norm": 2.2321889400482178, "learning_rate": 2.1930724823604875e-07, "loss": 0.7771, "step": 78635 }, { "epoch": 0.958404933396707, "grad_norm": 1.988543152809143, "learning_rate": 2.1898652982681206e-07, "loss": 0.8558, "step": 78640 }, { "epoch": 0.9584658696208548, "grad_norm": 1.8631041049957275, "learning_rate": 2.1866581141757537e-07, "loss": 0.8641, "step": 78645 }, { "epoch": 0.9585268058450026, "grad_norm": 2.058156728744507, "learning_rate": 2.1834509300833868e-07, "loss": 0.7794, "step": 78650 }, { "epoch": 0.9585877420691504, "grad_norm": 2.1344330310821533, "learning_rate": 2.1802437459910198e-07, "loss": 0.8313, "step": 78655 }, { "epoch": 0.9586486782932983, "grad_norm": 2.0108370780944824, "learning_rate": 2.177036561898653e-07, "loss": 0.7871, "step": 78660 }, { "epoch": 0.9587096145174461, "grad_norm": 1.9335657358169556, "learning_rate": 2.173829377806286e-07, "loss": 0.7805, "step": 78665 }, { "epoch": 0.9587705507415939, "grad_norm": 1.8468708992004395, "learning_rate": 2.1706221937139193e-07, "loss": 0.8724, "step": 78670 }, { "epoch": 0.9588314869657416, "grad_norm": 2.5812435150146484, "learning_rate": 2.1674150096215524e-07, "loss": 0.8069, "step": 78675 }, { "epoch": 0.9588924231898894, "grad_norm": 2.0907692909240723, "learning_rate": 2.1642078255291855e-07, "loss": 0.7936, "step": 78680 }, { "epoch": 0.9589533594140373, "grad_norm": 2.069305896759033, "learning_rate": 2.1610006414368186e-07, "loss": 0.7907, "step": 78685 }, { "epoch": 0.9590142956381851, "grad_norm": 1.8195717334747314, "learning_rate": 2.1577934573444517e-07, "loss": 0.7845, "step": 78690 }, { "epoch": 0.9590752318623329, "grad_norm": 2.1315715312957764, "learning_rate": 2.1545862732520847e-07, "loss": 0.7519, "step": 78695 }, { "epoch": 0.9591361680864807, "grad_norm": 2.228224515914917, "learning_rate": 2.1513790891597178e-07, "loss": 0.7581, "step": 78700 }, { "epoch": 0.9591971043106285, "grad_norm": 1.8636884689331055, "learning_rate": 2.1481719050673512e-07, "loss": 0.8327, "step": 78705 }, { "epoch": 0.9592580405347763, "grad_norm": 2.1157519817352295, "learning_rate": 2.1449647209749842e-07, "loss": 0.7934, "step": 78710 }, { "epoch": 0.9593189767589241, "grad_norm": 1.768875002861023, "learning_rate": 2.1417575368826173e-07, "loss": 0.8337, "step": 78715 }, { "epoch": 0.9593799129830719, "grad_norm": 2.2138609886169434, "learning_rate": 2.1385503527902504e-07, "loss": 0.8145, "step": 78720 }, { "epoch": 0.9594408492072197, "grad_norm": 2.1993815898895264, "learning_rate": 2.1353431686978835e-07, "loss": 0.8178, "step": 78725 }, { "epoch": 0.9595017854313675, "grad_norm": 2.024907112121582, "learning_rate": 2.1321359846055166e-07, "loss": 0.8461, "step": 78730 }, { "epoch": 0.9595627216555154, "grad_norm": 1.8889973163604736, "learning_rate": 2.1289288005131496e-07, "loss": 0.7541, "step": 78735 }, { "epoch": 0.9596236578796632, "grad_norm": 2.310152053833008, "learning_rate": 2.125721616420783e-07, "loss": 0.754, "step": 78740 }, { "epoch": 0.9596845941038109, "grad_norm": 2.2084970474243164, "learning_rate": 2.122514432328416e-07, "loss": 0.7984, "step": 78745 }, { "epoch": 0.9597455303279587, "grad_norm": 1.9549424648284912, "learning_rate": 2.1193072482360491e-07, "loss": 0.8571, "step": 78750 }, { "epoch": 0.9598064665521066, "grad_norm": 1.8805322647094727, "learning_rate": 2.1161000641436822e-07, "loss": 0.7978, "step": 78755 }, { "epoch": 0.9598674027762544, "grad_norm": 1.7623611688613892, "learning_rate": 2.1128928800513153e-07, "loss": 0.7852, "step": 78760 }, { "epoch": 0.9599283390004022, "grad_norm": 1.8693026304244995, "learning_rate": 2.1096856959589484e-07, "loss": 0.7787, "step": 78765 }, { "epoch": 0.95998927522455, "grad_norm": 1.8432343006134033, "learning_rate": 2.1064785118665815e-07, "loss": 0.8349, "step": 78770 }, { "epoch": 0.9600502114486978, "grad_norm": 2.3990561962127686, "learning_rate": 2.1032713277742145e-07, "loss": 0.7864, "step": 78775 }, { "epoch": 0.9601111476728456, "grad_norm": 1.7761454582214355, "learning_rate": 2.1000641436818476e-07, "loss": 0.8721, "step": 78780 }, { "epoch": 0.9601720838969934, "grad_norm": 1.852739930152893, "learning_rate": 2.0968569595894807e-07, "loss": 0.843, "step": 78785 }, { "epoch": 0.9602330201211412, "grad_norm": 2.131256580352783, "learning_rate": 2.0936497754971138e-07, "loss": 0.8288, "step": 78790 }, { "epoch": 0.960293956345289, "grad_norm": 1.9349969625473022, "learning_rate": 2.0904425914047469e-07, "loss": 0.8531, "step": 78795 }, { "epoch": 0.9603548925694368, "grad_norm": 1.7671109437942505, "learning_rate": 2.08723540731238e-07, "loss": 0.7958, "step": 78800 }, { "epoch": 0.9604158287935847, "grad_norm": 1.9509516954421997, "learning_rate": 2.084028223220013e-07, "loss": 0.8185, "step": 78805 }, { "epoch": 0.9604767650177325, "grad_norm": 1.8548485040664673, "learning_rate": 2.080821039127646e-07, "loss": 0.809, "step": 78810 }, { "epoch": 0.9605377012418802, "grad_norm": 1.8339998722076416, "learning_rate": 2.0776138550352792e-07, "loss": 0.7261, "step": 78815 }, { "epoch": 0.960598637466028, "grad_norm": 1.757534384727478, "learning_rate": 2.0744066709429123e-07, "loss": 0.7916, "step": 78820 }, { "epoch": 0.9606595736901758, "grad_norm": 1.711362361907959, "learning_rate": 2.0711994868505453e-07, "loss": 0.73, "step": 78825 }, { "epoch": 0.9607205099143237, "grad_norm": 1.9607534408569336, "learning_rate": 2.0679923027581784e-07, "loss": 0.8217, "step": 78830 }, { "epoch": 0.9607814461384715, "grad_norm": 2.2047483921051025, "learning_rate": 2.0647851186658115e-07, "loss": 0.8277, "step": 78835 }, { "epoch": 0.9608423823626193, "grad_norm": 1.922949194908142, "learning_rate": 2.0615779345734446e-07, "loss": 0.7762, "step": 78840 }, { "epoch": 0.9609033185867671, "grad_norm": 1.8512803316116333, "learning_rate": 2.058370750481078e-07, "loss": 0.7742, "step": 78845 }, { "epoch": 0.9609642548109149, "grad_norm": 1.9755043983459473, "learning_rate": 2.055163566388711e-07, "loss": 0.8205, "step": 78850 }, { "epoch": 0.9610251910350627, "grad_norm": 1.677793025970459, "learning_rate": 2.051956382296344e-07, "loss": 0.8238, "step": 78855 }, { "epoch": 0.9610861272592105, "grad_norm": 2.006849527359009, "learning_rate": 2.0487491982039772e-07, "loss": 0.7223, "step": 78860 }, { "epoch": 0.9611470634833583, "grad_norm": 2.175290822982788, "learning_rate": 2.0455420141116102e-07, "loss": 0.8646, "step": 78865 }, { "epoch": 0.9612079997075061, "grad_norm": 1.7496905326843262, "learning_rate": 2.0423348300192433e-07, "loss": 0.8332, "step": 78870 }, { "epoch": 0.961268935931654, "grad_norm": 1.763508915901184, "learning_rate": 2.0391276459268764e-07, "loss": 0.7626, "step": 78875 }, { "epoch": 0.9613298721558018, "grad_norm": 2.1353838443756104, "learning_rate": 2.0359204618345095e-07, "loss": 0.837, "step": 78880 }, { "epoch": 0.9613908083799495, "grad_norm": 1.9826303720474243, "learning_rate": 2.0327132777421426e-07, "loss": 0.7938, "step": 78885 }, { "epoch": 0.9614517446040973, "grad_norm": 2.0167040824890137, "learning_rate": 2.0295060936497756e-07, "loss": 0.8597, "step": 78890 }, { "epoch": 0.9615126808282451, "grad_norm": 1.7848504781723022, "learning_rate": 2.0262989095574087e-07, "loss": 0.8445, "step": 78895 }, { "epoch": 0.961573617052393, "grad_norm": 1.8035428524017334, "learning_rate": 2.0230917254650418e-07, "loss": 0.7654, "step": 78900 }, { "epoch": 0.9616345532765408, "grad_norm": 1.8962491750717163, "learning_rate": 2.019884541372675e-07, "loss": 0.773, "step": 78905 }, { "epoch": 0.9616954895006886, "grad_norm": 2.27591609954834, "learning_rate": 2.016677357280308e-07, "loss": 0.7247, "step": 78910 }, { "epoch": 0.9617564257248364, "grad_norm": 2.0240821838378906, "learning_rate": 2.013470173187941e-07, "loss": 0.7683, "step": 78915 }, { "epoch": 0.9618173619489842, "grad_norm": 1.9808448553085327, "learning_rate": 2.010262989095574e-07, "loss": 0.8716, "step": 78920 }, { "epoch": 0.961878298173132, "grad_norm": 2.237403631210327, "learning_rate": 2.0070558050032072e-07, "loss": 0.8325, "step": 78925 }, { "epoch": 0.9619392343972798, "grad_norm": 1.9096280336380005, "learning_rate": 2.0038486209108403e-07, "loss": 0.7201, "step": 78930 }, { "epoch": 0.9620001706214276, "grad_norm": 1.870935082435608, "learning_rate": 2.0006414368184733e-07, "loss": 0.7765, "step": 78935 }, { "epoch": 0.9620611068455754, "grad_norm": 1.8332281112670898, "learning_rate": 1.9974342527261064e-07, "loss": 0.7934, "step": 78940 }, { "epoch": 0.9621220430697233, "grad_norm": 1.9566594362258911, "learning_rate": 1.9942270686337395e-07, "loss": 0.8074, "step": 78945 }, { "epoch": 0.9621829792938711, "grad_norm": 2.0738437175750732, "learning_rate": 1.9910198845413728e-07, "loss": 0.8182, "step": 78950 }, { "epoch": 0.9622439155180188, "grad_norm": 1.8653217554092407, "learning_rate": 1.987812700449006e-07, "loss": 0.8734, "step": 78955 }, { "epoch": 0.9623048517421666, "grad_norm": 1.7605605125427246, "learning_rate": 1.984605516356639e-07, "loss": 0.7524, "step": 78960 }, { "epoch": 0.9623657879663144, "grad_norm": 1.9051785469055176, "learning_rate": 1.981398332264272e-07, "loss": 0.7918, "step": 78965 }, { "epoch": 0.9624267241904623, "grad_norm": 1.6953144073486328, "learning_rate": 1.9781911481719052e-07, "loss": 0.7825, "step": 78970 }, { "epoch": 0.9624876604146101, "grad_norm": 2.326850175857544, "learning_rate": 1.9749839640795382e-07, "loss": 0.7659, "step": 78975 }, { "epoch": 0.9625485966387579, "grad_norm": 1.7245441675186157, "learning_rate": 1.9717767799871713e-07, "loss": 0.7938, "step": 78980 }, { "epoch": 0.9626095328629057, "grad_norm": 2.035637855529785, "learning_rate": 1.9685695958948044e-07, "loss": 0.707, "step": 78985 }, { "epoch": 0.9626704690870534, "grad_norm": 2.360302448272705, "learning_rate": 1.9653624118024375e-07, "loss": 0.8824, "step": 78990 }, { "epoch": 0.9627314053112013, "grad_norm": 1.7421280145645142, "learning_rate": 1.9621552277100706e-07, "loss": 0.7544, "step": 78995 }, { "epoch": 0.9627923415353491, "grad_norm": 2.0826425552368164, "learning_rate": 1.9589480436177036e-07, "loss": 0.8114, "step": 79000 }, { "epoch": 0.9628532777594969, "grad_norm": 1.9111255407333374, "learning_rate": 1.955740859525337e-07, "loss": 0.7744, "step": 79005 }, { "epoch": 0.9629142139836447, "grad_norm": 2.3348612785339355, "learning_rate": 1.95253367543297e-07, "loss": 0.7575, "step": 79010 }, { "epoch": 0.9629751502077926, "grad_norm": 1.779975175857544, "learning_rate": 1.9493264913406031e-07, "loss": 0.8136, "step": 79015 }, { "epoch": 0.9630360864319404, "grad_norm": 1.6350505352020264, "learning_rate": 1.9461193072482365e-07, "loss": 0.8171, "step": 79020 }, { "epoch": 0.9630970226560881, "grad_norm": 2.5905048847198486, "learning_rate": 1.9429121231558696e-07, "loss": 0.8128, "step": 79025 }, { "epoch": 0.9631579588802359, "grad_norm": 2.0552854537963867, "learning_rate": 1.9397049390635026e-07, "loss": 0.7498, "step": 79030 }, { "epoch": 0.9632188951043837, "grad_norm": 1.6754480600357056, "learning_rate": 1.9364977549711357e-07, "loss": 0.8183, "step": 79035 }, { "epoch": 0.9632798313285316, "grad_norm": 2.122731924057007, "learning_rate": 1.9332905708787688e-07, "loss": 0.7879, "step": 79040 }, { "epoch": 0.9633407675526794, "grad_norm": 2.1421194076538086, "learning_rate": 1.930083386786402e-07, "loss": 0.7604, "step": 79045 }, { "epoch": 0.9634017037768272, "grad_norm": 1.8356868028640747, "learning_rate": 1.926876202694035e-07, "loss": 0.8404, "step": 79050 }, { "epoch": 0.963462640000975, "grad_norm": 1.8975017070770264, "learning_rate": 1.923669018601668e-07, "loss": 0.7952, "step": 79055 }, { "epoch": 0.9635235762251227, "grad_norm": 1.6811487674713135, "learning_rate": 1.920461834509301e-07, "loss": 0.8322, "step": 79060 }, { "epoch": 0.9635845124492706, "grad_norm": 1.9235055446624756, "learning_rate": 1.9172546504169342e-07, "loss": 0.8006, "step": 79065 }, { "epoch": 0.9636454486734184, "grad_norm": 2.296910524368286, "learning_rate": 1.9140474663245673e-07, "loss": 0.7825, "step": 79070 }, { "epoch": 0.9637063848975662, "grad_norm": 2.008817195892334, "learning_rate": 1.9108402822322004e-07, "loss": 0.761, "step": 79075 }, { "epoch": 0.963767321121714, "grad_norm": 2.00608491897583, "learning_rate": 1.9076330981398334e-07, "loss": 0.8362, "step": 79080 }, { "epoch": 0.9638282573458619, "grad_norm": 1.827794075012207, "learning_rate": 1.9044259140474665e-07, "loss": 0.7821, "step": 79085 }, { "epoch": 0.9638891935700096, "grad_norm": 2.116096019744873, "learning_rate": 1.9012187299550996e-07, "loss": 0.7886, "step": 79090 }, { "epoch": 0.9639501297941574, "grad_norm": 1.8650612831115723, "learning_rate": 1.8980115458627327e-07, "loss": 0.8036, "step": 79095 }, { "epoch": 0.9640110660183052, "grad_norm": 1.8880655765533447, "learning_rate": 1.8948043617703658e-07, "loss": 0.7594, "step": 79100 }, { "epoch": 0.964072002242453, "grad_norm": 2.0224523544311523, "learning_rate": 1.8915971776779988e-07, "loss": 0.8184, "step": 79105 }, { "epoch": 0.9641329384666009, "grad_norm": 2.07985782623291, "learning_rate": 1.888389993585632e-07, "loss": 0.6896, "step": 79110 }, { "epoch": 0.9641938746907487, "grad_norm": 1.8643296957015991, "learning_rate": 1.885182809493265e-07, "loss": 0.7762, "step": 79115 }, { "epoch": 0.9642548109148965, "grad_norm": 1.8260796070098877, "learning_rate": 1.881975625400898e-07, "loss": 0.8048, "step": 79120 }, { "epoch": 0.9643157471390442, "grad_norm": 2.1162068843841553, "learning_rate": 1.8787684413085314e-07, "loss": 0.7421, "step": 79125 }, { "epoch": 0.964376683363192, "grad_norm": 2.140449047088623, "learning_rate": 1.8755612572161645e-07, "loss": 0.819, "step": 79130 }, { "epoch": 0.9644376195873399, "grad_norm": 2.1339757442474365, "learning_rate": 1.8723540731237976e-07, "loss": 0.8519, "step": 79135 }, { "epoch": 0.9644985558114877, "grad_norm": 1.857966661453247, "learning_rate": 1.8691468890314307e-07, "loss": 0.8188, "step": 79140 }, { "epoch": 0.9645594920356355, "grad_norm": 1.82526433467865, "learning_rate": 1.8659397049390637e-07, "loss": 0.7017, "step": 79145 }, { "epoch": 0.9646204282597833, "grad_norm": 2.315072774887085, "learning_rate": 1.8627325208466968e-07, "loss": 0.8187, "step": 79150 }, { "epoch": 0.9646813644839312, "grad_norm": 2.0546767711639404, "learning_rate": 1.85952533675433e-07, "loss": 0.8172, "step": 79155 }, { "epoch": 0.9647423007080789, "grad_norm": 1.8993980884552002, "learning_rate": 1.856318152661963e-07, "loss": 0.7723, "step": 79160 }, { "epoch": 0.9648032369322267, "grad_norm": 1.9293092489242554, "learning_rate": 1.853110968569596e-07, "loss": 0.7929, "step": 79165 }, { "epoch": 0.9648641731563745, "grad_norm": 1.7615270614624023, "learning_rate": 1.8499037844772291e-07, "loss": 0.8359, "step": 79170 }, { "epoch": 0.9649251093805223, "grad_norm": 1.8618297576904297, "learning_rate": 1.8466966003848622e-07, "loss": 0.8085, "step": 79175 }, { "epoch": 0.9649860456046702, "grad_norm": 1.716241478919983, "learning_rate": 1.8434894162924953e-07, "loss": 0.8468, "step": 79180 }, { "epoch": 0.965046981828818, "grad_norm": 2.0493221282958984, "learning_rate": 1.8402822322001284e-07, "loss": 0.7881, "step": 79185 }, { "epoch": 0.9651079180529658, "grad_norm": 2.103038787841797, "learning_rate": 1.8370750481077615e-07, "loss": 0.8329, "step": 79190 }, { "epoch": 0.9651688542771135, "grad_norm": 1.9517619609832764, "learning_rate": 1.8338678640153945e-07, "loss": 0.8234, "step": 79195 }, { "epoch": 0.9652297905012613, "grad_norm": 2.099081039428711, "learning_rate": 1.8306606799230276e-07, "loss": 0.8622, "step": 79200 }, { "epoch": 0.9652907267254092, "grad_norm": 1.962982177734375, "learning_rate": 1.8274534958306607e-07, "loss": 0.7435, "step": 79205 }, { "epoch": 0.965351662949557, "grad_norm": 2.254669189453125, "learning_rate": 1.8242463117382938e-07, "loss": 0.8737, "step": 79210 }, { "epoch": 0.9654125991737048, "grad_norm": 2.1329195499420166, "learning_rate": 1.8210391276459269e-07, "loss": 0.8433, "step": 79215 }, { "epoch": 0.9654735353978526, "grad_norm": 2.0020155906677246, "learning_rate": 1.81783194355356e-07, "loss": 0.8032, "step": 79220 }, { "epoch": 0.9655344716220005, "grad_norm": 1.726037621498108, "learning_rate": 1.814624759461193e-07, "loss": 0.7595, "step": 79225 }, { "epoch": 0.9655954078461482, "grad_norm": 1.7436541318893433, "learning_rate": 1.8114175753688264e-07, "loss": 0.8651, "step": 79230 }, { "epoch": 0.965656344070296, "grad_norm": 2.084872245788574, "learning_rate": 1.8082103912764594e-07, "loss": 0.7909, "step": 79235 }, { "epoch": 0.9657172802944438, "grad_norm": 1.92371666431427, "learning_rate": 1.8050032071840925e-07, "loss": 0.8147, "step": 79240 }, { "epoch": 0.9657782165185916, "grad_norm": 1.7815537452697754, "learning_rate": 1.8017960230917256e-07, "loss": 0.8011, "step": 79245 }, { "epoch": 0.9658391527427395, "grad_norm": 2.0658223628997803, "learning_rate": 1.7985888389993587e-07, "loss": 0.7803, "step": 79250 }, { "epoch": 0.9659000889668873, "grad_norm": 2.2444989681243896, "learning_rate": 1.7953816549069918e-07, "loss": 0.7536, "step": 79255 }, { "epoch": 0.9659610251910351, "grad_norm": 1.9810688495635986, "learning_rate": 1.7921744708146248e-07, "loss": 0.8092, "step": 79260 }, { "epoch": 0.9660219614151828, "grad_norm": 2.2306151390075684, "learning_rate": 1.788967286722258e-07, "loss": 0.7988, "step": 79265 }, { "epoch": 0.9660828976393306, "grad_norm": 1.798991084098816, "learning_rate": 1.785760102629891e-07, "loss": 0.7894, "step": 79270 }, { "epoch": 0.9661438338634785, "grad_norm": 1.897210955619812, "learning_rate": 1.782552918537524e-07, "loss": 0.847, "step": 79275 }, { "epoch": 0.9662047700876263, "grad_norm": 2.221788167953491, "learning_rate": 1.7793457344451572e-07, "loss": 0.7958, "step": 79280 }, { "epoch": 0.9662657063117741, "grad_norm": 2.229175329208374, "learning_rate": 1.7761385503527902e-07, "loss": 0.789, "step": 79285 }, { "epoch": 0.9663266425359219, "grad_norm": 1.8105889558792114, "learning_rate": 1.7729313662604233e-07, "loss": 0.7616, "step": 79290 }, { "epoch": 0.9663875787600698, "grad_norm": 2.163942575454712, "learning_rate": 1.7697241821680564e-07, "loss": 0.8459, "step": 79295 }, { "epoch": 0.9664485149842175, "grad_norm": 2.05399227142334, "learning_rate": 1.7665169980756895e-07, "loss": 0.6968, "step": 79300 }, { "epoch": 0.9665094512083653, "grad_norm": 2.2246508598327637, "learning_rate": 1.763309813983323e-07, "loss": 0.8354, "step": 79305 }, { "epoch": 0.9665703874325131, "grad_norm": 1.800944447517395, "learning_rate": 1.7601026298909562e-07, "loss": 0.791, "step": 79310 }, { "epoch": 0.9666313236566609, "grad_norm": 1.9729338884353638, "learning_rate": 1.7568954457985892e-07, "loss": 0.8009, "step": 79315 }, { "epoch": 0.9666922598808088, "grad_norm": 2.0060837268829346, "learning_rate": 1.7536882617062223e-07, "loss": 0.778, "step": 79320 }, { "epoch": 0.9667531961049566, "grad_norm": 1.6792879104614258, "learning_rate": 1.7504810776138554e-07, "loss": 0.7657, "step": 79325 }, { "epoch": 0.9668141323291044, "grad_norm": 1.9626929759979248, "learning_rate": 1.7472738935214885e-07, "loss": 0.727, "step": 79330 }, { "epoch": 0.9668750685532521, "grad_norm": 1.9778053760528564, "learning_rate": 1.7440667094291216e-07, "loss": 0.8406, "step": 79335 }, { "epoch": 0.9669360047773999, "grad_norm": 1.9617477655410767, "learning_rate": 1.7408595253367546e-07, "loss": 0.781, "step": 79340 }, { "epoch": 0.9669969410015478, "grad_norm": 1.9703569412231445, "learning_rate": 1.7376523412443877e-07, "loss": 0.7604, "step": 79345 }, { "epoch": 0.9670578772256956, "grad_norm": 2.342611074447632, "learning_rate": 1.7344451571520208e-07, "loss": 0.8392, "step": 79350 }, { "epoch": 0.9671188134498434, "grad_norm": 2.19801664352417, "learning_rate": 1.731237973059654e-07, "loss": 0.7307, "step": 79355 }, { "epoch": 0.9671797496739912, "grad_norm": 1.9867290258407593, "learning_rate": 1.728030788967287e-07, "loss": 0.8532, "step": 79360 }, { "epoch": 0.967240685898139, "grad_norm": 1.924392580986023, "learning_rate": 1.72482360487492e-07, "loss": 0.8019, "step": 79365 }, { "epoch": 0.9673016221222868, "grad_norm": 1.8690656423568726, "learning_rate": 1.721616420782553e-07, "loss": 0.8052, "step": 79370 }, { "epoch": 0.9673625583464346, "grad_norm": 2.0294201374053955, "learning_rate": 1.7184092366901862e-07, "loss": 0.871, "step": 79375 }, { "epoch": 0.9674234945705824, "grad_norm": 2.0014700889587402, "learning_rate": 1.7152020525978193e-07, "loss": 0.7799, "step": 79380 }, { "epoch": 0.9674844307947302, "grad_norm": 2.328472375869751, "learning_rate": 1.7119948685054523e-07, "loss": 0.8363, "step": 79385 }, { "epoch": 0.967545367018878, "grad_norm": 2.211289644241333, "learning_rate": 1.7087876844130854e-07, "loss": 0.7767, "step": 79390 }, { "epoch": 0.9676063032430259, "grad_norm": 2.020148992538452, "learning_rate": 1.7055805003207185e-07, "loss": 0.7993, "step": 79395 }, { "epoch": 0.9676672394671737, "grad_norm": 2.33610200881958, "learning_rate": 1.7023733162283516e-07, "loss": 0.8642, "step": 79400 }, { "epoch": 0.9677281756913214, "grad_norm": 2.4387753009796143, "learning_rate": 1.699166132135985e-07, "loss": 0.835, "step": 79405 }, { "epoch": 0.9677891119154692, "grad_norm": 1.9799829721450806, "learning_rate": 1.695958948043618e-07, "loss": 0.804, "step": 79410 }, { "epoch": 0.9678500481396171, "grad_norm": 2.351526975631714, "learning_rate": 1.692751763951251e-07, "loss": 0.8666, "step": 79415 }, { "epoch": 0.9679109843637649, "grad_norm": 2.128908395767212, "learning_rate": 1.6895445798588842e-07, "loss": 0.6924, "step": 79420 }, { "epoch": 0.9679719205879127, "grad_norm": 2.275031566619873, "learning_rate": 1.6863373957665172e-07, "loss": 0.7893, "step": 79425 }, { "epoch": 0.9680328568120605, "grad_norm": 2.119637966156006, "learning_rate": 1.6831302116741503e-07, "loss": 0.8199, "step": 79430 }, { "epoch": 0.9680937930362083, "grad_norm": 1.823596715927124, "learning_rate": 1.6799230275817834e-07, "loss": 0.8056, "step": 79435 }, { "epoch": 0.9681547292603561, "grad_norm": 2.317234516143799, "learning_rate": 1.6767158434894165e-07, "loss": 0.7758, "step": 79440 }, { "epoch": 0.9682156654845039, "grad_norm": 2.7025954723358154, "learning_rate": 1.6735086593970496e-07, "loss": 0.8326, "step": 79445 }, { "epoch": 0.9682766017086517, "grad_norm": 2.2566733360290527, "learning_rate": 1.6703014753046826e-07, "loss": 0.8248, "step": 79450 }, { "epoch": 0.9683375379327995, "grad_norm": 1.8751778602600098, "learning_rate": 1.6670942912123157e-07, "loss": 0.7919, "step": 79455 }, { "epoch": 0.9683984741569474, "grad_norm": 1.90956711769104, "learning_rate": 1.6638871071199488e-07, "loss": 0.7961, "step": 79460 }, { "epoch": 0.9684594103810952, "grad_norm": 1.894623875617981, "learning_rate": 1.660679923027582e-07, "loss": 0.9107, "step": 79465 }, { "epoch": 0.968520346605243, "grad_norm": 1.887770414352417, "learning_rate": 1.657472738935215e-07, "loss": 0.8095, "step": 79470 }, { "epoch": 0.9685812828293907, "grad_norm": 1.7512445449829102, "learning_rate": 1.654265554842848e-07, "loss": 0.8054, "step": 79475 }, { "epoch": 0.9686422190535385, "grad_norm": 1.8271020650863647, "learning_rate": 1.651058370750481e-07, "loss": 0.7967, "step": 79480 }, { "epoch": 0.9687031552776864, "grad_norm": 2.016944646835327, "learning_rate": 1.6478511866581142e-07, "loss": 0.8655, "step": 79485 }, { "epoch": 0.9687640915018342, "grad_norm": 2.2888364791870117, "learning_rate": 1.6446440025657473e-07, "loss": 0.87, "step": 79490 }, { "epoch": 0.968825027725982, "grad_norm": 2.1908698081970215, "learning_rate": 1.6414368184733804e-07, "loss": 0.771, "step": 79495 }, { "epoch": 0.9688859639501298, "grad_norm": 1.9230238199234009, "learning_rate": 1.6382296343810134e-07, "loss": 0.9512, "step": 79500 }, { "epoch": 0.9689469001742776, "grad_norm": 1.9018042087554932, "learning_rate": 1.6350224502886465e-07, "loss": 0.8355, "step": 79505 }, { "epoch": 0.9690078363984254, "grad_norm": 2.063509464263916, "learning_rate": 1.6318152661962799e-07, "loss": 0.8553, "step": 79510 }, { "epoch": 0.9690687726225732, "grad_norm": 2.0471484661102295, "learning_rate": 1.628608082103913e-07, "loss": 0.7903, "step": 79515 }, { "epoch": 0.969129708846721, "grad_norm": 2.137819290161133, "learning_rate": 1.625400898011546e-07, "loss": 0.7672, "step": 79520 }, { "epoch": 0.9691906450708688, "grad_norm": 1.9955403804779053, "learning_rate": 1.622193713919179e-07, "loss": 0.8018, "step": 79525 }, { "epoch": 0.9692515812950167, "grad_norm": 2.7273647785186768, "learning_rate": 1.6189865298268122e-07, "loss": 0.8541, "step": 79530 }, { "epoch": 0.9693125175191645, "grad_norm": 1.936080813407898, "learning_rate": 1.6157793457344453e-07, "loss": 0.7955, "step": 79535 }, { "epoch": 0.9693734537433123, "grad_norm": 1.7573739290237427, "learning_rate": 1.6125721616420783e-07, "loss": 0.8125, "step": 79540 }, { "epoch": 0.96943438996746, "grad_norm": 1.8094291687011719, "learning_rate": 1.6093649775497114e-07, "loss": 0.7438, "step": 79545 }, { "epoch": 0.9694953261916078, "grad_norm": 1.726097822189331, "learning_rate": 1.6061577934573445e-07, "loss": 0.7964, "step": 79550 }, { "epoch": 0.9695562624157557, "grad_norm": 2.6621551513671875, "learning_rate": 1.6029506093649776e-07, "loss": 0.8068, "step": 79555 }, { "epoch": 0.9696171986399035, "grad_norm": 1.84457266330719, "learning_rate": 1.5997434252726107e-07, "loss": 0.8088, "step": 79560 }, { "epoch": 0.9696781348640513, "grad_norm": 1.6242727041244507, "learning_rate": 1.5965362411802437e-07, "loss": 0.8113, "step": 79565 }, { "epoch": 0.9697390710881991, "grad_norm": 1.937276005744934, "learning_rate": 1.5933290570878768e-07, "loss": 0.7734, "step": 79570 }, { "epoch": 0.969800007312347, "grad_norm": 2.170529365539551, "learning_rate": 1.59012187299551e-07, "loss": 0.8237, "step": 79575 }, { "epoch": 0.9698609435364947, "grad_norm": 1.8585673570632935, "learning_rate": 1.586914688903143e-07, "loss": 0.816, "step": 79580 }, { "epoch": 0.9699218797606425, "grad_norm": 1.7277225255966187, "learning_rate": 1.583707504810776e-07, "loss": 0.75, "step": 79585 }, { "epoch": 0.9699828159847903, "grad_norm": 1.8622733354568481, "learning_rate": 1.5805003207184091e-07, "loss": 0.8029, "step": 79590 }, { "epoch": 0.9700437522089381, "grad_norm": 2.1711201667785645, "learning_rate": 1.5772931366260422e-07, "loss": 0.8333, "step": 79595 }, { "epoch": 0.970104688433086, "grad_norm": 1.7071692943572998, "learning_rate": 1.5740859525336753e-07, "loss": 0.8173, "step": 79600 }, { "epoch": 0.9701656246572338, "grad_norm": 1.9978461265563965, "learning_rate": 1.5708787684413084e-07, "loss": 0.836, "step": 79605 }, { "epoch": 0.9702265608813816, "grad_norm": 1.921212077140808, "learning_rate": 1.567671584348942e-07, "loss": 0.7505, "step": 79610 }, { "epoch": 0.9702874971055293, "grad_norm": 1.9297771453857422, "learning_rate": 1.564464400256575e-07, "loss": 0.8466, "step": 79615 }, { "epoch": 0.9703484333296771, "grad_norm": 2.2897114753723145, "learning_rate": 1.561257216164208e-07, "loss": 0.8121, "step": 79620 }, { "epoch": 0.970409369553825, "grad_norm": 2.748629331588745, "learning_rate": 1.558050032071841e-07, "loss": 0.7704, "step": 79625 }, { "epoch": 0.9704703057779728, "grad_norm": 1.7472156286239624, "learning_rate": 1.554842847979474e-07, "loss": 0.865, "step": 79630 }, { "epoch": 0.9705312420021206, "grad_norm": 1.763437032699585, "learning_rate": 1.551635663887107e-07, "loss": 0.8052, "step": 79635 }, { "epoch": 0.9705921782262684, "grad_norm": 1.966897964477539, "learning_rate": 1.5484284797947402e-07, "loss": 0.8099, "step": 79640 }, { "epoch": 0.9706531144504162, "grad_norm": 2.045076608657837, "learning_rate": 1.5452212957023733e-07, "loss": 0.8075, "step": 79645 }, { "epoch": 0.970714050674564, "grad_norm": 2.1481094360351562, "learning_rate": 1.5420141116100066e-07, "loss": 0.8479, "step": 79650 }, { "epoch": 0.9707749868987118, "grad_norm": 1.8863435983657837, "learning_rate": 1.5388069275176397e-07, "loss": 0.812, "step": 79655 }, { "epoch": 0.9708359231228596, "grad_norm": 1.8370531797409058, "learning_rate": 1.5355997434252728e-07, "loss": 0.8206, "step": 79660 }, { "epoch": 0.9708968593470074, "grad_norm": 1.892977237701416, "learning_rate": 1.5323925593329059e-07, "loss": 0.8321, "step": 79665 }, { "epoch": 0.9709577955711552, "grad_norm": 1.8742326498031616, "learning_rate": 1.529185375240539e-07, "loss": 0.8396, "step": 79670 }, { "epoch": 0.9710187317953031, "grad_norm": 2.103701114654541, "learning_rate": 1.525978191148172e-07, "loss": 0.8434, "step": 79675 }, { "epoch": 0.9710796680194509, "grad_norm": 2.1705732345581055, "learning_rate": 1.522771007055805e-07, "loss": 0.8711, "step": 79680 }, { "epoch": 0.9711406042435986, "grad_norm": 1.73965322971344, "learning_rate": 1.5195638229634384e-07, "loss": 0.8502, "step": 79685 }, { "epoch": 0.9712015404677464, "grad_norm": 2.147761344909668, "learning_rate": 1.5163566388710715e-07, "loss": 0.8575, "step": 79690 }, { "epoch": 0.9712624766918942, "grad_norm": 3.1549272537231445, "learning_rate": 1.5131494547787046e-07, "loss": 0.8437, "step": 79695 }, { "epoch": 0.9713234129160421, "grad_norm": 1.988089680671692, "learning_rate": 1.5099422706863377e-07, "loss": 0.8151, "step": 79700 }, { "epoch": 0.9713843491401899, "grad_norm": 1.9525094032287598, "learning_rate": 1.5067350865939708e-07, "loss": 0.7752, "step": 79705 }, { "epoch": 0.9714452853643377, "grad_norm": 2.131010055541992, "learning_rate": 1.5035279025016038e-07, "loss": 0.8109, "step": 79710 }, { "epoch": 0.9715062215884855, "grad_norm": 1.680915117263794, "learning_rate": 1.500320718409237e-07, "loss": 0.7427, "step": 79715 }, { "epoch": 0.9715671578126333, "grad_norm": 2.0659124851226807, "learning_rate": 1.49711353431687e-07, "loss": 0.7786, "step": 79720 }, { "epoch": 0.9716280940367811, "grad_norm": 2.6611475944519043, "learning_rate": 1.493906350224503e-07, "loss": 0.7842, "step": 79725 }, { "epoch": 0.9716890302609289, "grad_norm": 1.9097398519515991, "learning_rate": 1.4906991661321362e-07, "loss": 0.8515, "step": 79730 }, { "epoch": 0.9717499664850767, "grad_norm": 2.0161499977111816, "learning_rate": 1.4874919820397692e-07, "loss": 0.8101, "step": 79735 }, { "epoch": 0.9718109027092245, "grad_norm": 1.7497962713241577, "learning_rate": 1.4842847979474023e-07, "loss": 0.7571, "step": 79740 }, { "epoch": 0.9718718389333724, "grad_norm": 1.7931464910507202, "learning_rate": 1.4810776138550354e-07, "loss": 0.8452, "step": 79745 }, { "epoch": 0.9719327751575202, "grad_norm": 2.388465642929077, "learning_rate": 1.4778704297626685e-07, "loss": 0.862, "step": 79750 }, { "epoch": 0.9719937113816679, "grad_norm": 2.278301239013672, "learning_rate": 1.4746632456703016e-07, "loss": 0.7903, "step": 79755 }, { "epoch": 0.9720546476058157, "grad_norm": 1.8588112592697144, "learning_rate": 1.4714560615779346e-07, "loss": 0.8274, "step": 79760 }, { "epoch": 0.9721155838299635, "grad_norm": 2.086348056793213, "learning_rate": 1.4682488774855677e-07, "loss": 0.7318, "step": 79765 }, { "epoch": 0.9721765200541114, "grad_norm": 1.9000588655471802, "learning_rate": 1.4650416933932008e-07, "loss": 0.852, "step": 79770 }, { "epoch": 0.9722374562782592, "grad_norm": 2.045125961303711, "learning_rate": 1.461834509300834e-07, "loss": 0.8556, "step": 79775 }, { "epoch": 0.972298392502407, "grad_norm": 1.9916105270385742, "learning_rate": 1.458627325208467e-07, "loss": 0.7916, "step": 79780 }, { "epoch": 0.9723593287265548, "grad_norm": 1.9354777336120605, "learning_rate": 1.4554201411161e-07, "loss": 0.7473, "step": 79785 }, { "epoch": 0.9724202649507026, "grad_norm": 2.3734524250030518, "learning_rate": 1.4522129570237334e-07, "loss": 0.7606, "step": 79790 }, { "epoch": 0.9724812011748504, "grad_norm": 1.9111202955245972, "learning_rate": 1.4490057729313665e-07, "loss": 0.7909, "step": 79795 }, { "epoch": 0.9725421373989982, "grad_norm": 1.6258022785186768, "learning_rate": 1.4457985888389995e-07, "loss": 0.8453, "step": 79800 }, { "epoch": 0.972603073623146, "grad_norm": 1.7341705560684204, "learning_rate": 1.4425914047466326e-07, "loss": 0.7945, "step": 79805 }, { "epoch": 0.9726640098472938, "grad_norm": 2.0219709873199463, "learning_rate": 1.4393842206542657e-07, "loss": 0.8007, "step": 79810 }, { "epoch": 0.9727249460714417, "grad_norm": 1.936633586883545, "learning_rate": 1.4361770365618988e-07, "loss": 0.8145, "step": 79815 }, { "epoch": 0.9727858822955895, "grad_norm": 1.8010125160217285, "learning_rate": 1.4329698524695318e-07, "loss": 0.7391, "step": 79820 }, { "epoch": 0.9728468185197372, "grad_norm": 2.2783706188201904, "learning_rate": 1.429762668377165e-07, "loss": 0.8073, "step": 79825 }, { "epoch": 0.972907754743885, "grad_norm": 2.271331310272217, "learning_rate": 1.426555484284798e-07, "loss": 0.8049, "step": 79830 }, { "epoch": 0.9729686909680328, "grad_norm": 1.7207609415054321, "learning_rate": 1.4233483001924314e-07, "loss": 0.8017, "step": 79835 }, { "epoch": 0.9730296271921807, "grad_norm": 1.8649345636367798, "learning_rate": 1.4201411161000644e-07, "loss": 0.8025, "step": 79840 }, { "epoch": 0.9730905634163285, "grad_norm": 2.379988193511963, "learning_rate": 1.4169339320076975e-07, "loss": 0.8081, "step": 79845 }, { "epoch": 0.9731514996404763, "grad_norm": 2.106301784515381, "learning_rate": 1.4137267479153306e-07, "loss": 0.754, "step": 79850 }, { "epoch": 0.9732124358646241, "grad_norm": 1.873858094215393, "learning_rate": 1.4105195638229637e-07, "loss": 0.8157, "step": 79855 }, { "epoch": 0.9732733720887718, "grad_norm": 1.8860620260238647, "learning_rate": 1.4073123797305967e-07, "loss": 0.7905, "step": 79860 }, { "epoch": 0.9733343083129197, "grad_norm": 1.964478611946106, "learning_rate": 1.4041051956382298e-07, "loss": 0.7776, "step": 79865 }, { "epoch": 0.9733952445370675, "grad_norm": 1.8607884645462036, "learning_rate": 1.400898011545863e-07, "loss": 0.701, "step": 79870 }, { "epoch": 0.9734561807612153, "grad_norm": 1.8332301378250122, "learning_rate": 1.397690827453496e-07, "loss": 0.8109, "step": 79875 }, { "epoch": 0.9735171169853631, "grad_norm": 1.9030238389968872, "learning_rate": 1.394483643361129e-07, "loss": 0.8244, "step": 79880 }, { "epoch": 0.973578053209511, "grad_norm": 1.9876577854156494, "learning_rate": 1.3912764592687621e-07, "loss": 0.7256, "step": 79885 }, { "epoch": 0.9736389894336588, "grad_norm": 2.0423429012298584, "learning_rate": 1.3880692751763952e-07, "loss": 0.8707, "step": 79890 }, { "epoch": 0.9736999256578065, "grad_norm": 1.8934557437896729, "learning_rate": 1.3848620910840283e-07, "loss": 0.7508, "step": 79895 }, { "epoch": 0.9737608618819543, "grad_norm": 1.9105316400527954, "learning_rate": 1.3816549069916614e-07, "loss": 0.7741, "step": 79900 }, { "epoch": 0.9738217981061021, "grad_norm": 1.8707749843597412, "learning_rate": 1.3784477228992945e-07, "loss": 0.7673, "step": 79905 }, { "epoch": 0.97388273433025, "grad_norm": 1.8977798223495483, "learning_rate": 1.3752405388069275e-07, "loss": 0.7665, "step": 79910 }, { "epoch": 0.9739436705543978, "grad_norm": 1.8992016315460205, "learning_rate": 1.3720333547145606e-07, "loss": 0.7504, "step": 79915 }, { "epoch": 0.9740046067785456, "grad_norm": 2.1423323154449463, "learning_rate": 1.3688261706221937e-07, "loss": 0.785, "step": 79920 }, { "epoch": 0.9740655430026934, "grad_norm": 2.1801834106445312, "learning_rate": 1.3656189865298268e-07, "loss": 0.8795, "step": 79925 }, { "epoch": 0.9741264792268411, "grad_norm": 1.844839096069336, "learning_rate": 1.36241180243746e-07, "loss": 0.8561, "step": 79930 }, { "epoch": 0.974187415450989, "grad_norm": 1.8695197105407715, "learning_rate": 1.3592046183450932e-07, "loss": 0.797, "step": 79935 }, { "epoch": 0.9742483516751368, "grad_norm": 2.321765184402466, "learning_rate": 1.3559974342527263e-07, "loss": 0.854, "step": 79940 }, { "epoch": 0.9743092878992846, "grad_norm": 1.8369802236557007, "learning_rate": 1.3527902501603594e-07, "loss": 0.7721, "step": 79945 }, { "epoch": 0.9743702241234324, "grad_norm": 2.0059447288513184, "learning_rate": 1.3495830660679924e-07, "loss": 0.8658, "step": 79950 }, { "epoch": 0.9744311603475803, "grad_norm": 1.9374151229858398, "learning_rate": 1.3463758819756255e-07, "loss": 0.8458, "step": 79955 }, { "epoch": 0.9744920965717281, "grad_norm": 2.116702079772949, "learning_rate": 1.3431686978832586e-07, "loss": 0.8136, "step": 79960 }, { "epoch": 0.9745530327958758, "grad_norm": 2.2873685359954834, "learning_rate": 1.3399615137908917e-07, "loss": 0.7858, "step": 79965 }, { "epoch": 0.9746139690200236, "grad_norm": 1.617211937904358, "learning_rate": 1.3367543296985248e-07, "loss": 0.7133, "step": 79970 }, { "epoch": 0.9746749052441714, "grad_norm": 2.1259765625, "learning_rate": 1.3335471456061578e-07, "loss": 0.7865, "step": 79975 }, { "epoch": 0.9747358414683193, "grad_norm": 1.8690241575241089, "learning_rate": 1.330339961513791e-07, "loss": 0.7894, "step": 79980 }, { "epoch": 0.9747967776924671, "grad_norm": 1.7942522764205933, "learning_rate": 1.3271327774214243e-07, "loss": 0.8515, "step": 79985 }, { "epoch": 0.9748577139166149, "grad_norm": 2.2349348068237305, "learning_rate": 1.3239255933290573e-07, "loss": 0.842, "step": 79990 }, { "epoch": 0.9749186501407627, "grad_norm": 2.4388961791992188, "learning_rate": 1.3207184092366904e-07, "loss": 0.7927, "step": 79995 }, { "epoch": 0.9749795863649104, "grad_norm": 1.9789419174194336, "learning_rate": 1.3175112251443235e-07, "loss": 0.8347, "step": 80000 }, { "epoch": 0.9750405225890583, "grad_norm": 1.7478525638580322, "learning_rate": 1.3143040410519566e-07, "loss": 0.7716, "step": 80005 }, { "epoch": 0.9751014588132061, "grad_norm": 1.9163588285446167, "learning_rate": 1.3110968569595897e-07, "loss": 0.7814, "step": 80010 }, { "epoch": 0.9751623950373539, "grad_norm": 1.7798230648040771, "learning_rate": 1.3078896728672227e-07, "loss": 0.7567, "step": 80015 }, { "epoch": 0.9752233312615017, "grad_norm": 1.8560943603515625, "learning_rate": 1.3046824887748558e-07, "loss": 0.7409, "step": 80020 }, { "epoch": 0.9752842674856496, "grad_norm": 1.7995903491973877, "learning_rate": 1.301475304682489e-07, "loss": 0.8338, "step": 80025 }, { "epoch": 0.9753452037097974, "grad_norm": 1.9861502647399902, "learning_rate": 1.298268120590122e-07, "loss": 0.7876, "step": 80030 }, { "epoch": 0.9754061399339451, "grad_norm": 2.493767261505127, "learning_rate": 1.295060936497755e-07, "loss": 0.7998, "step": 80035 }, { "epoch": 0.9754670761580929, "grad_norm": 2.257857322692871, "learning_rate": 1.2918537524053881e-07, "loss": 0.7582, "step": 80040 }, { "epoch": 0.9755280123822407, "grad_norm": 2.0554394721984863, "learning_rate": 1.2886465683130212e-07, "loss": 0.7518, "step": 80045 }, { "epoch": 0.9755889486063886, "grad_norm": 1.8898862600326538, "learning_rate": 1.2854393842206543e-07, "loss": 0.7731, "step": 80050 }, { "epoch": 0.9756498848305364, "grad_norm": 2.015012741088867, "learning_rate": 1.2822322001282874e-07, "loss": 0.827, "step": 80055 }, { "epoch": 0.9757108210546842, "grad_norm": 1.984089732170105, "learning_rate": 1.2790250160359205e-07, "loss": 0.8313, "step": 80060 }, { "epoch": 0.9757717572788319, "grad_norm": 2.278437376022339, "learning_rate": 1.2758178319435535e-07, "loss": 0.8115, "step": 80065 }, { "epoch": 0.9758326935029797, "grad_norm": 1.835496187210083, "learning_rate": 1.272610647851187e-07, "loss": 0.764, "step": 80070 }, { "epoch": 0.9758936297271276, "grad_norm": 2.292177200317383, "learning_rate": 1.26940346375882e-07, "loss": 0.8125, "step": 80075 }, { "epoch": 0.9759545659512754, "grad_norm": 2.081949472427368, "learning_rate": 1.266196279666453e-07, "loss": 0.8218, "step": 80080 }, { "epoch": 0.9760155021754232, "grad_norm": 1.994532585144043, "learning_rate": 1.262989095574086e-07, "loss": 0.7668, "step": 80085 }, { "epoch": 0.976076438399571, "grad_norm": 1.832074761390686, "learning_rate": 1.2597819114817192e-07, "loss": 0.7763, "step": 80090 }, { "epoch": 0.9761373746237189, "grad_norm": 1.979889154434204, "learning_rate": 1.2565747273893523e-07, "loss": 0.7937, "step": 80095 }, { "epoch": 0.9761983108478666, "grad_norm": 1.8817046880722046, "learning_rate": 1.2533675432969854e-07, "loss": 0.8119, "step": 80100 }, { "epoch": 0.9762592470720144, "grad_norm": 1.8870365619659424, "learning_rate": 1.2501603592046184e-07, "loss": 0.7795, "step": 80105 }, { "epoch": 0.9763201832961622, "grad_norm": 1.6707309484481812, "learning_rate": 1.2469531751122515e-07, "loss": 0.7726, "step": 80110 }, { "epoch": 0.97638111952031, "grad_norm": 1.8697153329849243, "learning_rate": 1.2437459910198846e-07, "loss": 0.7579, "step": 80115 }, { "epoch": 0.9764420557444579, "grad_norm": 2.1966311931610107, "learning_rate": 1.2405388069275177e-07, "loss": 0.8054, "step": 80120 }, { "epoch": 0.9765029919686057, "grad_norm": 2.186415433883667, "learning_rate": 1.2373316228351508e-07, "loss": 0.7907, "step": 80125 }, { "epoch": 0.9765639281927535, "grad_norm": 1.9059375524520874, "learning_rate": 1.2341244387427838e-07, "loss": 0.8141, "step": 80130 }, { "epoch": 0.9766248644169012, "grad_norm": 1.9491053819656372, "learning_rate": 1.230917254650417e-07, "loss": 0.8155, "step": 80135 }, { "epoch": 0.976685800641049, "grad_norm": 1.7294272184371948, "learning_rate": 1.2277100705580503e-07, "loss": 0.8034, "step": 80140 }, { "epoch": 0.9767467368651969, "grad_norm": 1.9818713665008545, "learning_rate": 1.2245028864656833e-07, "loss": 0.7509, "step": 80145 }, { "epoch": 0.9768076730893447, "grad_norm": 2.2086331844329834, "learning_rate": 1.2212957023733164e-07, "loss": 0.768, "step": 80150 }, { "epoch": 0.9768686093134925, "grad_norm": 1.8768197298049927, "learning_rate": 1.2180885182809495e-07, "loss": 0.8265, "step": 80155 }, { "epoch": 0.9769295455376403, "grad_norm": 2.5332813262939453, "learning_rate": 1.2148813341885826e-07, "loss": 0.8604, "step": 80160 }, { "epoch": 0.9769904817617882, "grad_norm": 1.9340472221374512, "learning_rate": 1.2116741500962157e-07, "loss": 0.7858, "step": 80165 }, { "epoch": 0.9770514179859359, "grad_norm": 1.9674878120422363, "learning_rate": 1.2084669660038487e-07, "loss": 0.7891, "step": 80170 }, { "epoch": 0.9771123542100837, "grad_norm": 2.0319511890411377, "learning_rate": 1.2052597819114818e-07, "loss": 0.8043, "step": 80175 }, { "epoch": 0.9771732904342315, "grad_norm": 2.1753501892089844, "learning_rate": 1.202052597819115e-07, "loss": 0.8342, "step": 80180 }, { "epoch": 0.9772342266583793, "grad_norm": 1.8229843378067017, "learning_rate": 1.198845413726748e-07, "loss": 0.79, "step": 80185 }, { "epoch": 0.9772951628825272, "grad_norm": 1.963523507118225, "learning_rate": 1.195638229634381e-07, "loss": 0.7805, "step": 80190 }, { "epoch": 0.977356099106675, "grad_norm": 1.9900622367858887, "learning_rate": 1.192431045542014e-07, "loss": 0.7329, "step": 80195 }, { "epoch": 0.9774170353308228, "grad_norm": 1.982165813446045, "learning_rate": 1.1892238614496473e-07, "loss": 0.8684, "step": 80200 }, { "epoch": 0.9774779715549705, "grad_norm": 2.1778218746185303, "learning_rate": 1.1860166773572804e-07, "loss": 0.8166, "step": 80205 }, { "epoch": 0.9775389077791183, "grad_norm": 2.2627334594726562, "learning_rate": 1.1828094932649135e-07, "loss": 0.796, "step": 80210 }, { "epoch": 0.9775998440032662, "grad_norm": 1.9936699867248535, "learning_rate": 1.1796023091725466e-07, "loss": 0.8699, "step": 80215 }, { "epoch": 0.977660780227414, "grad_norm": 2.077713966369629, "learning_rate": 1.1763951250801797e-07, "loss": 0.7995, "step": 80220 }, { "epoch": 0.9777217164515618, "grad_norm": 2.154491901397705, "learning_rate": 1.1731879409878127e-07, "loss": 0.7863, "step": 80225 }, { "epoch": 0.9777826526757096, "grad_norm": 1.8084255456924438, "learning_rate": 1.1699807568954458e-07, "loss": 0.8169, "step": 80230 }, { "epoch": 0.9778435888998575, "grad_norm": 1.8311115503311157, "learning_rate": 1.166773572803079e-07, "loss": 0.8795, "step": 80235 }, { "epoch": 0.9779045251240052, "grad_norm": 1.6591904163360596, "learning_rate": 1.1635663887107121e-07, "loss": 0.8053, "step": 80240 }, { "epoch": 0.977965461348153, "grad_norm": 1.9314863681793213, "learning_rate": 1.1603592046183452e-07, "loss": 0.755, "step": 80245 }, { "epoch": 0.9780263975723008, "grad_norm": 1.9544997215270996, "learning_rate": 1.1571520205259783e-07, "loss": 0.766, "step": 80250 }, { "epoch": 0.9780873337964486, "grad_norm": 1.8977588415145874, "learning_rate": 1.1539448364336113e-07, "loss": 0.8475, "step": 80255 }, { "epoch": 0.9781482700205965, "grad_norm": 2.1775760650634766, "learning_rate": 1.1507376523412444e-07, "loss": 0.7961, "step": 80260 }, { "epoch": 0.9782092062447443, "grad_norm": 2.170586347579956, "learning_rate": 1.1475304682488775e-07, "loss": 0.7642, "step": 80265 }, { "epoch": 0.9782701424688921, "grad_norm": 1.9542073011398315, "learning_rate": 1.1443232841565106e-07, "loss": 0.8603, "step": 80270 }, { "epoch": 0.9783310786930398, "grad_norm": 1.6954864263534546, "learning_rate": 1.1411161000641437e-07, "loss": 0.8172, "step": 80275 }, { "epoch": 0.9783920149171876, "grad_norm": 2.1919336318969727, "learning_rate": 1.1379089159717767e-07, "loss": 0.8304, "step": 80280 }, { "epoch": 0.9784529511413355, "grad_norm": 1.9850819110870361, "learning_rate": 1.1347017318794098e-07, "loss": 0.8873, "step": 80285 }, { "epoch": 0.9785138873654833, "grad_norm": 2.1651885509490967, "learning_rate": 1.1314945477870432e-07, "loss": 0.785, "step": 80290 }, { "epoch": 0.9785748235896311, "grad_norm": 2.173008918762207, "learning_rate": 1.1282873636946762e-07, "loss": 0.8553, "step": 80295 }, { "epoch": 0.9786357598137789, "grad_norm": 1.7812126874923706, "learning_rate": 1.1250801796023093e-07, "loss": 0.8086, "step": 80300 }, { "epoch": 0.9786966960379267, "grad_norm": 1.9208451509475708, "learning_rate": 1.1218729955099424e-07, "loss": 0.7104, "step": 80305 }, { "epoch": 0.9787576322620745, "grad_norm": 1.8202992677688599, "learning_rate": 1.1186658114175755e-07, "loss": 0.8113, "step": 80310 }, { "epoch": 0.9788185684862223, "grad_norm": 2.1114437580108643, "learning_rate": 1.1154586273252086e-07, "loss": 0.826, "step": 80315 }, { "epoch": 0.9788795047103701, "grad_norm": 1.9217430353164673, "learning_rate": 1.1122514432328416e-07, "loss": 0.7653, "step": 80320 }, { "epoch": 0.9789404409345179, "grad_norm": 2.260620594024658, "learning_rate": 1.1090442591404749e-07, "loss": 0.8134, "step": 80325 }, { "epoch": 0.9790013771586658, "grad_norm": 1.9356920719146729, "learning_rate": 1.105837075048108e-07, "loss": 0.7404, "step": 80330 }, { "epoch": 0.9790623133828136, "grad_norm": 1.9892553091049194, "learning_rate": 1.102629890955741e-07, "loss": 0.7767, "step": 80335 }, { "epoch": 0.9791232496069614, "grad_norm": 2.251136302947998, "learning_rate": 1.0994227068633741e-07, "loss": 0.7804, "step": 80340 }, { "epoch": 0.9791841858311091, "grad_norm": 1.881108045578003, "learning_rate": 1.0962155227710072e-07, "loss": 0.8271, "step": 80345 }, { "epoch": 0.9792451220552569, "grad_norm": 2.0353307723999023, "learning_rate": 1.0930083386786403e-07, "loss": 0.8195, "step": 80350 }, { "epoch": 0.9793060582794048, "grad_norm": 1.9198170900344849, "learning_rate": 1.0898011545862733e-07, "loss": 0.7567, "step": 80355 }, { "epoch": 0.9793669945035526, "grad_norm": 1.6703907251358032, "learning_rate": 1.0865939704939064e-07, "loss": 0.8145, "step": 80360 }, { "epoch": 0.9794279307277004, "grad_norm": 2.314723014831543, "learning_rate": 1.0833867864015395e-07, "loss": 0.852, "step": 80365 }, { "epoch": 0.9794888669518482, "grad_norm": 2.1050870418548584, "learning_rate": 1.0801796023091726e-07, "loss": 0.7811, "step": 80370 }, { "epoch": 0.979549803175996, "grad_norm": 1.7992113828659058, "learning_rate": 1.0769724182168058e-07, "loss": 0.7607, "step": 80375 }, { "epoch": 0.9796107394001438, "grad_norm": 2.6904473304748535, "learning_rate": 1.0737652341244389e-07, "loss": 0.7637, "step": 80380 }, { "epoch": 0.9796716756242916, "grad_norm": 2.276517629623413, "learning_rate": 1.070558050032072e-07, "loss": 0.8296, "step": 80385 }, { "epoch": 0.9797326118484394, "grad_norm": 1.755750060081482, "learning_rate": 1.067350865939705e-07, "loss": 0.7659, "step": 80390 }, { "epoch": 0.9797935480725872, "grad_norm": 1.993202805519104, "learning_rate": 1.0641436818473381e-07, "loss": 0.7401, "step": 80395 }, { "epoch": 0.979854484296735, "grad_norm": 1.9828846454620361, "learning_rate": 1.0609364977549712e-07, "loss": 0.7878, "step": 80400 }, { "epoch": 0.9799154205208829, "grad_norm": 1.9658770561218262, "learning_rate": 1.0577293136626043e-07, "loss": 0.7911, "step": 80405 }, { "epoch": 0.9799763567450307, "grad_norm": 2.2950432300567627, "learning_rate": 1.0545221295702373e-07, "loss": 0.8207, "step": 80410 }, { "epoch": 0.9800372929691784, "grad_norm": 1.7068959474563599, "learning_rate": 1.0513149454778704e-07, "loss": 0.7939, "step": 80415 }, { "epoch": 0.9800982291933262, "grad_norm": 2.1243717670440674, "learning_rate": 1.0481077613855035e-07, "loss": 0.7521, "step": 80420 }, { "epoch": 0.980159165417474, "grad_norm": 1.8913801908493042, "learning_rate": 1.0449005772931366e-07, "loss": 0.7462, "step": 80425 }, { "epoch": 0.9802201016416219, "grad_norm": 2.1650569438934326, "learning_rate": 1.0416933932007698e-07, "loss": 0.8125, "step": 80430 }, { "epoch": 0.9802810378657697, "grad_norm": 1.634705662727356, "learning_rate": 1.0384862091084029e-07, "loss": 0.755, "step": 80435 }, { "epoch": 0.9803419740899175, "grad_norm": 1.7481276988983154, "learning_rate": 1.0352790250160361e-07, "loss": 0.7836, "step": 80440 }, { "epoch": 0.9804029103140653, "grad_norm": 1.9236148595809937, "learning_rate": 1.0320718409236692e-07, "loss": 0.7935, "step": 80445 }, { "epoch": 0.9804638465382131, "grad_norm": 1.8415708541870117, "learning_rate": 1.0288646568313022e-07, "loss": 0.8395, "step": 80450 }, { "epoch": 0.9805247827623609, "grad_norm": 2.4374852180480957, "learning_rate": 1.0256574727389353e-07, "loss": 0.7706, "step": 80455 }, { "epoch": 0.9805857189865087, "grad_norm": 2.174074649810791, "learning_rate": 1.0224502886465684e-07, "loss": 0.8455, "step": 80460 }, { "epoch": 0.9806466552106565, "grad_norm": 2.3427555561065674, "learning_rate": 1.0192431045542016e-07, "loss": 0.8673, "step": 80465 }, { "epoch": 0.9807075914348043, "grad_norm": 2.0441932678222656, "learning_rate": 1.0160359204618347e-07, "loss": 0.7932, "step": 80470 }, { "epoch": 0.9807685276589522, "grad_norm": 2.1215853691101074, "learning_rate": 1.0128287363694678e-07, "loss": 0.813, "step": 80475 }, { "epoch": 0.9808294638831, "grad_norm": 1.7482013702392578, "learning_rate": 1.0096215522771008e-07, "loss": 0.7923, "step": 80480 }, { "epoch": 0.9808904001072477, "grad_norm": 2.006101369857788, "learning_rate": 1.0064143681847339e-07, "loss": 0.8319, "step": 80485 }, { "epoch": 0.9809513363313955, "grad_norm": 2.5287485122680664, "learning_rate": 1.003207184092367e-07, "loss": 0.8061, "step": 80490 }, { "epoch": 0.9810122725555434, "grad_norm": 1.7005308866500854, "learning_rate": 1.0000000000000001e-07, "loss": 0.7804, "step": 80495 }, { "epoch": 0.9810732087796912, "grad_norm": 2.053056001663208, "learning_rate": 9.967928159076332e-08, "loss": 0.8178, "step": 80500 }, { "epoch": 0.981134145003839, "grad_norm": 2.0179927349090576, "learning_rate": 9.935856318152662e-08, "loss": 0.7821, "step": 80505 }, { "epoch": 0.9811950812279868, "grad_norm": 1.95384681224823, "learning_rate": 9.903784477228993e-08, "loss": 0.7546, "step": 80510 }, { "epoch": 0.9812560174521346, "grad_norm": 2.340367078781128, "learning_rate": 9.871712636305325e-08, "loss": 0.8693, "step": 80515 }, { "epoch": 0.9813169536762824, "grad_norm": 1.5996332168579102, "learning_rate": 9.839640795381656e-08, "loss": 0.824, "step": 80520 }, { "epoch": 0.9813778899004302, "grad_norm": 2.208069086074829, "learning_rate": 9.807568954457987e-08, "loss": 0.8286, "step": 80525 }, { "epoch": 0.981438826124578, "grad_norm": 2.131234645843506, "learning_rate": 9.775497113534318e-08, "loss": 0.7273, "step": 80530 }, { "epoch": 0.9814997623487258, "grad_norm": 1.854617953300476, "learning_rate": 9.743425272610649e-08, "loss": 0.8092, "step": 80535 }, { "epoch": 0.9815606985728736, "grad_norm": 1.7355211973190308, "learning_rate": 9.71135343168698e-08, "loss": 0.7365, "step": 80540 }, { "epoch": 0.9816216347970215, "grad_norm": 1.8323655128479004, "learning_rate": 9.67928159076331e-08, "loss": 0.7392, "step": 80545 }, { "epoch": 0.9816825710211693, "grad_norm": 2.129199981689453, "learning_rate": 9.647209749839641e-08, "loss": 0.8219, "step": 80550 }, { "epoch": 0.981743507245317, "grad_norm": 1.8336796760559082, "learning_rate": 9.615137908915972e-08, "loss": 0.8233, "step": 80555 }, { "epoch": 0.9818044434694648, "grad_norm": 2.186645746231079, "learning_rate": 9.583066067992303e-08, "loss": 0.8063, "step": 80560 }, { "epoch": 0.9818653796936126, "grad_norm": 1.9299778938293457, "learning_rate": 9.550994227068633e-08, "loss": 0.8599, "step": 80565 }, { "epoch": 0.9819263159177605, "grad_norm": 1.7599986791610718, "learning_rate": 9.518922386144965e-08, "loss": 0.8248, "step": 80570 }, { "epoch": 0.9819872521419083, "grad_norm": 1.7601522207260132, "learning_rate": 9.486850545221296e-08, "loss": 0.8128, "step": 80575 }, { "epoch": 0.9820481883660561, "grad_norm": 2.396030902862549, "learning_rate": 9.454778704297627e-08, "loss": 0.8325, "step": 80580 }, { "epoch": 0.9821091245902039, "grad_norm": 2.076211929321289, "learning_rate": 9.422706863373958e-08, "loss": 0.8305, "step": 80585 }, { "epoch": 0.9821700608143517, "grad_norm": 1.9370146989822388, "learning_rate": 9.39063502245029e-08, "loss": 0.8308, "step": 80590 }, { "epoch": 0.9822309970384995, "grad_norm": 2.7527148723602295, "learning_rate": 9.358563181526621e-08, "loss": 0.791, "step": 80595 }, { "epoch": 0.9822919332626473, "grad_norm": 1.8093849420547485, "learning_rate": 9.326491340602952e-08, "loss": 0.8543, "step": 80600 }, { "epoch": 0.9823528694867951, "grad_norm": 2.094353437423706, "learning_rate": 9.294419499679284e-08, "loss": 0.8371, "step": 80605 }, { "epoch": 0.982413805710943, "grad_norm": 2.1153719425201416, "learning_rate": 9.262347658755614e-08, "loss": 0.7798, "step": 80610 }, { "epoch": 0.9824747419350908, "grad_norm": 2.0390172004699707, "learning_rate": 9.230275817831945e-08, "loss": 0.7291, "step": 80615 }, { "epoch": 0.9825356781592386, "grad_norm": 1.978287935256958, "learning_rate": 9.198203976908276e-08, "loss": 0.8324, "step": 80620 }, { "epoch": 0.9825966143833863, "grad_norm": 1.7542093992233276, "learning_rate": 9.166132135984607e-08, "loss": 0.8003, "step": 80625 }, { "epoch": 0.9826575506075341, "grad_norm": 2.0755345821380615, "learning_rate": 9.134060295060938e-08, "loss": 0.8183, "step": 80630 }, { "epoch": 0.982718486831682, "grad_norm": 2.6099514961242676, "learning_rate": 9.101988454137268e-08, "loss": 0.7949, "step": 80635 }, { "epoch": 0.9827794230558298, "grad_norm": 1.9089303016662598, "learning_rate": 9.069916613213599e-08, "loss": 0.7478, "step": 80640 }, { "epoch": 0.9828403592799776, "grad_norm": 2.0182688236236572, "learning_rate": 9.03784477228993e-08, "loss": 0.8627, "step": 80645 }, { "epoch": 0.9829012955041254, "grad_norm": 2.12727427482605, "learning_rate": 9.005772931366261e-08, "loss": 0.754, "step": 80650 }, { "epoch": 0.9829622317282732, "grad_norm": 1.822388768196106, "learning_rate": 8.973701090442592e-08, "loss": 0.8678, "step": 80655 }, { "epoch": 0.983023167952421, "grad_norm": 1.7680261135101318, "learning_rate": 8.941629249518924e-08, "loss": 0.8657, "step": 80660 }, { "epoch": 0.9830841041765688, "grad_norm": 1.9017935991287231, "learning_rate": 8.909557408595254e-08, "loss": 0.8171, "step": 80665 }, { "epoch": 0.9831450404007166, "grad_norm": 2.035961627960205, "learning_rate": 8.877485567671585e-08, "loss": 0.8394, "step": 80670 }, { "epoch": 0.9832059766248644, "grad_norm": 2.20876145362854, "learning_rate": 8.845413726747916e-08, "loss": 0.7617, "step": 80675 }, { "epoch": 0.9832669128490122, "grad_norm": 1.8622410297393799, "learning_rate": 8.813341885824247e-08, "loss": 0.813, "step": 80680 }, { "epoch": 0.9833278490731601, "grad_norm": 1.9108487367630005, "learning_rate": 8.781270044900578e-08, "loss": 0.8111, "step": 80685 }, { "epoch": 0.9833887852973079, "grad_norm": 2.047339677810669, "learning_rate": 8.749198203976908e-08, "loss": 0.8225, "step": 80690 }, { "epoch": 0.9834497215214556, "grad_norm": 2.19331431388855, "learning_rate": 8.717126363053239e-08, "loss": 0.8364, "step": 80695 }, { "epoch": 0.9835106577456034, "grad_norm": 1.9644815921783447, "learning_rate": 8.68505452212957e-08, "loss": 0.803, "step": 80700 }, { "epoch": 0.9835715939697512, "grad_norm": 2.333401679992676, "learning_rate": 8.652982681205901e-08, "loss": 0.8098, "step": 80705 }, { "epoch": 0.9836325301938991, "grad_norm": 2.162691593170166, "learning_rate": 8.620910840282233e-08, "loss": 0.7676, "step": 80710 }, { "epoch": 0.9836934664180469, "grad_norm": 1.9157066345214844, "learning_rate": 8.588838999358564e-08, "loss": 0.7854, "step": 80715 }, { "epoch": 0.9837544026421947, "grad_norm": 2.1051220893859863, "learning_rate": 8.556767158434895e-08, "loss": 0.8092, "step": 80720 }, { "epoch": 0.9838153388663425, "grad_norm": 2.793523073196411, "learning_rate": 8.524695317511225e-08, "loss": 0.7951, "step": 80725 }, { "epoch": 0.9838762750904902, "grad_norm": 1.7801995277404785, "learning_rate": 8.492623476587556e-08, "loss": 0.7654, "step": 80730 }, { "epoch": 0.9839372113146381, "grad_norm": 1.9997731447219849, "learning_rate": 8.460551635663887e-08, "loss": 0.7947, "step": 80735 }, { "epoch": 0.9839981475387859, "grad_norm": 1.871859073638916, "learning_rate": 8.428479794740219e-08, "loss": 0.8267, "step": 80740 }, { "epoch": 0.9840590837629337, "grad_norm": 2.210322618484497, "learning_rate": 8.396407953816551e-08, "loss": 0.8001, "step": 80745 }, { "epoch": 0.9841200199870815, "grad_norm": 2.08164381980896, "learning_rate": 8.364336112892882e-08, "loss": 0.7186, "step": 80750 }, { "epoch": 0.9841809562112294, "grad_norm": 2.0703389644622803, "learning_rate": 8.332264271969213e-08, "loss": 0.827, "step": 80755 }, { "epoch": 0.9842418924353772, "grad_norm": 1.9480235576629639, "learning_rate": 8.300192431045544e-08, "loss": 0.8154, "step": 80760 }, { "epoch": 0.9843028286595249, "grad_norm": 2.0778563022613525, "learning_rate": 8.268120590121874e-08, "loss": 0.7841, "step": 80765 }, { "epoch": 0.9843637648836727, "grad_norm": 2.0585055351257324, "learning_rate": 8.236048749198205e-08, "loss": 0.8103, "step": 80770 }, { "epoch": 0.9844247011078205, "grad_norm": 2.5114331245422363, "learning_rate": 8.203976908274536e-08, "loss": 0.7721, "step": 80775 }, { "epoch": 0.9844856373319684, "grad_norm": 2.6426639556884766, "learning_rate": 8.171905067350867e-08, "loss": 0.9126, "step": 80780 }, { "epoch": 0.9845465735561162, "grad_norm": 1.8624510765075684, "learning_rate": 8.139833226427198e-08, "loss": 0.8033, "step": 80785 }, { "epoch": 0.984607509780264, "grad_norm": 2.0328567028045654, "learning_rate": 8.107761385503528e-08, "loss": 0.7889, "step": 80790 }, { "epoch": 0.9846684460044118, "grad_norm": 1.8094680309295654, "learning_rate": 8.075689544579859e-08, "loss": 0.7602, "step": 80795 }, { "epoch": 0.9847293822285595, "grad_norm": 2.1408889293670654, "learning_rate": 8.043617703656191e-08, "loss": 0.7886, "step": 80800 }, { "epoch": 0.9847903184527074, "grad_norm": 1.8725628852844238, "learning_rate": 8.011545862732522e-08, "loss": 0.7958, "step": 80805 }, { "epoch": 0.9848512546768552, "grad_norm": 1.955325961112976, "learning_rate": 7.979474021808853e-08, "loss": 0.8193, "step": 80810 }, { "epoch": 0.984912190901003, "grad_norm": 2.229304075241089, "learning_rate": 7.947402180885184e-08, "loss": 0.8319, "step": 80815 }, { "epoch": 0.9849731271251508, "grad_norm": 1.9971193075180054, "learning_rate": 7.915330339961514e-08, "loss": 0.8698, "step": 80820 }, { "epoch": 0.9850340633492987, "grad_norm": 1.931907057762146, "learning_rate": 7.883258499037845e-08, "loss": 0.7961, "step": 80825 }, { "epoch": 0.9850949995734465, "grad_norm": 2.3141002655029297, "learning_rate": 7.851186658114176e-08, "loss": 0.7895, "step": 80830 }, { "epoch": 0.9851559357975942, "grad_norm": 1.6239725351333618, "learning_rate": 7.819114817190507e-08, "loss": 0.7589, "step": 80835 }, { "epoch": 0.985216872021742, "grad_norm": 2.05319881439209, "learning_rate": 7.787042976266838e-08, "loss": 0.7798, "step": 80840 }, { "epoch": 0.9852778082458898, "grad_norm": 3.166374444961548, "learning_rate": 7.754971135343168e-08, "loss": 0.8639, "step": 80845 }, { "epoch": 0.9853387444700377, "grad_norm": 1.9315887689590454, "learning_rate": 7.7228992944195e-08, "loss": 0.7746, "step": 80850 }, { "epoch": 0.9853996806941855, "grad_norm": 2.0009429454803467, "learning_rate": 7.690827453495831e-08, "loss": 0.854, "step": 80855 }, { "epoch": 0.9854606169183333, "grad_norm": 1.9253522157669067, "learning_rate": 7.658755612572162e-08, "loss": 0.747, "step": 80860 }, { "epoch": 0.9855215531424811, "grad_norm": 1.8500698804855347, "learning_rate": 7.626683771648493e-08, "loss": 0.7608, "step": 80865 }, { "epoch": 0.9855824893666288, "grad_norm": 1.7960313558578491, "learning_rate": 7.594611930724825e-08, "loss": 0.8196, "step": 80870 }, { "epoch": 0.9856434255907767, "grad_norm": 2.0770978927612305, "learning_rate": 7.562540089801156e-08, "loss": 0.8218, "step": 80875 }, { "epoch": 0.9857043618149245, "grad_norm": 2.2480170726776123, "learning_rate": 7.530468248877487e-08, "loss": 0.8256, "step": 80880 }, { "epoch": 0.9857652980390723, "grad_norm": 1.8974425792694092, "learning_rate": 7.498396407953817e-08, "loss": 0.7812, "step": 80885 }, { "epoch": 0.9858262342632201, "grad_norm": 1.7799029350280762, "learning_rate": 7.466324567030148e-08, "loss": 0.7482, "step": 80890 }, { "epoch": 0.985887170487368, "grad_norm": 2.041766881942749, "learning_rate": 7.434252726106479e-08, "loss": 0.7788, "step": 80895 }, { "epoch": 0.9859481067115158, "grad_norm": 1.9351683855056763, "learning_rate": 7.40218088518281e-08, "loss": 0.7897, "step": 80900 }, { "epoch": 0.9860090429356635, "grad_norm": 2.3045663833618164, "learning_rate": 7.37010904425914e-08, "loss": 0.8061, "step": 80905 }, { "epoch": 0.9860699791598113, "grad_norm": 1.8451802730560303, "learning_rate": 7.338037203335471e-08, "loss": 0.8217, "step": 80910 }, { "epoch": 0.9861309153839591, "grad_norm": 2.027897834777832, "learning_rate": 7.305965362411802e-08, "loss": 0.8207, "step": 80915 }, { "epoch": 0.986191851608107, "grad_norm": 2.0066404342651367, "learning_rate": 7.273893521488134e-08, "loss": 0.7796, "step": 80920 }, { "epoch": 0.9862527878322548, "grad_norm": 1.86783766746521, "learning_rate": 7.241821680564465e-08, "loss": 0.7552, "step": 80925 }, { "epoch": 0.9863137240564026, "grad_norm": 1.7349520921707153, "learning_rate": 7.209749839640796e-08, "loss": 0.8051, "step": 80930 }, { "epoch": 0.9863746602805504, "grad_norm": 2.1138226985931396, "learning_rate": 7.177677998717127e-08, "loss": 0.804, "step": 80935 }, { "epoch": 0.9864355965046981, "grad_norm": 1.938921570777893, "learning_rate": 7.145606157793459e-08, "loss": 0.7958, "step": 80940 }, { "epoch": 0.986496532728846, "grad_norm": 1.952089548110962, "learning_rate": 7.11353431686979e-08, "loss": 0.8615, "step": 80945 }, { "epoch": 0.9865574689529938, "grad_norm": 1.9282580614089966, "learning_rate": 7.08146247594612e-08, "loss": 0.8337, "step": 80950 }, { "epoch": 0.9866184051771416, "grad_norm": 1.8507598638534546, "learning_rate": 7.049390635022451e-08, "loss": 0.8233, "step": 80955 }, { "epoch": 0.9866793414012894, "grad_norm": 1.6804485321044922, "learning_rate": 7.017318794098782e-08, "loss": 0.776, "step": 80960 }, { "epoch": 0.9867402776254373, "grad_norm": 2.1090798377990723, "learning_rate": 6.985246953175113e-08, "loss": 0.7897, "step": 80965 }, { "epoch": 0.9868012138495851, "grad_norm": 1.8769806623458862, "learning_rate": 6.953175112251444e-08, "loss": 0.7824, "step": 80970 }, { "epoch": 0.9868621500737328, "grad_norm": 1.8698068857192993, "learning_rate": 6.921103271327774e-08, "loss": 0.8005, "step": 80975 }, { "epoch": 0.9869230862978806, "grad_norm": 2.2704596519470215, "learning_rate": 6.889031430404105e-08, "loss": 0.7618, "step": 80980 }, { "epoch": 0.9869840225220284, "grad_norm": 1.8431426286697388, "learning_rate": 6.856959589480436e-08, "loss": 0.8434, "step": 80985 }, { "epoch": 0.9870449587461763, "grad_norm": 1.7073721885681152, "learning_rate": 6.824887748556768e-08, "loss": 0.7364, "step": 80990 }, { "epoch": 0.9871058949703241, "grad_norm": 1.9111346006393433, "learning_rate": 6.792815907633099e-08, "loss": 0.86, "step": 80995 }, { "epoch": 0.9871668311944719, "grad_norm": 2.3310728073120117, "learning_rate": 6.76074406670943e-08, "loss": 0.8617, "step": 81000 }, { "epoch": 0.9872277674186196, "grad_norm": 1.8465228080749512, "learning_rate": 6.72867222578576e-08, "loss": 0.8, "step": 81005 }, { "epoch": 0.9872887036427674, "grad_norm": 2.172032356262207, "learning_rate": 6.696600384862093e-08, "loss": 0.7859, "step": 81010 }, { "epoch": 0.9873496398669153, "grad_norm": 1.9853463172912598, "learning_rate": 6.664528543938423e-08, "loss": 0.713, "step": 81015 }, { "epoch": 0.9874105760910631, "grad_norm": 2.1801209449768066, "learning_rate": 6.632456703014754e-08, "loss": 0.7709, "step": 81020 }, { "epoch": 0.9874715123152109, "grad_norm": 2.049960136413574, "learning_rate": 6.600384862091085e-08, "loss": 0.7957, "step": 81025 }, { "epoch": 0.9875324485393587, "grad_norm": 2.2174501419067383, "learning_rate": 6.568313021167416e-08, "loss": 0.7601, "step": 81030 }, { "epoch": 0.9875933847635066, "grad_norm": 2.2908108234405518, "learning_rate": 6.536241180243747e-08, "loss": 0.83, "step": 81035 }, { "epoch": 0.9876543209876543, "grad_norm": 1.6452696323394775, "learning_rate": 6.504169339320077e-08, "loss": 0.7735, "step": 81040 }, { "epoch": 0.9877152572118021, "grad_norm": 2.009765386581421, "learning_rate": 6.472097498396408e-08, "loss": 0.7424, "step": 81045 }, { "epoch": 0.9877761934359499, "grad_norm": 2.1425211429595947, "learning_rate": 6.440025657472739e-08, "loss": 0.8419, "step": 81050 }, { "epoch": 0.9878371296600977, "grad_norm": 2.043872356414795, "learning_rate": 6.40795381654907e-08, "loss": 0.7875, "step": 81055 }, { "epoch": 0.9878980658842456, "grad_norm": 2.0245323181152344, "learning_rate": 6.375881975625402e-08, "loss": 0.8476, "step": 81060 }, { "epoch": 0.9879590021083934, "grad_norm": 2.2136497497558594, "learning_rate": 6.343810134701733e-08, "loss": 0.815, "step": 81065 }, { "epoch": 0.9880199383325412, "grad_norm": 1.9903581142425537, "learning_rate": 6.311738293778063e-08, "loss": 0.8325, "step": 81070 }, { "epoch": 0.9880808745566889, "grad_norm": 2.1105847358703613, "learning_rate": 6.279666452854394e-08, "loss": 0.8306, "step": 81075 }, { "epoch": 0.9881418107808367, "grad_norm": 1.7513675689697266, "learning_rate": 6.247594611930726e-08, "loss": 0.7061, "step": 81080 }, { "epoch": 0.9882027470049846, "grad_norm": 2.083829402923584, "learning_rate": 6.215522771007057e-08, "loss": 0.805, "step": 81085 }, { "epoch": 0.9882636832291324, "grad_norm": 1.9301668405532837, "learning_rate": 6.183450930083388e-08, "loss": 0.8044, "step": 81090 }, { "epoch": 0.9883246194532802, "grad_norm": 2.04992938041687, "learning_rate": 6.151379089159719e-08, "loss": 0.8024, "step": 81095 }, { "epoch": 0.988385555677428, "grad_norm": 2.6263587474823, "learning_rate": 6.11930724823605e-08, "loss": 0.7766, "step": 81100 }, { "epoch": 0.9884464919015759, "grad_norm": 1.8616886138916016, "learning_rate": 6.08723540731238e-08, "loss": 0.7568, "step": 81105 }, { "epoch": 0.9885074281257236, "grad_norm": 2.023953914642334, "learning_rate": 6.055163566388711e-08, "loss": 0.7576, "step": 81110 }, { "epoch": 0.9885683643498714, "grad_norm": 1.910788655281067, "learning_rate": 6.023091725465042e-08, "loss": 0.7931, "step": 81115 }, { "epoch": 0.9886293005740192, "grad_norm": 2.0054104328155518, "learning_rate": 5.991019884541373e-08, "loss": 0.7819, "step": 81120 }, { "epoch": 0.988690236798167, "grad_norm": 2.1414132118225098, "learning_rate": 5.958948043617704e-08, "loss": 0.8053, "step": 81125 }, { "epoch": 0.9887511730223149, "grad_norm": 1.6691670417785645, "learning_rate": 5.926876202694035e-08, "loss": 0.8393, "step": 81130 }, { "epoch": 0.9888121092464627, "grad_norm": 1.9558616876602173, "learning_rate": 5.894804361770366e-08, "loss": 0.8169, "step": 81135 }, { "epoch": 0.9888730454706105, "grad_norm": 2.0000205039978027, "learning_rate": 5.8627325208466965e-08, "loss": 0.769, "step": 81140 }, { "epoch": 0.9889339816947582, "grad_norm": 1.843909740447998, "learning_rate": 5.830660679923028e-08, "loss": 0.7877, "step": 81145 }, { "epoch": 0.988994917918906, "grad_norm": 1.9562486410140991, "learning_rate": 5.798588838999359e-08, "loss": 0.8835, "step": 81150 }, { "epoch": 0.9890558541430539, "grad_norm": 1.807273507118225, "learning_rate": 5.7665169980756896e-08, "loss": 0.8302, "step": 81155 }, { "epoch": 0.9891167903672017, "grad_norm": 1.9574588537216187, "learning_rate": 5.734445157152022e-08, "loss": 0.7826, "step": 81160 }, { "epoch": 0.9891777265913495, "grad_norm": 2.500736713409424, "learning_rate": 5.7023733162283525e-08, "loss": 0.7855, "step": 81165 }, { "epoch": 0.9892386628154973, "grad_norm": 1.8105649948120117, "learning_rate": 5.670301475304683e-08, "loss": 0.7525, "step": 81170 }, { "epoch": 0.9892995990396452, "grad_norm": 2.144712448120117, "learning_rate": 5.638229634381014e-08, "loss": 0.7982, "step": 81175 }, { "epoch": 0.9893605352637929, "grad_norm": 1.7359455823898315, "learning_rate": 5.606157793457345e-08, "loss": 0.7661, "step": 81180 }, { "epoch": 0.9894214714879407, "grad_norm": 1.9920415878295898, "learning_rate": 5.574085952533676e-08, "loss": 0.7867, "step": 81185 }, { "epoch": 0.9894824077120885, "grad_norm": 2.102006435394287, "learning_rate": 5.542014111610007e-08, "loss": 0.7869, "step": 81190 }, { "epoch": 0.9895433439362363, "grad_norm": 1.9482001066207886, "learning_rate": 5.509942270686338e-08, "loss": 0.7777, "step": 81195 }, { "epoch": 0.9896042801603842, "grad_norm": 1.7030059099197388, "learning_rate": 5.477870429762669e-08, "loss": 0.7243, "step": 81200 }, { "epoch": 0.989665216384532, "grad_norm": 1.8775012493133545, "learning_rate": 5.4457985888389995e-08, "loss": 0.7618, "step": 81205 }, { "epoch": 0.9897261526086798, "grad_norm": 2.2580766677856445, "learning_rate": 5.41372674791533e-08, "loss": 0.8154, "step": 81210 }, { "epoch": 0.9897870888328275, "grad_norm": 2.2255752086639404, "learning_rate": 5.381654906991662e-08, "loss": 0.938, "step": 81215 }, { "epoch": 0.9898480250569753, "grad_norm": 2.1568875312805176, "learning_rate": 5.3495830660679925e-08, "loss": 0.7223, "step": 81220 }, { "epoch": 0.9899089612811232, "grad_norm": 1.9757806062698364, "learning_rate": 5.317511225144323e-08, "loss": 0.7434, "step": 81225 }, { "epoch": 0.989969897505271, "grad_norm": 2.365115165710449, "learning_rate": 5.285439384220654e-08, "loss": 0.7803, "step": 81230 }, { "epoch": 0.9900308337294188, "grad_norm": 2.1572513580322266, "learning_rate": 5.253367543296986e-08, "loss": 0.8516, "step": 81235 }, { "epoch": 0.9900917699535666, "grad_norm": 2.121428966522217, "learning_rate": 5.221295702373317e-08, "loss": 0.8176, "step": 81240 }, { "epoch": 0.9901527061777144, "grad_norm": 1.946644902229309, "learning_rate": 5.189223861449648e-08, "loss": 0.6905, "step": 81245 }, { "epoch": 0.9902136424018622, "grad_norm": 2.0817649364471436, "learning_rate": 5.1571520205259786e-08, "loss": 0.8565, "step": 81250 }, { "epoch": 0.99027457862601, "grad_norm": 1.8443421125411987, "learning_rate": 5.12508017960231e-08, "loss": 0.8043, "step": 81255 }, { "epoch": 0.9903355148501578, "grad_norm": 2.0981602668762207, "learning_rate": 5.093008338678641e-08, "loss": 0.8064, "step": 81260 }, { "epoch": 0.9903964510743056, "grad_norm": 1.873839259147644, "learning_rate": 5.0609364977549717e-08, "loss": 0.7982, "step": 81265 }, { "epoch": 0.9904573872984535, "grad_norm": 2.0769853591918945, "learning_rate": 5.0288646568313025e-08, "loss": 0.7828, "step": 81270 }, { "epoch": 0.9905183235226013, "grad_norm": 1.7308909893035889, "learning_rate": 4.996792815907633e-08, "loss": 0.8146, "step": 81275 }, { "epoch": 0.9905792597467491, "grad_norm": 1.787893295288086, "learning_rate": 4.964720974983964e-08, "loss": 0.8341, "step": 81280 }, { "epoch": 0.9906401959708968, "grad_norm": 1.7856026887893677, "learning_rate": 4.9326491340602955e-08, "loss": 0.7692, "step": 81285 }, { "epoch": 0.9907011321950446, "grad_norm": 2.219052791595459, "learning_rate": 4.900577293136626e-08, "loss": 0.8047, "step": 81290 }, { "epoch": 0.9907620684191925, "grad_norm": 1.539984107017517, "learning_rate": 4.868505452212957e-08, "loss": 0.7605, "step": 81295 }, { "epoch": 0.9908230046433403, "grad_norm": 1.9262969493865967, "learning_rate": 4.836433611289288e-08, "loss": 0.7905, "step": 81300 }, { "epoch": 0.9908839408674881, "grad_norm": 1.9891340732574463, "learning_rate": 4.804361770365619e-08, "loss": 0.8312, "step": 81305 }, { "epoch": 0.9909448770916359, "grad_norm": 2.1367764472961426, "learning_rate": 4.772289929441951e-08, "loss": 0.7597, "step": 81310 }, { "epoch": 0.9910058133157837, "grad_norm": 1.9680625200271606, "learning_rate": 4.7402180885182816e-08, "loss": 0.812, "step": 81315 }, { "epoch": 0.9910667495399315, "grad_norm": 1.9312002658843994, "learning_rate": 4.7081462475946124e-08, "loss": 0.8548, "step": 81320 }, { "epoch": 0.9911276857640793, "grad_norm": 1.8777670860290527, "learning_rate": 4.676074406670944e-08, "loss": 0.7719, "step": 81325 }, { "epoch": 0.9911886219882271, "grad_norm": 2.2662086486816406, "learning_rate": 4.6440025657472746e-08, "loss": 0.7961, "step": 81330 }, { "epoch": 0.9912495582123749, "grad_norm": 2.04784893989563, "learning_rate": 4.6119307248236054e-08, "loss": 0.7511, "step": 81335 }, { "epoch": 0.9913104944365227, "grad_norm": 1.726354718208313, "learning_rate": 4.579858883899936e-08, "loss": 0.7881, "step": 81340 }, { "epoch": 0.9913714306606706, "grad_norm": 2.3856515884399414, "learning_rate": 4.547787042976267e-08, "loss": 0.8215, "step": 81345 }, { "epoch": 0.9914323668848184, "grad_norm": 2.1654086112976074, "learning_rate": 4.515715202052598e-08, "loss": 0.8112, "step": 81350 }, { "epoch": 0.9914933031089661, "grad_norm": 2.2363412380218506, "learning_rate": 4.483643361128929e-08, "loss": 0.8539, "step": 81355 }, { "epoch": 0.9915542393331139, "grad_norm": 1.8538950681686401, "learning_rate": 4.45157152020526e-08, "loss": 0.7704, "step": 81360 }, { "epoch": 0.9916151755572618, "grad_norm": 1.7008616924285889, "learning_rate": 4.419499679281591e-08, "loss": 0.8139, "step": 81365 }, { "epoch": 0.9916761117814096, "grad_norm": 1.969133973121643, "learning_rate": 4.3874278383579217e-08, "loss": 0.7556, "step": 81370 }, { "epoch": 0.9917370480055574, "grad_norm": 1.9404478073120117, "learning_rate": 4.3553559974342524e-08, "loss": 0.8487, "step": 81375 }, { "epoch": 0.9917979842297052, "grad_norm": 2.2863245010375977, "learning_rate": 4.323284156510584e-08, "loss": 0.785, "step": 81380 }, { "epoch": 0.991858920453853, "grad_norm": 1.7545593976974487, "learning_rate": 4.2912123155869154e-08, "loss": 0.8142, "step": 81385 }, { "epoch": 0.9919198566780008, "grad_norm": 2.272648572921753, "learning_rate": 4.259140474663246e-08, "loss": 0.8438, "step": 81390 }, { "epoch": 0.9919807929021486, "grad_norm": 1.9573887586593628, "learning_rate": 4.2270686337395776e-08, "loss": 0.8448, "step": 81395 }, { "epoch": 0.9920417291262964, "grad_norm": 1.815428614616394, "learning_rate": 4.1949967928159084e-08, "loss": 0.8105, "step": 81400 }, { "epoch": 0.9921026653504442, "grad_norm": 1.8940989971160889, "learning_rate": 4.162924951892239e-08, "loss": 0.7891, "step": 81405 }, { "epoch": 0.992163601574592, "grad_norm": 1.9673207998275757, "learning_rate": 4.13085311096857e-08, "loss": 0.8216, "step": 81410 }, { "epoch": 0.9922245377987399, "grad_norm": 2.0394654273986816, "learning_rate": 4.098781270044901e-08, "loss": 0.815, "step": 81415 }, { "epoch": 0.9922854740228877, "grad_norm": 1.8713799715042114, "learning_rate": 4.0667094291212316e-08, "loss": 0.7892, "step": 81420 }, { "epoch": 0.9923464102470354, "grad_norm": 2.0146257877349854, "learning_rate": 4.034637588197563e-08, "loss": 0.8213, "step": 81425 }, { "epoch": 0.9924073464711832, "grad_norm": 1.9715250730514526, "learning_rate": 4.002565747273894e-08, "loss": 0.7252, "step": 81430 }, { "epoch": 0.992468282695331, "grad_norm": 1.919846773147583, "learning_rate": 3.9704939063502246e-08, "loss": 0.7263, "step": 81435 }, { "epoch": 0.9925292189194789, "grad_norm": 1.9980204105377197, "learning_rate": 3.9384220654265554e-08, "loss": 0.8326, "step": 81440 }, { "epoch": 0.9925901551436267, "grad_norm": 2.093759536743164, "learning_rate": 3.906350224502886e-08, "loss": 0.7826, "step": 81445 }, { "epoch": 0.9926510913677745, "grad_norm": 2.2025372982025146, "learning_rate": 3.874278383579218e-08, "loss": 0.7766, "step": 81450 }, { "epoch": 0.9927120275919223, "grad_norm": 1.7492107152938843, "learning_rate": 3.8422065426555485e-08, "loss": 0.8083, "step": 81455 }, { "epoch": 0.99277296381607, "grad_norm": 2.0651135444641113, "learning_rate": 3.81013470173188e-08, "loss": 0.7768, "step": 81460 }, { "epoch": 0.9928339000402179, "grad_norm": 2.1037914752960205, "learning_rate": 3.778062860808211e-08, "loss": 0.8513, "step": 81465 }, { "epoch": 0.9928948362643657, "grad_norm": 2.1821532249450684, "learning_rate": 3.7459910198845415e-08, "loss": 0.862, "step": 81470 }, { "epoch": 0.9929557724885135, "grad_norm": 1.9745216369628906, "learning_rate": 3.713919178960872e-08, "loss": 0.8185, "step": 81475 }, { "epoch": 0.9930167087126613, "grad_norm": 1.840958595275879, "learning_rate": 3.681847338037204e-08, "loss": 0.8025, "step": 81480 }, { "epoch": 0.9930776449368092, "grad_norm": 1.8196285963058472, "learning_rate": 3.6497754971135346e-08, "loss": 0.8287, "step": 81485 }, { "epoch": 0.993138581160957, "grad_norm": 1.9962327480316162, "learning_rate": 3.6177036561898653e-08, "loss": 0.8214, "step": 81490 }, { "epoch": 0.9931995173851047, "grad_norm": 1.8716073036193848, "learning_rate": 3.585631815266197e-08, "loss": 0.7898, "step": 81495 }, { "epoch": 0.9932604536092525, "grad_norm": 2.3390488624572754, "learning_rate": 3.5535599743425276e-08, "loss": 0.7278, "step": 81500 }, { "epoch": 0.9933213898334003, "grad_norm": 1.8296431303024292, "learning_rate": 3.5214881334188584e-08, "loss": 0.857, "step": 81505 }, { "epoch": 0.9933823260575482, "grad_norm": 1.7928225994110107, "learning_rate": 3.489416292495189e-08, "loss": 0.7163, "step": 81510 }, { "epoch": 0.993443262281696, "grad_norm": 1.8406316041946411, "learning_rate": 3.4573444515715206e-08, "loss": 0.8173, "step": 81515 }, { "epoch": 0.9935041985058438, "grad_norm": 2.057548761367798, "learning_rate": 3.4252726106478514e-08, "loss": 0.7745, "step": 81520 }, { "epoch": 0.9935651347299916, "grad_norm": 2.130673408508301, "learning_rate": 3.393200769724182e-08, "loss": 0.7683, "step": 81525 }, { "epoch": 0.9936260709541394, "grad_norm": 2.0016238689422607, "learning_rate": 3.361128928800514e-08, "loss": 0.746, "step": 81530 }, { "epoch": 0.9936870071782872, "grad_norm": 1.8542559146881104, "learning_rate": 3.3290570878768445e-08, "loss": 0.7568, "step": 81535 }, { "epoch": 0.993747943402435, "grad_norm": 1.6953703165054321, "learning_rate": 3.296985246953175e-08, "loss": 0.7695, "step": 81540 }, { "epoch": 0.9938088796265828, "grad_norm": 1.985072374343872, "learning_rate": 3.264913406029506e-08, "loss": 0.838, "step": 81545 }, { "epoch": 0.9938698158507306, "grad_norm": 1.90680992603302, "learning_rate": 3.232841565105837e-08, "loss": 0.8329, "step": 81550 }, { "epoch": 0.9939307520748785, "grad_norm": 2.458287477493286, "learning_rate": 3.200769724182168e-08, "loss": 0.747, "step": 81555 }, { "epoch": 0.9939916882990263, "grad_norm": 1.8784881830215454, "learning_rate": 3.168697883258499e-08, "loss": 0.7864, "step": 81560 }, { "epoch": 0.994052624523174, "grad_norm": 1.7852646112442017, "learning_rate": 3.1366260423348306e-08, "loss": 0.7513, "step": 81565 }, { "epoch": 0.9941135607473218, "grad_norm": 1.66303551197052, "learning_rate": 3.1045542014111614e-08, "loss": 0.8044, "step": 81570 }, { "epoch": 0.9941744969714696, "grad_norm": 2.160066843032837, "learning_rate": 3.072482360487492e-08, "loss": 0.6992, "step": 81575 }, { "epoch": 0.9942354331956175, "grad_norm": 1.88534414768219, "learning_rate": 3.040410519563823e-08, "loss": 0.8453, "step": 81580 }, { "epoch": 0.9942963694197653, "grad_norm": 1.8681730031967163, "learning_rate": 3.008338678640154e-08, "loss": 0.7256, "step": 81585 }, { "epoch": 0.9943573056439131, "grad_norm": 2.288925886154175, "learning_rate": 2.9762668377164855e-08, "loss": 0.7861, "step": 81590 }, { "epoch": 0.9944182418680609, "grad_norm": 2.4751760959625244, "learning_rate": 2.9441949967928163e-08, "loss": 0.7887, "step": 81595 }, { "epoch": 0.9944791780922086, "grad_norm": 1.610484004020691, "learning_rate": 2.912123155869147e-08, "loss": 0.7964, "step": 81600 }, { "epoch": 0.9945401143163565, "grad_norm": 1.603121280670166, "learning_rate": 2.8800513149454783e-08, "loss": 0.839, "step": 81605 }, { "epoch": 0.9946010505405043, "grad_norm": 1.883016586303711, "learning_rate": 2.847979474021809e-08, "loss": 0.8473, "step": 81610 }, { "epoch": 0.9946619867646521, "grad_norm": 1.9275484085083008, "learning_rate": 2.81590763309814e-08, "loss": 0.7696, "step": 81615 }, { "epoch": 0.9947229229887999, "grad_norm": 2.0282018184661865, "learning_rate": 2.783835792174471e-08, "loss": 0.7571, "step": 81620 }, { "epoch": 0.9947838592129478, "grad_norm": 2.1217257976531982, "learning_rate": 2.7517639512508018e-08, "loss": 0.744, "step": 81625 }, { "epoch": 0.9948447954370956, "grad_norm": 2.1701512336730957, "learning_rate": 2.7196921103271332e-08, "loss": 0.8922, "step": 81630 }, { "epoch": 0.9949057316612433, "grad_norm": 1.8439677953720093, "learning_rate": 2.687620269403464e-08, "loss": 0.7943, "step": 81635 }, { "epoch": 0.9949666678853911, "grad_norm": 2.2023112773895264, "learning_rate": 2.655548428479795e-08, "loss": 0.8791, "step": 81640 }, { "epoch": 0.9950276041095389, "grad_norm": 1.6989294290542603, "learning_rate": 2.623476587556126e-08, "loss": 0.7482, "step": 81645 }, { "epoch": 0.9950885403336868, "grad_norm": 2.314877510070801, "learning_rate": 2.5914047466324567e-08, "loss": 0.8181, "step": 81650 }, { "epoch": 0.9951494765578346, "grad_norm": 1.8269872665405273, "learning_rate": 2.559332905708788e-08, "loss": 0.8187, "step": 81655 }, { "epoch": 0.9952104127819824, "grad_norm": 2.114549398422241, "learning_rate": 2.5272610647851186e-08, "loss": 0.8407, "step": 81660 }, { "epoch": 0.9952713490061302, "grad_norm": 1.9231020212173462, "learning_rate": 2.4951892238614498e-08, "loss": 0.8555, "step": 81665 }, { "epoch": 0.995332285230278, "grad_norm": 1.9410669803619385, "learning_rate": 2.463117382937781e-08, "loss": 0.7698, "step": 81670 }, { "epoch": 0.9953932214544258, "grad_norm": 1.9239603281021118, "learning_rate": 2.431045542014112e-08, "loss": 0.855, "step": 81675 }, { "epoch": 0.9954541576785736, "grad_norm": 2.20475697517395, "learning_rate": 2.3989737010904428e-08, "loss": 0.7874, "step": 81680 }, { "epoch": 0.9955150939027214, "grad_norm": 1.942013144493103, "learning_rate": 2.3669018601667736e-08, "loss": 0.7856, "step": 81685 }, { "epoch": 0.9955760301268692, "grad_norm": 2.1052920818328857, "learning_rate": 2.3348300192431047e-08, "loss": 0.798, "step": 81690 }, { "epoch": 0.9956369663510171, "grad_norm": 2.1204936504364014, "learning_rate": 2.3027581783194355e-08, "loss": 0.766, "step": 81695 }, { "epoch": 0.9956979025751649, "grad_norm": 2.0929720401763916, "learning_rate": 2.2706863373957667e-08, "loss": 0.7971, "step": 81700 }, { "epoch": 0.9957588387993126, "grad_norm": 2.022819757461548, "learning_rate": 2.2386144964720978e-08, "loss": 0.8126, "step": 81705 }, { "epoch": 0.9958197750234604, "grad_norm": 2.0314619541168213, "learning_rate": 2.206542655548429e-08, "loss": 0.8548, "step": 81710 }, { "epoch": 0.9958807112476082, "grad_norm": 2.1726841926574707, "learning_rate": 2.1744708146247597e-08, "loss": 0.7646, "step": 81715 }, { "epoch": 0.9959416474717561, "grad_norm": 2.033398389816284, "learning_rate": 2.1423989737010905e-08, "loss": 0.7884, "step": 81720 }, { "epoch": 0.9960025836959039, "grad_norm": 1.7500749826431274, "learning_rate": 2.1103271327774216e-08, "loss": 0.8501, "step": 81725 }, { "epoch": 0.9960635199200517, "grad_norm": 2.308415412902832, "learning_rate": 2.0782552918537524e-08, "loss": 0.7761, "step": 81730 }, { "epoch": 0.9961244561441995, "grad_norm": 1.9584667682647705, "learning_rate": 2.0461834509300835e-08, "loss": 0.8232, "step": 81735 }, { "epoch": 0.9961853923683472, "grad_norm": 2.267990827560425, "learning_rate": 2.0141116100064143e-08, "loss": 0.7463, "step": 81740 }, { "epoch": 0.9962463285924951, "grad_norm": 2.062861919403076, "learning_rate": 1.9820397690827458e-08, "loss": 0.8208, "step": 81745 }, { "epoch": 0.9963072648166429, "grad_norm": 1.7650525569915771, "learning_rate": 1.9499679281590766e-08, "loss": 0.7794, "step": 81750 }, { "epoch": 0.9963682010407907, "grad_norm": 1.8423911333084106, "learning_rate": 1.9178960872354074e-08, "loss": 0.8104, "step": 81755 }, { "epoch": 0.9964291372649385, "grad_norm": 2.0032427310943604, "learning_rate": 1.8858242463117385e-08, "loss": 0.8736, "step": 81760 }, { "epoch": 0.9964900734890864, "grad_norm": 1.6842825412750244, "learning_rate": 1.8537524053880693e-08, "loss": 0.8168, "step": 81765 }, { "epoch": 0.9965510097132342, "grad_norm": 1.8228340148925781, "learning_rate": 1.8216805644644004e-08, "loss": 0.8967, "step": 81770 }, { "epoch": 0.9966119459373819, "grad_norm": 2.113706111907959, "learning_rate": 1.7896087235407315e-08, "loss": 0.7551, "step": 81775 }, { "epoch": 0.9966728821615297, "grad_norm": 1.759298324584961, "learning_rate": 1.7575368826170623e-08, "loss": 0.8403, "step": 81780 }, { "epoch": 0.9967338183856775, "grad_norm": 1.7716004848480225, "learning_rate": 1.725465041693393e-08, "loss": 0.8353, "step": 81785 }, { "epoch": 0.9967947546098254, "grad_norm": 1.8899941444396973, "learning_rate": 1.6933932007697243e-08, "loss": 0.8021, "step": 81790 }, { "epoch": 0.9968556908339732, "grad_norm": 2.135631561279297, "learning_rate": 1.6613213598460554e-08, "loss": 0.7712, "step": 81795 }, { "epoch": 0.996916627058121, "grad_norm": 2.159858226776123, "learning_rate": 1.6292495189223862e-08, "loss": 0.7886, "step": 81800 }, { "epoch": 0.9969775632822688, "grad_norm": 1.7217541933059692, "learning_rate": 1.5971776779987173e-08, "loss": 0.8556, "step": 81805 }, { "epoch": 0.9970384995064165, "grad_norm": 2.1262104511260986, "learning_rate": 1.5651058370750484e-08, "loss": 0.8206, "step": 81810 }, { "epoch": 0.9970994357305644, "grad_norm": 1.9304852485656738, "learning_rate": 1.5330339961513792e-08, "loss": 0.7608, "step": 81815 }, { "epoch": 0.9971603719547122, "grad_norm": 1.729025959968567, "learning_rate": 1.50096215522771e-08, "loss": 0.764, "step": 81820 }, { "epoch": 0.99722130817886, "grad_norm": 2.027378797531128, "learning_rate": 1.4688903143040411e-08, "loss": 0.8445, "step": 81825 }, { "epoch": 0.9972822444030078, "grad_norm": 1.6598576307296753, "learning_rate": 1.4368184733803723e-08, "loss": 0.8074, "step": 81830 }, { "epoch": 0.9973431806271557, "grad_norm": 2.644582986831665, "learning_rate": 1.404746632456703e-08, "loss": 0.8138, "step": 81835 }, { "epoch": 0.9974041168513035, "grad_norm": 1.944461703300476, "learning_rate": 1.372674791533034e-08, "loss": 0.8544, "step": 81840 }, { "epoch": 0.9974650530754512, "grad_norm": 1.8728851079940796, "learning_rate": 1.3406029506093652e-08, "loss": 0.8083, "step": 81845 }, { "epoch": 0.997525989299599, "grad_norm": 1.8733476400375366, "learning_rate": 1.3085311096856961e-08, "loss": 0.7953, "step": 81850 }, { "epoch": 0.9975869255237468, "grad_norm": 2.162219762802124, "learning_rate": 1.276459268762027e-08, "loss": 0.826, "step": 81855 }, { "epoch": 0.9976478617478947, "grad_norm": 2.2357420921325684, "learning_rate": 1.244387427838358e-08, "loss": 0.8526, "step": 81860 }, { "epoch": 0.9977087979720425, "grad_norm": 2.0681469440460205, "learning_rate": 1.2123155869146892e-08, "loss": 0.8365, "step": 81865 }, { "epoch": 0.9977697341961903, "grad_norm": 1.9813205003738403, "learning_rate": 1.18024374599102e-08, "loss": 0.8983, "step": 81870 }, { "epoch": 0.9978306704203381, "grad_norm": 1.765732765197754, "learning_rate": 1.1481719050673509e-08, "loss": 0.8353, "step": 81875 }, { "epoch": 0.9978916066444858, "grad_norm": 1.9896056652069092, "learning_rate": 1.1161000641436819e-08, "loss": 0.7147, "step": 81880 }, { "epoch": 0.9979525428686337, "grad_norm": 1.9813916683197021, "learning_rate": 1.084028223220013e-08, "loss": 0.7607, "step": 81885 }, { "epoch": 0.9980134790927815, "grad_norm": 2.0351216793060303, "learning_rate": 1.051956382296344e-08, "loss": 0.8746, "step": 81890 }, { "epoch": 0.9980744153169293, "grad_norm": 1.5583906173706055, "learning_rate": 1.0198845413726749e-08, "loss": 0.7848, "step": 81895 }, { "epoch": 0.9981353515410771, "grad_norm": 1.9747804403305054, "learning_rate": 9.878127004490057e-09, "loss": 0.7914, "step": 81900 }, { "epoch": 0.998196287765225, "grad_norm": 2.1085400581359863, "learning_rate": 9.557408595253368e-09, "loss": 0.8576, "step": 81905 }, { "epoch": 0.9982572239893728, "grad_norm": 2.2425880432128906, "learning_rate": 9.236690186016678e-09, "loss": 0.8105, "step": 81910 }, { "epoch": 0.9983181602135205, "grad_norm": 1.705693244934082, "learning_rate": 8.915971776779988e-09, "loss": 0.7837, "step": 81915 }, { "epoch": 0.9983790964376683, "grad_norm": 1.9849036931991577, "learning_rate": 8.595253367543297e-09, "loss": 0.7853, "step": 81920 }, { "epoch": 0.9984400326618161, "grad_norm": 1.9068498611450195, "learning_rate": 8.274534958306608e-09, "loss": 0.7959, "step": 81925 }, { "epoch": 0.998500968885964, "grad_norm": 1.8545469045639038, "learning_rate": 7.953816549069918e-09, "loss": 0.8288, "step": 81930 }, { "epoch": 0.9985619051101118, "grad_norm": 2.233306646347046, "learning_rate": 7.633098139833228e-09, "loss": 0.7343, "step": 81935 }, { "epoch": 0.9986228413342596, "grad_norm": 1.8016514778137207, "learning_rate": 7.312379730596536e-09, "loss": 0.871, "step": 81940 }, { "epoch": 0.9986837775584074, "grad_norm": 2.057915210723877, "learning_rate": 6.991661321359847e-09, "loss": 0.7788, "step": 81945 }, { "epoch": 0.9987447137825551, "grad_norm": 1.7010587453842163, "learning_rate": 6.670942912123156e-09, "loss": 0.8373, "step": 81950 }, { "epoch": 0.998805650006703, "grad_norm": 2.2616536617279053, "learning_rate": 6.350224502886467e-09, "loss": 0.8203, "step": 81955 }, { "epoch": 0.9988665862308508, "grad_norm": 2.1701598167419434, "learning_rate": 6.0295060936497756e-09, "loss": 0.8385, "step": 81960 }, { "epoch": 0.9989275224549986, "grad_norm": 1.9796041250228882, "learning_rate": 5.708787684413086e-09, "loss": 0.8067, "step": 81965 }, { "epoch": 0.9989884586791464, "grad_norm": 1.8418093919754028, "learning_rate": 5.388069275176396e-09, "loss": 0.7895, "step": 81970 }, { "epoch": 0.9990493949032943, "grad_norm": 2.0986149311065674, "learning_rate": 5.067350865939705e-09, "loss": 0.8316, "step": 81975 }, { "epoch": 0.999110331127442, "grad_norm": 2.1250457763671875, "learning_rate": 4.746632456703016e-09, "loss": 0.8277, "step": 81980 }, { "epoch": 0.9991712673515898, "grad_norm": 2.260631561279297, "learning_rate": 4.425914047466325e-09, "loss": 0.8381, "step": 81985 }, { "epoch": 0.9992322035757376, "grad_norm": 1.7287061214447021, "learning_rate": 4.105195638229635e-09, "loss": 0.7744, "step": 81990 }, { "epoch": 0.9992931397998854, "grad_norm": 1.9775831699371338, "learning_rate": 3.7844772289929444e-09, "loss": 0.8388, "step": 81995 }, { "epoch": 0.9993540760240333, "grad_norm": 2.1925971508026123, "learning_rate": 3.4637588197562544e-09, "loss": 0.8439, "step": 82000 }, { "epoch": 0.9994150122481811, "grad_norm": 1.7647286653518677, "learning_rate": 3.143040410519564e-09, "loss": 0.7885, "step": 82005 }, { "epoch": 0.9994759484723289, "grad_norm": 1.9889168739318848, "learning_rate": 2.8223220012828736e-09, "loss": 0.8703, "step": 82010 }, { "epoch": 0.9995368846964766, "grad_norm": 2.0551135540008545, "learning_rate": 2.5016035920461836e-09, "loss": 0.786, "step": 82015 }, { "epoch": 0.9995978209206244, "grad_norm": 2.020777463912964, "learning_rate": 2.1808851828094932e-09, "loss": 0.7586, "step": 82020 }, { "epoch": 0.9996587571447723, "grad_norm": 2.1238489151000977, "learning_rate": 1.8601667735728035e-09, "loss": 0.8197, "step": 82025 }, { "epoch": 0.9997196933689201, "grad_norm": 2.1310789585113525, "learning_rate": 1.539448364336113e-09, "loss": 0.8634, "step": 82030 }, { "epoch": 0.9997806295930679, "grad_norm": 2.0892128944396973, "learning_rate": 1.2187299550994229e-09, "loss": 0.9061, "step": 82035 }, { "epoch": 0.9998415658172157, "grad_norm": 2.6662800312042236, "learning_rate": 8.980115458627327e-10, "loss": 0.8218, "step": 82040 }, { "epoch": 0.9999025020413636, "grad_norm": 1.9014744758605957, "learning_rate": 5.772931366260424e-10, "loss": 0.8709, "step": 82045 }, { "epoch": 0.9999634382655113, "grad_norm": 1.851790189743042, "learning_rate": 2.565747273893522e-10, "loss": 0.827, "step": 82050 }, { "epoch": 1.0, "step": 82053, "total_flos": 5.788063778412796e+19, "train_loss": 0.8559111680455433, "train_runtime": 61005.8923, "train_samples_per_second": 43.04, "train_steps_per_second": 1.345 } ], "logging_steps": 5, "max_steps": 82053, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.788063778412796e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }