{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 82053, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.0936224147806904e-05, "grad_norm": 3.9362905025482178, "learning_rate": 4.874482086278334e-09, "loss": 1.2391, "step": 5 }, { "epoch": 0.00012187244829561381, "grad_norm": 3.505309820175171, "learning_rate": 1.096758469412625e-08, "loss": 1.2925, "step": 10 }, { "epoch": 0.0001828086724434207, "grad_norm": 3.3816146850585938, "learning_rate": 1.7060687301974167e-08, "loss": 1.2497, "step": 15 }, { "epoch": 0.00024374489659122762, "grad_norm": 4.378155708312988, "learning_rate": 2.3153789909822082e-08, "loss": 1.232, "step": 20 }, { "epoch": 0.00030468112073903455, "grad_norm": 4.186976909637451, "learning_rate": 2.924689251767e-08, "loss": 1.2813, "step": 25 }, { "epoch": 0.0003656173448868414, "grad_norm": 3.8541486263275146, "learning_rate": 3.5339995125517914e-08, "loss": 1.2723, "step": 30 }, { "epoch": 0.0004265535690346483, "grad_norm": 3.5811848640441895, "learning_rate": 4.143309773336583e-08, "loss": 1.2886, "step": 35 }, { "epoch": 0.00048748979318245523, "grad_norm": 3.9332046508789062, "learning_rate": 4.752620034121375e-08, "loss": 1.3004, "step": 40 }, { "epoch": 0.0005484260173302622, "grad_norm": 4.019774436950684, "learning_rate": 5.361930294906167e-08, "loss": 1.214, "step": 45 }, { "epoch": 0.0006093622414780691, "grad_norm": 5.0765061378479, "learning_rate": 5.971240555690958e-08, "loss": 1.3171, "step": 50 }, { "epoch": 0.0006702984656258759, "grad_norm": 3.895282745361328, "learning_rate": 6.580550816475749e-08, "loss": 1.2906, "step": 55 }, { "epoch": 0.0007312346897736828, "grad_norm": 2.998605966567993, "learning_rate": 7.189861077260542e-08, "loss": 1.2109, "step": 60 }, { "epoch": 0.0007921709139214897, "grad_norm": 3.464761734008789, "learning_rate": 7.799171338045334e-08, "loss": 1.2984, "step": 65 }, { "epoch": 0.0008531071380692966, "grad_norm": 3.638300895690918, "learning_rate": 8.408481598830125e-08, "loss": 1.2541, "step": 70 }, { "epoch": 0.0009140433622171036, "grad_norm": 3.66316556930542, "learning_rate": 9.017791859614917e-08, "loss": 1.2797, "step": 75 }, { "epoch": 0.0009749795863649105, "grad_norm": 4.35277795791626, "learning_rate": 9.627102120399708e-08, "loss": 1.3373, "step": 80 }, { "epoch": 0.0010359158105127174, "grad_norm": 3.9725470542907715, "learning_rate": 1.02364123811845e-07, "loss": 1.2585, "step": 85 }, { "epoch": 0.0010968520346605244, "grad_norm": 3.8616750240325928, "learning_rate": 1.0845722641969292e-07, "loss": 1.2646, "step": 90 }, { "epoch": 0.0011577882588083312, "grad_norm": 3.970890522003174, "learning_rate": 1.1455032902754083e-07, "loss": 1.1701, "step": 95 }, { "epoch": 0.0012187244829561382, "grad_norm": 3.7826249599456787, "learning_rate": 1.2064343163538875e-07, "loss": 1.304, "step": 100 }, { "epoch": 0.001279660707103945, "grad_norm": 3.304621696472168, "learning_rate": 1.2673653424323666e-07, "loss": 1.2958, "step": 105 }, { "epoch": 0.0013405969312517518, "grad_norm": 3.471403121948242, "learning_rate": 1.3282963685108458e-07, "loss": 1.2705, "step": 110 }, { "epoch": 0.0014015331553995588, "grad_norm": 3.458974838256836, "learning_rate": 1.389227394589325e-07, "loss": 1.3294, "step": 115 }, { "epoch": 0.0014624693795473656, "grad_norm": 3.4428188800811768, "learning_rate": 1.450158420667804e-07, "loss": 1.2728, "step": 120 }, { "epoch": 0.0015234056036951727, "grad_norm": 3.349489450454712, "learning_rate": 1.5110894467462833e-07, "loss": 1.3255, "step": 125 }, { "epoch": 0.0015843418278429795, "grad_norm": 3.203742742538452, "learning_rate": 1.5720204728247624e-07, "loss": 1.2038, "step": 130 }, { "epoch": 0.0016452780519907865, "grad_norm": 3.3162038326263428, "learning_rate": 1.6329514989032416e-07, "loss": 1.2853, "step": 135 }, { "epoch": 0.0017062142761385933, "grad_norm": 3.581425189971924, "learning_rate": 1.6938825249817207e-07, "loss": 1.1923, "step": 140 }, { "epoch": 0.0017671505002864003, "grad_norm": 3.5434768199920654, "learning_rate": 1.7548135510602e-07, "loss": 1.2548, "step": 145 }, { "epoch": 0.001828086724434207, "grad_norm": 3.283493757247925, "learning_rate": 1.815744577138679e-07, "loss": 1.2317, "step": 150 }, { "epoch": 0.0018890229485820141, "grad_norm": 3.5605716705322266, "learning_rate": 1.8766756032171582e-07, "loss": 1.1958, "step": 155 }, { "epoch": 0.001949959172729821, "grad_norm": 3.538708448410034, "learning_rate": 1.9376066292956374e-07, "loss": 1.2435, "step": 160 }, { "epoch": 0.002010895396877628, "grad_norm": 3.492628574371338, "learning_rate": 1.9985376553741168e-07, "loss": 1.2527, "step": 165 }, { "epoch": 0.0020718316210254347, "grad_norm": 2.9864614009857178, "learning_rate": 2.059468681452596e-07, "loss": 1.1641, "step": 170 }, { "epoch": 0.0021327678451732415, "grad_norm": 2.8034121990203857, "learning_rate": 2.120399707531075e-07, "loss": 1.2304, "step": 175 }, { "epoch": 0.0021937040693210488, "grad_norm": 3.2967560291290283, "learning_rate": 2.1813307336095543e-07, "loss": 1.2393, "step": 180 }, { "epoch": 0.0022546402934688556, "grad_norm": 3.5977256298065186, "learning_rate": 2.2422617596880334e-07, "loss": 1.2671, "step": 185 }, { "epoch": 0.0023155765176166624, "grad_norm": 2.772099494934082, "learning_rate": 2.3031927857665126e-07, "loss": 1.1317, "step": 190 }, { "epoch": 0.002376512741764469, "grad_norm": 2.7183215618133545, "learning_rate": 2.3641238118449917e-07, "loss": 1.1643, "step": 195 }, { "epoch": 0.0024374489659122764, "grad_norm": 2.926683187484741, "learning_rate": 2.4250548379234706e-07, "loss": 1.1212, "step": 200 }, { "epoch": 0.0024983851900600832, "grad_norm": 2.69508957862854, "learning_rate": 2.48598586400195e-07, "loss": 1.1375, "step": 205 }, { "epoch": 0.00255932141420789, "grad_norm": 2.666896343231201, "learning_rate": 2.546916890080429e-07, "loss": 1.1792, "step": 210 }, { "epoch": 0.002620257638355697, "grad_norm": 2.743194818496704, "learning_rate": 2.6078479161589084e-07, "loss": 1.2106, "step": 215 }, { "epoch": 0.0026811938625035036, "grad_norm": 2.6941542625427246, "learning_rate": 2.668778942237387e-07, "loss": 1.2316, "step": 220 }, { "epoch": 0.002742130086651311, "grad_norm": 2.6239306926727295, "learning_rate": 2.7297099683158667e-07, "loss": 1.1503, "step": 225 }, { "epoch": 0.0028030663107991177, "grad_norm": 2.4870035648345947, "learning_rate": 2.7906409943943456e-07, "loss": 1.1711, "step": 230 }, { "epoch": 0.0028640025349469245, "grad_norm": 2.8261516094207764, "learning_rate": 2.851572020472825e-07, "loss": 1.1839, "step": 235 }, { "epoch": 0.0029249387590947313, "grad_norm": 2.8859550952911377, "learning_rate": 2.912503046551304e-07, "loss": 1.1841, "step": 240 }, { "epoch": 0.0029858749832425385, "grad_norm": 2.669638156890869, "learning_rate": 2.9734340726297833e-07, "loss": 1.2049, "step": 245 }, { "epoch": 0.0030468112073903453, "grad_norm": 3.9971940517425537, "learning_rate": 3.0343650987082627e-07, "loss": 1.0972, "step": 250 }, { "epoch": 0.003107747431538152, "grad_norm": 2.872126340866089, "learning_rate": 3.0952961247867416e-07, "loss": 1.1743, "step": 255 }, { "epoch": 0.003168683655685959, "grad_norm": 2.620603561401367, "learning_rate": 3.156227150865221e-07, "loss": 1.1771, "step": 260 }, { "epoch": 0.003229619879833766, "grad_norm": 2.545419216156006, "learning_rate": 3.2171581769437e-07, "loss": 1.1335, "step": 265 }, { "epoch": 0.003290556103981573, "grad_norm": 2.733769178390503, "learning_rate": 3.2780892030221794e-07, "loss": 1.2292, "step": 270 }, { "epoch": 0.0033514923281293798, "grad_norm": 2.4394352436065674, "learning_rate": 3.339020229100658e-07, "loss": 1.214, "step": 275 }, { "epoch": 0.0034124285522771866, "grad_norm": 2.2672674655914307, "learning_rate": 3.3999512551791377e-07, "loss": 1.1606, "step": 280 }, { "epoch": 0.003473364776424994, "grad_norm": 2.6655185222625732, "learning_rate": 3.4608822812576166e-07, "loss": 1.1343, "step": 285 }, { "epoch": 0.0035343010005728006, "grad_norm": 2.883915424346924, "learning_rate": 3.521813307336096e-07, "loss": 1.1704, "step": 290 }, { "epoch": 0.0035952372247206074, "grad_norm": 2.6363744735717773, "learning_rate": 3.582744333414575e-07, "loss": 1.1322, "step": 295 }, { "epoch": 0.003656173448868414, "grad_norm": 2.4832842350006104, "learning_rate": 3.6436753594930543e-07, "loss": 1.0918, "step": 300 }, { "epoch": 0.0037171096730162214, "grad_norm": 2.700274705886841, "learning_rate": 3.704606385571533e-07, "loss": 1.1741, "step": 305 }, { "epoch": 0.0037780458971640282, "grad_norm": 2.674513339996338, "learning_rate": 3.765537411650012e-07, "loss": 1.2454, "step": 310 }, { "epoch": 0.003838982121311835, "grad_norm": 1.9951525926589966, "learning_rate": 3.8264684377284915e-07, "loss": 1.1464, "step": 315 }, { "epoch": 0.003899918345459642, "grad_norm": 2.7005615234375, "learning_rate": 3.8873994638069704e-07, "loss": 1.195, "step": 320 }, { "epoch": 0.003960854569607449, "grad_norm": 2.3688161373138428, "learning_rate": 3.94833048988545e-07, "loss": 1.2642, "step": 325 }, { "epoch": 0.004021790793755256, "grad_norm": 2.7275078296661377, "learning_rate": 4.0092615159639287e-07, "loss": 1.1506, "step": 330 }, { "epoch": 0.004082727017903062, "grad_norm": 3.047724723815918, "learning_rate": 4.070192542042408e-07, "loss": 1.0853, "step": 335 }, { "epoch": 0.0041436632420508695, "grad_norm": 2.2948594093322754, "learning_rate": 4.131123568120887e-07, "loss": 1.2112, "step": 340 }, { "epoch": 0.004204599466198677, "grad_norm": 2.3711495399475098, "learning_rate": 4.1920545941993665e-07, "loss": 1.1359, "step": 345 }, { "epoch": 0.004265535690346483, "grad_norm": 2.50526762008667, "learning_rate": 4.2529856202778454e-07, "loss": 1.1432, "step": 350 }, { "epoch": 0.00432647191449429, "grad_norm": 2.290536642074585, "learning_rate": 4.313916646356325e-07, "loss": 1.2283, "step": 355 }, { "epoch": 0.0043874081386420976, "grad_norm": 2.6062114238739014, "learning_rate": 4.3748476724348047e-07, "loss": 1.1839, "step": 360 }, { "epoch": 0.004448344362789904, "grad_norm": 2.5423572063446045, "learning_rate": 4.4357786985132836e-07, "loss": 1.2201, "step": 365 }, { "epoch": 0.004509280586937711, "grad_norm": 2.1528851985931396, "learning_rate": 4.496709724591763e-07, "loss": 1.1929, "step": 370 }, { "epoch": 0.0045702168110855175, "grad_norm": 2.3668465614318848, "learning_rate": 4.557640750670242e-07, "loss": 1.0844, "step": 375 }, { "epoch": 0.004631153035233325, "grad_norm": 2.3637773990631104, "learning_rate": 4.618571776748721e-07, "loss": 1.1707, "step": 380 }, { "epoch": 0.004692089259381132, "grad_norm": 1.994493842124939, "learning_rate": 4.6795028028272e-07, "loss": 1.1435, "step": 385 }, { "epoch": 0.004753025483528938, "grad_norm": 2.250922441482544, "learning_rate": 4.740433828905679e-07, "loss": 1.1832, "step": 390 }, { "epoch": 0.004813961707676746, "grad_norm": 2.38035249710083, "learning_rate": 4.801364854984159e-07, "loss": 1.1714, "step": 395 }, { "epoch": 0.004874897931824553, "grad_norm": 2.1575679779052734, "learning_rate": 4.862295881062637e-07, "loss": 1.0813, "step": 400 }, { "epoch": 0.004935834155972359, "grad_norm": 2.3509998321533203, "learning_rate": 4.923226907141116e-07, "loss": 1.1278, "step": 405 }, { "epoch": 0.0049967703801201665, "grad_norm": 2.48842716217041, "learning_rate": 4.984157933219596e-07, "loss": 1.2326, "step": 410 }, { "epoch": 0.005057706604267973, "grad_norm": 2.59228253364563, "learning_rate": 5.045088959298075e-07, "loss": 1.1449, "step": 415 }, { "epoch": 0.00511864282841578, "grad_norm": 2.1924867630004883, "learning_rate": 5.106019985376554e-07, "loss": 1.2186, "step": 420 }, { "epoch": 0.005179579052563587, "grad_norm": 2.6852645874023438, "learning_rate": 5.166951011455033e-07, "loss": 1.106, "step": 425 }, { "epoch": 0.005240515276711394, "grad_norm": 2.0928955078125, "learning_rate": 5.227882037533513e-07, "loss": 1.1636, "step": 430 }, { "epoch": 0.005301451500859201, "grad_norm": 2.6856303215026855, "learning_rate": 5.288813063611992e-07, "loss": 1.1013, "step": 435 }, { "epoch": 0.005362387725007007, "grad_norm": 2.2194149494171143, "learning_rate": 5.349744089690471e-07, "loss": 1.1325, "step": 440 }, { "epoch": 0.0054233239491548145, "grad_norm": 2.820070266723633, "learning_rate": 5.41067511576895e-07, "loss": 1.0694, "step": 445 }, { "epoch": 0.005484260173302622, "grad_norm": 2.4806153774261475, "learning_rate": 5.47160614184743e-07, "loss": 1.0778, "step": 450 }, { "epoch": 0.005545196397450428, "grad_norm": 2.140007257461548, "learning_rate": 5.532537167925908e-07, "loss": 1.0567, "step": 455 }, { "epoch": 0.005606132621598235, "grad_norm": 1.847922444343567, "learning_rate": 5.593468194004387e-07, "loss": 1.0746, "step": 460 }, { "epoch": 0.005667068845746043, "grad_norm": 2.330336570739746, "learning_rate": 5.654399220082866e-07, "loss": 1.0608, "step": 465 }, { "epoch": 0.005728005069893849, "grad_norm": 2.627915859222412, "learning_rate": 5.715330246161345e-07, "loss": 1.1733, "step": 470 }, { "epoch": 0.005788941294041656, "grad_norm": 2.5277955532073975, "learning_rate": 5.776261272239825e-07, "loss": 1.0624, "step": 475 }, { "epoch": 0.0058498775181894625, "grad_norm": 2.2048728466033936, "learning_rate": 5.837192298318304e-07, "loss": 1.0491, "step": 480 }, { "epoch": 0.00591081374233727, "grad_norm": 2.308880090713501, "learning_rate": 5.898123324396783e-07, "loss": 1.104, "step": 485 }, { "epoch": 0.005971749966485077, "grad_norm": 3.2973248958587646, "learning_rate": 5.959054350475262e-07, "loss": 1.1863, "step": 490 }, { "epoch": 0.006032686190632883, "grad_norm": 2.051680326461792, "learning_rate": 6.019985376553742e-07, "loss": 1.295, "step": 495 }, { "epoch": 0.006093622414780691, "grad_norm": 1.9568674564361572, "learning_rate": 6.080916402632221e-07, "loss": 1.154, "step": 500 }, { "epoch": 0.006154558638928498, "grad_norm": 2.6509671211242676, "learning_rate": 6.1418474287107e-07, "loss": 1.1138, "step": 505 }, { "epoch": 0.006215494863076304, "grad_norm": 2.1336019039154053, "learning_rate": 6.202778454789178e-07, "loss": 1.1062, "step": 510 }, { "epoch": 0.0062764310872241115, "grad_norm": 2.637924909591675, "learning_rate": 6.263709480867658e-07, "loss": 1.0741, "step": 515 }, { "epoch": 0.006337367311371918, "grad_norm": 2.3735153675079346, "learning_rate": 6.324640506946138e-07, "loss": 1.1303, "step": 520 }, { "epoch": 0.006398303535519725, "grad_norm": 2.4911253452301025, "learning_rate": 6.385571533024616e-07, "loss": 1.1199, "step": 525 }, { "epoch": 0.006459239759667532, "grad_norm": 2.2193801403045654, "learning_rate": 6.446502559103096e-07, "loss": 1.1077, "step": 530 }, { "epoch": 0.006520175983815339, "grad_norm": 2.487494945526123, "learning_rate": 6.507433585181575e-07, "loss": 1.1546, "step": 535 }, { "epoch": 0.006581112207963146, "grad_norm": 3.0068578720092773, "learning_rate": 6.568364611260054e-07, "loss": 1.14, "step": 540 }, { "epoch": 0.006642048432110952, "grad_norm": 2.559873342514038, "learning_rate": 6.629295637338533e-07, "loss": 1.1753, "step": 545 }, { "epoch": 0.0067029846562587595, "grad_norm": 2.3061952590942383, "learning_rate": 6.690226663417013e-07, "loss": 1.0699, "step": 550 }, { "epoch": 0.006763920880406567, "grad_norm": 2.177849292755127, "learning_rate": 6.751157689495492e-07, "loss": 1.192, "step": 555 }, { "epoch": 0.006824857104554373, "grad_norm": 2.4048409461975098, "learning_rate": 6.81208871557397e-07, "loss": 1.1745, "step": 560 }, { "epoch": 0.00688579332870218, "grad_norm": 2.08768367767334, "learning_rate": 6.873019741652449e-07, "loss": 1.132, "step": 565 }, { "epoch": 0.006946729552849988, "grad_norm": 2.4322562217712402, "learning_rate": 6.933950767730929e-07, "loss": 1.1304, "step": 570 }, { "epoch": 0.007007665776997794, "grad_norm": 2.2372384071350098, "learning_rate": 6.994881793809407e-07, "loss": 1.1178, "step": 575 }, { "epoch": 0.007068602001145601, "grad_norm": 1.9693150520324707, "learning_rate": 7.055812819887887e-07, "loss": 1.1228, "step": 580 }, { "epoch": 0.0071295382252934076, "grad_norm": 2.6766700744628906, "learning_rate": 7.116743845966366e-07, "loss": 1.1089, "step": 585 }, { "epoch": 0.007190474449441215, "grad_norm": 2.134523391723633, "learning_rate": 7.177674872044846e-07, "loss": 1.0383, "step": 590 }, { "epoch": 0.007251410673589022, "grad_norm": 2.2494235038757324, "learning_rate": 7.238605898123326e-07, "loss": 1.0924, "step": 595 }, { "epoch": 0.007312346897736828, "grad_norm": 1.8870952129364014, "learning_rate": 7.299536924201804e-07, "loss": 1.0649, "step": 600 }, { "epoch": 0.007373283121884636, "grad_norm": 2.4844822883605957, "learning_rate": 7.360467950280284e-07, "loss": 1.1745, "step": 605 }, { "epoch": 0.007434219346032443, "grad_norm": 2.0714759826660156, "learning_rate": 7.421398976358763e-07, "loss": 1.0753, "step": 610 }, { "epoch": 0.007495155570180249, "grad_norm": 2.1989798545837402, "learning_rate": 7.482330002437243e-07, "loss": 0.9941, "step": 615 }, { "epoch": 0.0075560917943280565, "grad_norm": 2.450382709503174, "learning_rate": 7.54326102851572e-07, "loss": 1.0821, "step": 620 }, { "epoch": 0.007617028018475863, "grad_norm": 2.2551209926605225, "learning_rate": 7.6041920545942e-07, "loss": 1.051, "step": 625 }, { "epoch": 0.00767796424262367, "grad_norm": 1.959818959236145, "learning_rate": 7.665123080672679e-07, "loss": 1.1057, "step": 630 }, { "epoch": 0.007738900466771477, "grad_norm": 3.1334762573242188, "learning_rate": 7.726054106751159e-07, "loss": 1.0512, "step": 635 }, { "epoch": 0.007799836690919284, "grad_norm": 2.2239768505096436, "learning_rate": 7.786985132829637e-07, "loss": 0.9841, "step": 640 }, { "epoch": 0.00786077291506709, "grad_norm": 2.595806360244751, "learning_rate": 7.847916158908117e-07, "loss": 1.1322, "step": 645 }, { "epoch": 0.007921709139214897, "grad_norm": 2.0281026363372803, "learning_rate": 7.908847184986596e-07, "loss": 1.0542, "step": 650 }, { "epoch": 0.007982645363362705, "grad_norm": 2.2579357624053955, "learning_rate": 7.969778211065076e-07, "loss": 1.0954, "step": 655 }, { "epoch": 0.008043581587510512, "grad_norm": 2.503070116043091, "learning_rate": 8.030709237143554e-07, "loss": 1.0885, "step": 660 }, { "epoch": 0.008104517811658319, "grad_norm": 2.41497540473938, "learning_rate": 8.091640263222034e-07, "loss": 1.1427, "step": 665 }, { "epoch": 0.008165454035806124, "grad_norm": 2.4873836040496826, "learning_rate": 8.152571289300512e-07, "loss": 1.0658, "step": 670 }, { "epoch": 0.008226390259953932, "grad_norm": 2.428795099258423, "learning_rate": 8.213502315378992e-07, "loss": 1.1635, "step": 675 }, { "epoch": 0.008287326484101739, "grad_norm": 2.433234214782715, "learning_rate": 8.27443334145747e-07, "loss": 1.0639, "step": 680 }, { "epoch": 0.008348262708249546, "grad_norm": 3.0688626766204834, "learning_rate": 8.33536436753595e-07, "loss": 1.0991, "step": 685 }, { "epoch": 0.008409198932397353, "grad_norm": 2.78928279876709, "learning_rate": 8.396295393614429e-07, "loss": 1.0325, "step": 690 }, { "epoch": 0.00847013515654516, "grad_norm": 2.0965471267700195, "learning_rate": 8.457226419692908e-07, "loss": 1.0701, "step": 695 }, { "epoch": 0.008531071380692966, "grad_norm": 2.2203452587127686, "learning_rate": 8.518157445771387e-07, "loss": 1.0352, "step": 700 }, { "epoch": 0.008592007604840773, "grad_norm": 3.1762425899505615, "learning_rate": 8.579088471849867e-07, "loss": 1.0611, "step": 705 }, { "epoch": 0.00865294382898858, "grad_norm": 2.0579049587249756, "learning_rate": 8.640019497928346e-07, "loss": 1.0724, "step": 710 }, { "epoch": 0.008713880053136388, "grad_norm": 1.9408974647521973, "learning_rate": 8.700950524006825e-07, "loss": 1.1035, "step": 715 }, { "epoch": 0.008774816277284195, "grad_norm": 2.194699287414551, "learning_rate": 8.761881550085304e-07, "loss": 1.0718, "step": 720 }, { "epoch": 0.008835752501432, "grad_norm": 2.0578503608703613, "learning_rate": 8.822812576163783e-07, "loss": 1.0216, "step": 725 }, { "epoch": 0.008896688725579808, "grad_norm": 2.273627281188965, "learning_rate": 8.883743602242261e-07, "loss": 0.9968, "step": 730 }, { "epoch": 0.008957624949727615, "grad_norm": 2.25054931640625, "learning_rate": 8.944674628320741e-07, "loss": 1.0621, "step": 735 }, { "epoch": 0.009018561173875422, "grad_norm": 2.0660016536712646, "learning_rate": 9.00560565439922e-07, "loss": 1.0949, "step": 740 }, { "epoch": 0.00907949739802323, "grad_norm": 2.1484103202819824, "learning_rate": 9.0665366804777e-07, "loss": 0.9687, "step": 745 }, { "epoch": 0.009140433622171035, "grad_norm": 2.3057379722595215, "learning_rate": 9.12746770655618e-07, "loss": 1.0788, "step": 750 }, { "epoch": 0.009201369846318842, "grad_norm": 2.196765661239624, "learning_rate": 9.188398732634658e-07, "loss": 1.0367, "step": 755 }, { "epoch": 0.00926230607046665, "grad_norm": 2.619968891143799, "learning_rate": 9.249329758713138e-07, "loss": 1.0191, "step": 760 }, { "epoch": 0.009323242294614457, "grad_norm": 2.4902865886688232, "learning_rate": 9.310260784791617e-07, "loss": 1.0174, "step": 765 }, { "epoch": 0.009384178518762264, "grad_norm": 1.8221099376678467, "learning_rate": 9.371191810870097e-07, "loss": 1.0365, "step": 770 }, { "epoch": 0.00944511474291007, "grad_norm": 2.651728868484497, "learning_rate": 9.432122836948575e-07, "loss": 1.1464, "step": 775 }, { "epoch": 0.009506050967057877, "grad_norm": 2.4130325317382812, "learning_rate": 9.493053863027054e-07, "loss": 1.1157, "step": 780 }, { "epoch": 0.009566987191205684, "grad_norm": 2.2816951274871826, "learning_rate": 9.553984889105532e-07, "loss": 1.0983, "step": 785 }, { "epoch": 0.009627923415353491, "grad_norm": 2.8218228816986084, "learning_rate": 9.614915915184012e-07, "loss": 1.0771, "step": 790 }, { "epoch": 0.009688859639501298, "grad_norm": 2.1778571605682373, "learning_rate": 9.675846941262492e-07, "loss": 1.1196, "step": 795 }, { "epoch": 0.009749795863649106, "grad_norm": 2.1733038425445557, "learning_rate": 9.736777967340972e-07, "loss": 1.132, "step": 800 }, { "epoch": 0.009810732087796911, "grad_norm": 2.442572593688965, "learning_rate": 9.79770899341945e-07, "loss": 1.0797, "step": 805 }, { "epoch": 0.009871668311944718, "grad_norm": 2.3029189109802246, "learning_rate": 9.85864001949793e-07, "loss": 1.0925, "step": 810 }, { "epoch": 0.009932604536092526, "grad_norm": 2.7412612438201904, "learning_rate": 9.919571045576408e-07, "loss": 1.0212, "step": 815 }, { "epoch": 0.009993540760240333, "grad_norm": 2.0642073154449463, "learning_rate": 9.980502071654888e-07, "loss": 1.0275, "step": 820 }, { "epoch": 0.01005447698438814, "grad_norm": 2.258232355117798, "learning_rate": 1.0041433097733366e-06, "loss": 1.0345, "step": 825 }, { "epoch": 0.010115413208535946, "grad_norm": 2.1130049228668213, "learning_rate": 1.0102364123811846e-06, "loss": 1.1538, "step": 830 }, { "epoch": 0.010176349432683753, "grad_norm": 2.8442885875701904, "learning_rate": 1.0163295149890325e-06, "loss": 1.0953, "step": 835 }, { "epoch": 0.01023728565683156, "grad_norm": 3.0801596641540527, "learning_rate": 1.0224226175968805e-06, "loss": 1.0232, "step": 840 }, { "epoch": 0.010298221880979367, "grad_norm": 2.1763975620269775, "learning_rate": 1.0285157202047283e-06, "loss": 1.0351, "step": 845 }, { "epoch": 0.010359158105127175, "grad_norm": 2.014939069747925, "learning_rate": 1.0346088228125763e-06, "loss": 1.0151, "step": 850 }, { "epoch": 0.01042009432927498, "grad_norm": 5.274635314941406, "learning_rate": 1.040701925420424e-06, "loss": 1.0903, "step": 855 }, { "epoch": 0.010481030553422787, "grad_norm": 2.2539806365966797, "learning_rate": 1.046795028028272e-06, "loss": 1.0629, "step": 860 }, { "epoch": 0.010541966777570595, "grad_norm": 2.5167036056518555, "learning_rate": 1.0528881306361199e-06, "loss": 1.02, "step": 865 }, { "epoch": 0.010602903001718402, "grad_norm": 2.3801679611206055, "learning_rate": 1.0589812332439679e-06, "loss": 1.1081, "step": 870 }, { "epoch": 0.010663839225866209, "grad_norm": 2.0620622634887695, "learning_rate": 1.0650743358518159e-06, "loss": 1.0715, "step": 875 }, { "epoch": 0.010724775450014015, "grad_norm": 2.3412675857543945, "learning_rate": 1.0711674384596637e-06, "loss": 0.9767, "step": 880 }, { "epoch": 0.010785711674161822, "grad_norm": 2.605701446533203, "learning_rate": 1.0772605410675117e-06, "loss": 1.0512, "step": 885 }, { "epoch": 0.010846647898309629, "grad_norm": 2.1805458068847656, "learning_rate": 1.0833536436753596e-06, "loss": 1.0617, "step": 890 }, { "epoch": 0.010907584122457436, "grad_norm": 1.9568969011306763, "learning_rate": 1.0894467462832074e-06, "loss": 1.081, "step": 895 }, { "epoch": 0.010968520346605243, "grad_norm": 2.0446410179138184, "learning_rate": 1.0955398488910554e-06, "loss": 1.1183, "step": 900 }, { "epoch": 0.01102945657075305, "grad_norm": 2.469813346862793, "learning_rate": 1.1016329514989034e-06, "loss": 1.0883, "step": 905 }, { "epoch": 0.011090392794900856, "grad_norm": 2.338277816772461, "learning_rate": 1.1077260541067512e-06, "loss": 1.0706, "step": 910 }, { "epoch": 0.011151329019048663, "grad_norm": 2.135443925857544, "learning_rate": 1.1138191567145992e-06, "loss": 1.0259, "step": 915 }, { "epoch": 0.01121226524319647, "grad_norm": 2.5016512870788574, "learning_rate": 1.119912259322447e-06, "loss": 0.9823, "step": 920 }, { "epoch": 0.011273201467344278, "grad_norm": 2.646484851837158, "learning_rate": 1.126005361930295e-06, "loss": 1.0124, "step": 925 }, { "epoch": 0.011334137691492085, "grad_norm": 2.066351890563965, "learning_rate": 1.132098464538143e-06, "loss": 1.0933, "step": 930 }, { "epoch": 0.01139507391563989, "grad_norm": 2.066114902496338, "learning_rate": 1.138191567145991e-06, "loss": 1.1249, "step": 935 }, { "epoch": 0.011456010139787698, "grad_norm": 2.0607264041900635, "learning_rate": 1.1442846697538388e-06, "loss": 1.0489, "step": 940 }, { "epoch": 0.011516946363935505, "grad_norm": 2.2696609497070312, "learning_rate": 1.1503777723616867e-06, "loss": 1.0164, "step": 945 }, { "epoch": 0.011577882588083312, "grad_norm": 2.2202513217926025, "learning_rate": 1.1564708749695345e-06, "loss": 1.0245, "step": 950 }, { "epoch": 0.01163881881223112, "grad_norm": 2.050004243850708, "learning_rate": 1.1625639775773825e-06, "loss": 1.0678, "step": 955 }, { "epoch": 0.011699755036378925, "grad_norm": 2.17169189453125, "learning_rate": 1.1686570801852303e-06, "loss": 1.0804, "step": 960 }, { "epoch": 0.011760691260526732, "grad_norm": 2.4623453617095947, "learning_rate": 1.1747501827930783e-06, "loss": 0.9823, "step": 965 }, { "epoch": 0.01182162748467454, "grad_norm": 2.089599847793579, "learning_rate": 1.1808432854009263e-06, "loss": 1.1122, "step": 970 }, { "epoch": 0.011882563708822347, "grad_norm": 2.3027889728546143, "learning_rate": 1.1869363880087743e-06, "loss": 1.0882, "step": 975 }, { "epoch": 0.011943499932970154, "grad_norm": 2.5075466632843018, "learning_rate": 1.193029490616622e-06, "loss": 1.0406, "step": 980 }, { "epoch": 0.01200443615711796, "grad_norm": 2.408702850341797, "learning_rate": 1.19912259322447e-06, "loss": 1.0427, "step": 985 }, { "epoch": 0.012065372381265767, "grad_norm": 2.2225656509399414, "learning_rate": 1.2052156958323179e-06, "loss": 1.0103, "step": 990 }, { "epoch": 0.012126308605413574, "grad_norm": 3.3906874656677246, "learning_rate": 1.2113087984401659e-06, "loss": 1.0233, "step": 995 }, { "epoch": 0.012187244829561381, "grad_norm": 2.5627644062042236, "learning_rate": 1.2174019010480136e-06, "loss": 1.1147, "step": 1000 }, { "epoch": 0.012248181053709188, "grad_norm": 2.071758985519409, "learning_rate": 1.2234950036558616e-06, "loss": 1.0883, "step": 1005 }, { "epoch": 0.012309117277856996, "grad_norm": 2.2363414764404297, "learning_rate": 1.2295881062637096e-06, "loss": 1.1317, "step": 1010 }, { "epoch": 0.012370053502004801, "grad_norm": 2.0319664478302, "learning_rate": 1.2356812088715574e-06, "loss": 1.0253, "step": 1015 }, { "epoch": 0.012430989726152608, "grad_norm": 2.106229782104492, "learning_rate": 1.2417743114794054e-06, "loss": 1.0402, "step": 1020 }, { "epoch": 0.012491925950300416, "grad_norm": 2.206895589828491, "learning_rate": 1.2478674140872534e-06, "loss": 1.0399, "step": 1025 }, { "epoch": 0.012552862174448223, "grad_norm": 2.0470988750457764, "learning_rate": 1.2539605166951014e-06, "loss": 1.0511, "step": 1030 }, { "epoch": 0.01261379839859603, "grad_norm": 2.792018175125122, "learning_rate": 1.2600536193029492e-06, "loss": 1.008, "step": 1035 }, { "epoch": 0.012674734622743836, "grad_norm": 1.997732162475586, "learning_rate": 1.266146721910797e-06, "loss": 1.0095, "step": 1040 }, { "epoch": 0.012735670846891643, "grad_norm": 2.148228168487549, "learning_rate": 1.272239824518645e-06, "loss": 1.0321, "step": 1045 }, { "epoch": 0.01279660707103945, "grad_norm": 2.06235933303833, "learning_rate": 1.278332927126493e-06, "loss": 1.0655, "step": 1050 }, { "epoch": 0.012857543295187257, "grad_norm": 2.4998295307159424, "learning_rate": 1.2844260297343407e-06, "loss": 1.0699, "step": 1055 }, { "epoch": 0.012918479519335065, "grad_norm": 1.804108738899231, "learning_rate": 1.2905191323421887e-06, "loss": 0.9514, "step": 1060 }, { "epoch": 0.01297941574348287, "grad_norm": 2.4925150871276855, "learning_rate": 1.2966122349500367e-06, "loss": 0.9996, "step": 1065 }, { "epoch": 0.013040351967630677, "grad_norm": 2.1598169803619385, "learning_rate": 1.3027053375578847e-06, "loss": 1.0923, "step": 1070 }, { "epoch": 0.013101288191778485, "grad_norm": 2.1867282390594482, "learning_rate": 1.3087984401657325e-06, "loss": 1.1027, "step": 1075 }, { "epoch": 0.013162224415926292, "grad_norm": 2.4988887310028076, "learning_rate": 1.3148915427735803e-06, "loss": 1.077, "step": 1080 }, { "epoch": 0.013223160640074099, "grad_norm": 2.551316976547241, "learning_rate": 1.3209846453814285e-06, "loss": 0.9891, "step": 1085 }, { "epoch": 0.013284096864221905, "grad_norm": 2.160423994064331, "learning_rate": 1.3270777479892763e-06, "loss": 1.0196, "step": 1090 }, { "epoch": 0.013345033088369712, "grad_norm": 2.188905715942383, "learning_rate": 1.333170850597124e-06, "loss": 1.1114, "step": 1095 }, { "epoch": 0.013405969312517519, "grad_norm": 1.9721975326538086, "learning_rate": 1.339263953204972e-06, "loss": 1.0238, "step": 1100 }, { "epoch": 0.013466905536665326, "grad_norm": 2.267063617706299, "learning_rate": 1.34535705581282e-06, "loss": 1.0293, "step": 1105 }, { "epoch": 0.013527841760813133, "grad_norm": 2.4284701347351074, "learning_rate": 1.351450158420668e-06, "loss": 1.0835, "step": 1110 }, { "epoch": 0.01358877798496094, "grad_norm": 1.83641517162323, "learning_rate": 1.3575432610285158e-06, "loss": 1.0565, "step": 1115 }, { "epoch": 0.013649714209108746, "grad_norm": 2.346635341644287, "learning_rate": 1.3636363636363636e-06, "loss": 1.104, "step": 1120 }, { "epoch": 0.013710650433256553, "grad_norm": 2.7853996753692627, "learning_rate": 1.3697294662442118e-06, "loss": 1.0474, "step": 1125 }, { "epoch": 0.01377158665740436, "grad_norm": 2.262174606323242, "learning_rate": 1.3758225688520596e-06, "loss": 1.0011, "step": 1130 }, { "epoch": 0.013832522881552168, "grad_norm": 2.0552937984466553, "learning_rate": 1.3819156714599074e-06, "loss": 1.1049, "step": 1135 }, { "epoch": 0.013893459105699975, "grad_norm": 2.090169906616211, "learning_rate": 1.3880087740677554e-06, "loss": 1.0018, "step": 1140 }, { "epoch": 0.01395439532984778, "grad_norm": 2.371208429336548, "learning_rate": 1.3941018766756034e-06, "loss": 1.0423, "step": 1145 }, { "epoch": 0.014015331553995588, "grad_norm": 2.0906546115875244, "learning_rate": 1.4001949792834514e-06, "loss": 1.0207, "step": 1150 }, { "epoch": 0.014076267778143395, "grad_norm": 2.699928045272827, "learning_rate": 1.4062880818912992e-06, "loss": 1.0076, "step": 1155 }, { "epoch": 0.014137204002291202, "grad_norm": 2.3623435497283936, "learning_rate": 1.412381184499147e-06, "loss": 1.0587, "step": 1160 }, { "epoch": 0.01419814022643901, "grad_norm": 3.1802124977111816, "learning_rate": 1.4184742871069951e-06, "loss": 0.9977, "step": 1165 }, { "epoch": 0.014259076450586815, "grad_norm": 5.360105991363525, "learning_rate": 1.424567389714843e-06, "loss": 1.0948, "step": 1170 }, { "epoch": 0.014320012674734622, "grad_norm": 2.286569118499756, "learning_rate": 1.4306604923226907e-06, "loss": 1.0015, "step": 1175 }, { "epoch": 0.01438094889888243, "grad_norm": 1.900286078453064, "learning_rate": 1.4367535949305387e-06, "loss": 0.9958, "step": 1180 }, { "epoch": 0.014441885123030237, "grad_norm": 2.332761526107788, "learning_rate": 1.4428466975383867e-06, "loss": 1.1756, "step": 1185 }, { "epoch": 0.014502821347178044, "grad_norm": 2.947939872741699, "learning_rate": 1.4489398001462345e-06, "loss": 1.0745, "step": 1190 }, { "epoch": 0.01456375757132585, "grad_norm": 2.790154218673706, "learning_rate": 1.4550329027540825e-06, "loss": 1.0399, "step": 1195 }, { "epoch": 0.014624693795473657, "grad_norm": 2.0634372234344482, "learning_rate": 1.4611260053619303e-06, "loss": 1.15, "step": 1200 }, { "epoch": 0.014685630019621464, "grad_norm": 2.1943235397338867, "learning_rate": 1.4672191079697785e-06, "loss": 1.0601, "step": 1205 }, { "epoch": 0.014746566243769271, "grad_norm": 2.5785956382751465, "learning_rate": 1.4733122105776263e-06, "loss": 1.0537, "step": 1210 }, { "epoch": 0.014807502467917079, "grad_norm": 1.8831377029418945, "learning_rate": 1.479405313185474e-06, "loss": 1.0358, "step": 1215 }, { "epoch": 0.014868438692064886, "grad_norm": 2.1131651401519775, "learning_rate": 1.4854984157933222e-06, "loss": 1.0544, "step": 1220 }, { "epoch": 0.014929374916212691, "grad_norm": 2.4729676246643066, "learning_rate": 1.49159151840117e-06, "loss": 1.0995, "step": 1225 }, { "epoch": 0.014990311140360498, "grad_norm": 2.3520667552948, "learning_rate": 1.4976846210090178e-06, "loss": 1.1099, "step": 1230 }, { "epoch": 0.015051247364508306, "grad_norm": 1.9015685319900513, "learning_rate": 1.5037777236168658e-06, "loss": 0.987, "step": 1235 }, { "epoch": 0.015112183588656113, "grad_norm": 2.278381109237671, "learning_rate": 1.5098708262247138e-06, "loss": 1.0478, "step": 1240 }, { "epoch": 0.01517311981280392, "grad_norm": 2.6460511684417725, "learning_rate": 1.5159639288325618e-06, "loss": 1.0654, "step": 1245 }, { "epoch": 0.015234056036951726, "grad_norm": 2.723259210586548, "learning_rate": 1.5220570314404096e-06, "loss": 1.023, "step": 1250 }, { "epoch": 0.015294992261099533, "grad_norm": 2.5405375957489014, "learning_rate": 1.5281501340482574e-06, "loss": 1.0656, "step": 1255 }, { "epoch": 0.01535592848524734, "grad_norm": 1.9185701608657837, "learning_rate": 1.5342432366561056e-06, "loss": 0.9923, "step": 1260 }, { "epoch": 0.015416864709395147, "grad_norm": 2.2635629177093506, "learning_rate": 1.5403363392639534e-06, "loss": 1.0516, "step": 1265 }, { "epoch": 0.015477800933542955, "grad_norm": 1.862667202949524, "learning_rate": 1.5464294418718011e-06, "loss": 1.0286, "step": 1270 }, { "epoch": 0.01553873715769076, "grad_norm": 2.032419204711914, "learning_rate": 1.5525225444796491e-06, "loss": 1.0344, "step": 1275 }, { "epoch": 0.015599673381838567, "grad_norm": 2.1396772861480713, "learning_rate": 1.5586156470874971e-06, "loss": 1.0495, "step": 1280 }, { "epoch": 0.015660609605986375, "grad_norm": 3.283897876739502, "learning_rate": 1.5647087496953451e-06, "loss": 1.0712, "step": 1285 }, { "epoch": 0.01572154583013418, "grad_norm": 1.9895405769348145, "learning_rate": 1.570801852303193e-06, "loss": 0.9653, "step": 1290 }, { "epoch": 0.01578248205428199, "grad_norm": 2.077326536178589, "learning_rate": 1.5768949549110407e-06, "loss": 1.0815, "step": 1295 }, { "epoch": 0.015843418278429795, "grad_norm": 2.3152592182159424, "learning_rate": 1.582988057518889e-06, "loss": 0.9411, "step": 1300 }, { "epoch": 0.015904354502577604, "grad_norm": 2.228309392929077, "learning_rate": 1.5890811601267367e-06, "loss": 1.0498, "step": 1305 }, { "epoch": 0.01596529072672541, "grad_norm": 2.4937052726745605, "learning_rate": 1.5951742627345845e-06, "loss": 1.0163, "step": 1310 }, { "epoch": 0.016026226950873215, "grad_norm": 2.3902642726898193, "learning_rate": 1.6012673653424325e-06, "loss": 1.0238, "step": 1315 }, { "epoch": 0.016087163175021024, "grad_norm": 2.077824354171753, "learning_rate": 1.6073604679502805e-06, "loss": 1.0459, "step": 1320 }, { "epoch": 0.01614809939916883, "grad_norm": 2.3579888343811035, "learning_rate": 1.6134535705581284e-06, "loss": 1.0328, "step": 1325 }, { "epoch": 0.016209035623316638, "grad_norm": 2.210258960723877, "learning_rate": 1.6195466731659762e-06, "loss": 1.026, "step": 1330 }, { "epoch": 0.016269971847464443, "grad_norm": 2.216275930404663, "learning_rate": 1.625639775773824e-06, "loss": 0.992, "step": 1335 }, { "epoch": 0.01633090807161225, "grad_norm": 2.1545820236206055, "learning_rate": 1.6317328783816722e-06, "loss": 1.028, "step": 1340 }, { "epoch": 0.016391844295760058, "grad_norm": 2.1411590576171875, "learning_rate": 1.63782598098952e-06, "loss": 1.0721, "step": 1345 }, { "epoch": 0.016452780519907863, "grad_norm": 2.456355333328247, "learning_rate": 1.6439190835973678e-06, "loss": 1.0225, "step": 1350 }, { "epoch": 0.016513716744055672, "grad_norm": 2.4319510459899902, "learning_rate": 1.6500121862052158e-06, "loss": 1.0125, "step": 1355 }, { "epoch": 0.016574652968203478, "grad_norm": 2.6314754486083984, "learning_rate": 1.6561052888130638e-06, "loss": 1.0609, "step": 1360 }, { "epoch": 0.016635589192351283, "grad_norm": 2.007913827896118, "learning_rate": 1.6621983914209116e-06, "loss": 1.0357, "step": 1365 }, { "epoch": 0.016696525416499092, "grad_norm": 2.0359299182891846, "learning_rate": 1.6682914940287596e-06, "loss": 1.0486, "step": 1370 }, { "epoch": 0.016757461640646898, "grad_norm": 2.3556363582611084, "learning_rate": 1.6743845966366076e-06, "loss": 1.0023, "step": 1375 }, { "epoch": 0.016818397864794707, "grad_norm": 2.240969181060791, "learning_rate": 1.6804776992444555e-06, "loss": 1.0258, "step": 1380 }, { "epoch": 0.016879334088942512, "grad_norm": 2.059478759765625, "learning_rate": 1.6865708018523033e-06, "loss": 1.0117, "step": 1385 }, { "epoch": 0.01694027031309032, "grad_norm": 2.013552665710449, "learning_rate": 1.6926639044601511e-06, "loss": 1.0407, "step": 1390 }, { "epoch": 0.017001206537238127, "grad_norm": 2.3464014530181885, "learning_rate": 1.6987570070679993e-06, "loss": 1.0626, "step": 1395 }, { "epoch": 0.017062142761385932, "grad_norm": 2.0726606845855713, "learning_rate": 1.7048501096758471e-06, "loss": 1.0206, "step": 1400 }, { "epoch": 0.01712307898553374, "grad_norm": 2.1449601650238037, "learning_rate": 1.7109432122836949e-06, "loss": 1.0945, "step": 1405 }, { "epoch": 0.017184015209681547, "grad_norm": 1.842429757118225, "learning_rate": 1.7170363148915429e-06, "loss": 1.0845, "step": 1410 }, { "epoch": 0.017244951433829356, "grad_norm": 2.539241313934326, "learning_rate": 1.7231294174993909e-06, "loss": 1.0513, "step": 1415 }, { "epoch": 0.01730588765797716, "grad_norm": 2.0632665157318115, "learning_rate": 1.7292225201072389e-06, "loss": 1.072, "step": 1420 }, { "epoch": 0.017366823882124967, "grad_norm": 2.4427976608276367, "learning_rate": 1.7353156227150867e-06, "loss": 1.0564, "step": 1425 }, { "epoch": 0.017427760106272776, "grad_norm": 2.141024589538574, "learning_rate": 1.7414087253229344e-06, "loss": 0.9369, "step": 1430 }, { "epoch": 0.01748869633042058, "grad_norm": 2.277878522872925, "learning_rate": 1.7475018279307826e-06, "loss": 0.9848, "step": 1435 }, { "epoch": 0.01754963255456839, "grad_norm": 2.2378668785095215, "learning_rate": 1.7535949305386304e-06, "loss": 0.986, "step": 1440 }, { "epoch": 0.017610568778716196, "grad_norm": 2.2713027000427246, "learning_rate": 1.7596880331464782e-06, "loss": 0.9306, "step": 1445 }, { "epoch": 0.017671505002864, "grad_norm": 2.1677467823028564, "learning_rate": 1.7657811357543262e-06, "loss": 0.9937, "step": 1450 }, { "epoch": 0.01773244122701181, "grad_norm": 3.010105848312378, "learning_rate": 1.7718742383621742e-06, "loss": 0.9489, "step": 1455 }, { "epoch": 0.017793377451159616, "grad_norm": 2.4034106731414795, "learning_rate": 1.7779673409700222e-06, "loss": 1.0861, "step": 1460 }, { "epoch": 0.017854313675307425, "grad_norm": 2.3110105991363525, "learning_rate": 1.78406044357787e-06, "loss": 1.0022, "step": 1465 }, { "epoch": 0.01791524989945523, "grad_norm": 2.0844321250915527, "learning_rate": 1.7901535461857178e-06, "loss": 0.9542, "step": 1470 }, { "epoch": 0.017976186123603036, "grad_norm": 2.2467050552368164, "learning_rate": 1.796246648793566e-06, "loss": 1.0573, "step": 1475 }, { "epoch": 0.018037122347750845, "grad_norm": 2.0278074741363525, "learning_rate": 1.8023397514014138e-06, "loss": 0.9896, "step": 1480 }, { "epoch": 0.01809805857189865, "grad_norm": 2.5632269382476807, "learning_rate": 1.8084328540092615e-06, "loss": 1.0474, "step": 1485 }, { "epoch": 0.01815899479604646, "grad_norm": 2.1736085414886475, "learning_rate": 1.8145259566171095e-06, "loss": 0.9698, "step": 1490 }, { "epoch": 0.018219931020194265, "grad_norm": 2.0691323280334473, "learning_rate": 1.8206190592249575e-06, "loss": 1.0414, "step": 1495 }, { "epoch": 0.01828086724434207, "grad_norm": 2.413508653640747, "learning_rate": 1.8267121618328053e-06, "loss": 0.9549, "step": 1500 }, { "epoch": 0.01834180346848988, "grad_norm": 1.9939329624176025, "learning_rate": 1.8328052644406533e-06, "loss": 0.9648, "step": 1505 }, { "epoch": 0.018402739692637685, "grad_norm": 2.465878486633301, "learning_rate": 1.838898367048501e-06, "loss": 0.9969, "step": 1510 }, { "epoch": 0.018463675916785494, "grad_norm": 2.752112627029419, "learning_rate": 1.8449914696563493e-06, "loss": 1.06, "step": 1515 }, { "epoch": 0.0185246121409333, "grad_norm": 2.1763272285461426, "learning_rate": 1.851084572264197e-06, "loss": 0.9912, "step": 1520 }, { "epoch": 0.018585548365081105, "grad_norm": 2.6601603031158447, "learning_rate": 1.8571776748720449e-06, "loss": 1.0021, "step": 1525 }, { "epoch": 0.018646484589228914, "grad_norm": 2.2841691970825195, "learning_rate": 1.863270777479893e-06, "loss": 1.0889, "step": 1530 }, { "epoch": 0.01870742081337672, "grad_norm": 2.603740692138672, "learning_rate": 1.8693638800877409e-06, "loss": 0.9846, "step": 1535 }, { "epoch": 0.018768357037524528, "grad_norm": 2.109689950942993, "learning_rate": 1.8754569826955886e-06, "loss": 1.0981, "step": 1540 }, { "epoch": 0.018829293261672334, "grad_norm": 2.1682140827178955, "learning_rate": 1.8815500853034366e-06, "loss": 0.9611, "step": 1545 }, { "epoch": 0.01889022948582014, "grad_norm": 2.2273528575897217, "learning_rate": 1.8876431879112846e-06, "loss": 1.0425, "step": 1550 }, { "epoch": 0.018951165709967948, "grad_norm": 3.023566722869873, "learning_rate": 1.8937362905191326e-06, "loss": 1.0063, "step": 1555 }, { "epoch": 0.019012101934115753, "grad_norm": 2.1196107864379883, "learning_rate": 1.8998293931269804e-06, "loss": 1.077, "step": 1560 }, { "epoch": 0.019073038158263562, "grad_norm": 2.161176919937134, "learning_rate": 1.9059224957348282e-06, "loss": 0.9973, "step": 1565 }, { "epoch": 0.019133974382411368, "grad_norm": 2.431644916534424, "learning_rate": 1.912015598342676e-06, "loss": 0.9487, "step": 1570 }, { "epoch": 0.019194910606559173, "grad_norm": 2.1764400005340576, "learning_rate": 1.9181087009505244e-06, "loss": 0.9949, "step": 1575 }, { "epoch": 0.019255846830706982, "grad_norm": 2.7355024814605713, "learning_rate": 1.924201803558372e-06, "loss": 0.9984, "step": 1580 }, { "epoch": 0.019316783054854788, "grad_norm": 2.047616720199585, "learning_rate": 1.93029490616622e-06, "loss": 0.9482, "step": 1585 }, { "epoch": 0.019377719279002597, "grad_norm": 1.971948266029358, "learning_rate": 1.936388008774068e-06, "loss": 1.0442, "step": 1590 }, { "epoch": 0.019438655503150402, "grad_norm": 2.4725215435028076, "learning_rate": 1.942481111381916e-06, "loss": 1.0458, "step": 1595 }, { "epoch": 0.01949959172729821, "grad_norm": 2.2548818588256836, "learning_rate": 1.9485742139897637e-06, "loss": 1.0568, "step": 1600 }, { "epoch": 0.019560527951446017, "grad_norm": 2.1055097579956055, "learning_rate": 1.9546673165976115e-06, "loss": 0.9721, "step": 1605 }, { "epoch": 0.019621464175593822, "grad_norm": 2.0375983715057373, "learning_rate": 1.9607604192054597e-06, "loss": 0.9991, "step": 1610 }, { "epoch": 0.01968240039974163, "grad_norm": 2.278418779373169, "learning_rate": 1.9668535218133075e-06, "loss": 1.031, "step": 1615 }, { "epoch": 0.019743336623889437, "grad_norm": 2.383363723754883, "learning_rate": 1.9729466244211553e-06, "loss": 1.0012, "step": 1620 }, { "epoch": 0.019804272848037246, "grad_norm": 3.026120662689209, "learning_rate": 1.979039727029003e-06, "loss": 1.0271, "step": 1625 }, { "epoch": 0.01986520907218505, "grad_norm": 1.989630937576294, "learning_rate": 1.9851328296368513e-06, "loss": 1.0104, "step": 1630 }, { "epoch": 0.019926145296332857, "grad_norm": 2.4075491428375244, "learning_rate": 1.991225932244699e-06, "loss": 0.9827, "step": 1635 }, { "epoch": 0.019987081520480666, "grad_norm": 2.266587972640991, "learning_rate": 1.997319034852547e-06, "loss": 1.0741, "step": 1640 }, { "epoch": 0.02004801774462847, "grad_norm": 2.07952618598938, "learning_rate": 2.003412137460395e-06, "loss": 0.9542, "step": 1645 }, { "epoch": 0.02010895396877628, "grad_norm": 2.4330294132232666, "learning_rate": 2.009505240068243e-06, "loss": 0.9811, "step": 1650 }, { "epoch": 0.020169890192924086, "grad_norm": 2.2235426902770996, "learning_rate": 2.0155983426760906e-06, "loss": 1.0346, "step": 1655 }, { "epoch": 0.02023082641707189, "grad_norm": 2.0129122734069824, "learning_rate": 2.021691445283939e-06, "loss": 1.063, "step": 1660 }, { "epoch": 0.0202917626412197, "grad_norm": 2.0479588508605957, "learning_rate": 2.0277845478917866e-06, "loss": 1.0068, "step": 1665 }, { "epoch": 0.020352698865367506, "grad_norm": 2.2530014514923096, "learning_rate": 2.033877650499635e-06, "loss": 1.0943, "step": 1670 }, { "epoch": 0.020413635089515315, "grad_norm": 2.433109760284424, "learning_rate": 2.0399707531074826e-06, "loss": 1.0661, "step": 1675 }, { "epoch": 0.02047457131366312, "grad_norm": 2.3750457763671875, "learning_rate": 2.0460638557153304e-06, "loss": 0.9885, "step": 1680 }, { "epoch": 0.020535507537810926, "grad_norm": 1.8426400423049927, "learning_rate": 2.0521569583231786e-06, "loss": 0.9573, "step": 1685 }, { "epoch": 0.020596443761958735, "grad_norm": 2.226689338684082, "learning_rate": 2.0582500609310264e-06, "loss": 0.9741, "step": 1690 }, { "epoch": 0.02065737998610654, "grad_norm": 2.268059492111206, "learning_rate": 2.064343163538874e-06, "loss": 0.9397, "step": 1695 }, { "epoch": 0.02071831621025435, "grad_norm": 2.3345773220062256, "learning_rate": 2.070436266146722e-06, "loss": 1.0147, "step": 1700 }, { "epoch": 0.020779252434402155, "grad_norm": 2.673659086227417, "learning_rate": 2.07652936875457e-06, "loss": 0.9812, "step": 1705 }, { "epoch": 0.02084018865854996, "grad_norm": 2.0060555934906006, "learning_rate": 2.082622471362418e-06, "loss": 0.9316, "step": 1710 }, { "epoch": 0.02090112488269777, "grad_norm": 2.6829044818878174, "learning_rate": 2.0887155739702657e-06, "loss": 1.0248, "step": 1715 }, { "epoch": 0.020962061106845575, "grad_norm": 2.10984468460083, "learning_rate": 2.0948086765781135e-06, "loss": 0.9524, "step": 1720 }, { "epoch": 0.021022997330993384, "grad_norm": 2.1611666679382324, "learning_rate": 2.1009017791859617e-06, "loss": 1.0628, "step": 1725 }, { "epoch": 0.02108393355514119, "grad_norm": 2.2003064155578613, "learning_rate": 2.1069948817938095e-06, "loss": 1.0016, "step": 1730 }, { "epoch": 0.021144869779288995, "grad_norm": 1.9106661081314087, "learning_rate": 2.1130879844016573e-06, "loss": 1.0519, "step": 1735 }, { "epoch": 0.021205806003436804, "grad_norm": 2.150825023651123, "learning_rate": 2.1191810870095055e-06, "loss": 1.005, "step": 1740 }, { "epoch": 0.02126674222758461, "grad_norm": 2.261240005493164, "learning_rate": 2.1252741896173533e-06, "loss": 1.0132, "step": 1745 }, { "epoch": 0.021327678451732418, "grad_norm": 1.7882792949676514, "learning_rate": 2.1313672922252015e-06, "loss": 1.0532, "step": 1750 }, { "epoch": 0.021388614675880224, "grad_norm": 1.8669379949569702, "learning_rate": 2.1374603948330493e-06, "loss": 1.0867, "step": 1755 }, { "epoch": 0.02144955090002803, "grad_norm": 2.442112445831299, "learning_rate": 2.143553497440897e-06, "loss": 0.971, "step": 1760 }, { "epoch": 0.021510487124175838, "grad_norm": 2.0174357891082764, "learning_rate": 2.1496466000487452e-06, "loss": 1.0891, "step": 1765 }, { "epoch": 0.021571423348323644, "grad_norm": 2.523355007171631, "learning_rate": 2.155739702656593e-06, "loss": 1.0433, "step": 1770 }, { "epoch": 0.021632359572471452, "grad_norm": 2.2308878898620605, "learning_rate": 2.161832805264441e-06, "loss": 0.9967, "step": 1775 }, { "epoch": 0.021693295796619258, "grad_norm": 2.097597599029541, "learning_rate": 2.1679259078722886e-06, "loss": 0.9913, "step": 1780 }, { "epoch": 0.021754232020767063, "grad_norm": 2.223784923553467, "learning_rate": 2.174019010480137e-06, "loss": 1.0297, "step": 1785 }, { "epoch": 0.021815168244914872, "grad_norm": 2.248760938644409, "learning_rate": 2.1801121130879846e-06, "loss": 1.0054, "step": 1790 }, { "epoch": 0.021876104469062678, "grad_norm": 1.8876302242279053, "learning_rate": 2.1862052156958324e-06, "loss": 1.0285, "step": 1795 }, { "epoch": 0.021937040693210487, "grad_norm": 2.3961939811706543, "learning_rate": 2.19229831830368e-06, "loss": 1.0813, "step": 1800 }, { "epoch": 0.021997976917358292, "grad_norm": 2.537365436553955, "learning_rate": 2.1983914209115284e-06, "loss": 1.1216, "step": 1805 }, { "epoch": 0.0220589131415061, "grad_norm": 2.44482684135437, "learning_rate": 2.204484523519376e-06, "loss": 1.0426, "step": 1810 }, { "epoch": 0.022119849365653907, "grad_norm": 2.078054189682007, "learning_rate": 2.210577626127224e-06, "loss": 0.9518, "step": 1815 }, { "epoch": 0.022180785589801712, "grad_norm": 1.8020566701889038, "learning_rate": 2.216670728735072e-06, "loss": 1.0449, "step": 1820 }, { "epoch": 0.02224172181394952, "grad_norm": 2.3515326976776123, "learning_rate": 2.22276383134292e-06, "loss": 0.9385, "step": 1825 }, { "epoch": 0.022302658038097327, "grad_norm": 2.5632505416870117, "learning_rate": 2.2288569339507677e-06, "loss": 0.9352, "step": 1830 }, { "epoch": 0.022363594262245136, "grad_norm": 2.2390480041503906, "learning_rate": 2.234950036558616e-06, "loss": 1.0353, "step": 1835 }, { "epoch": 0.02242453048639294, "grad_norm": 2.142590284347534, "learning_rate": 2.2410431391664637e-06, "loss": 1.001, "step": 1840 }, { "epoch": 0.022485466710540747, "grad_norm": 2.2709310054779053, "learning_rate": 2.247136241774312e-06, "loss": 1.0735, "step": 1845 }, { "epoch": 0.022546402934688556, "grad_norm": 2.7452664375305176, "learning_rate": 2.2532293443821597e-06, "loss": 0.9062, "step": 1850 }, { "epoch": 0.02260733915883636, "grad_norm": 2.0635597705841064, "learning_rate": 2.2593224469900075e-06, "loss": 0.9738, "step": 1855 }, { "epoch": 0.02266827538298417, "grad_norm": 2.486863613128662, "learning_rate": 2.2654155495978557e-06, "loss": 0.9751, "step": 1860 }, { "epoch": 0.022729211607131976, "grad_norm": 1.9153282642364502, "learning_rate": 2.2715086522057035e-06, "loss": 1.039, "step": 1865 }, { "epoch": 0.02279014783127978, "grad_norm": 1.9582257270812988, "learning_rate": 2.2776017548135512e-06, "loss": 0.9718, "step": 1870 }, { "epoch": 0.02285108405542759, "grad_norm": 1.9481794834136963, "learning_rate": 2.283694857421399e-06, "loss": 1.0377, "step": 1875 }, { "epoch": 0.022912020279575396, "grad_norm": 2.147998571395874, "learning_rate": 2.2897879600292472e-06, "loss": 0.9716, "step": 1880 }, { "epoch": 0.022972956503723205, "grad_norm": 2.2569825649261475, "learning_rate": 2.295881062637095e-06, "loss": 0.9275, "step": 1885 }, { "epoch": 0.02303389272787101, "grad_norm": 2.3974993228912354, "learning_rate": 2.301974165244943e-06, "loss": 1.0827, "step": 1890 }, { "epoch": 0.023094828952018816, "grad_norm": 2.653066873550415, "learning_rate": 2.3080672678527906e-06, "loss": 1.088, "step": 1895 }, { "epoch": 0.023155765176166625, "grad_norm": 2.066629409790039, "learning_rate": 2.3141603704606388e-06, "loss": 0.969, "step": 1900 }, { "epoch": 0.02321670140031443, "grad_norm": 2.0890140533447266, "learning_rate": 2.3202534730684866e-06, "loss": 1.0191, "step": 1905 }, { "epoch": 0.02327763762446224, "grad_norm": 2.199596643447876, "learning_rate": 2.3263465756763344e-06, "loss": 1.0238, "step": 1910 }, { "epoch": 0.023338573848610045, "grad_norm": 2.845881462097168, "learning_rate": 2.3324396782841826e-06, "loss": 1.0876, "step": 1915 }, { "epoch": 0.02339951007275785, "grad_norm": 1.9457749128341675, "learning_rate": 2.3385327808920303e-06, "loss": 0.9673, "step": 1920 }, { "epoch": 0.02346044629690566, "grad_norm": 2.179579257965088, "learning_rate": 2.3446258834998786e-06, "loss": 1.0253, "step": 1925 }, { "epoch": 0.023521382521053465, "grad_norm": 2.2792418003082275, "learning_rate": 2.3507189861077263e-06, "loss": 0.9836, "step": 1930 }, { "epoch": 0.023582318745201274, "grad_norm": 2.413606882095337, "learning_rate": 2.356812088715574e-06, "loss": 0.99, "step": 1935 }, { "epoch": 0.02364325496934908, "grad_norm": 1.9352936744689941, "learning_rate": 2.3629051913234223e-06, "loss": 1.004, "step": 1940 }, { "epoch": 0.023704191193496885, "grad_norm": 2.5371127128601074, "learning_rate": 2.36899829393127e-06, "loss": 1.0645, "step": 1945 }, { "epoch": 0.023765127417644694, "grad_norm": 2.115696430206299, "learning_rate": 2.375091396539118e-06, "loss": 0.912, "step": 1950 }, { "epoch": 0.0238260636417925, "grad_norm": 2.1499292850494385, "learning_rate": 2.3811844991469657e-06, "loss": 0.9439, "step": 1955 }, { "epoch": 0.023886999865940308, "grad_norm": 2.015935182571411, "learning_rate": 2.387277601754814e-06, "loss": 0.9961, "step": 1960 }, { "epoch": 0.023947936090088114, "grad_norm": 2.463473320007324, "learning_rate": 2.3933707043626617e-06, "loss": 0.9284, "step": 1965 }, { "epoch": 0.02400887231423592, "grad_norm": 2.8566136360168457, "learning_rate": 2.3994638069705094e-06, "loss": 1.0225, "step": 1970 }, { "epoch": 0.024069808538383728, "grad_norm": 2.2600295543670654, "learning_rate": 2.4055569095783572e-06, "loss": 1.0404, "step": 1975 }, { "epoch": 0.024130744762531534, "grad_norm": 2.300839900970459, "learning_rate": 2.4116500121862054e-06, "loss": 0.9755, "step": 1980 }, { "epoch": 0.024191680986679343, "grad_norm": 2.3608486652374268, "learning_rate": 2.4177431147940532e-06, "loss": 1.0406, "step": 1985 }, { "epoch": 0.024252617210827148, "grad_norm": 2.20394229888916, "learning_rate": 2.423836217401901e-06, "loss": 1.0671, "step": 1990 }, { "epoch": 0.024313553434974954, "grad_norm": 2.244903087615967, "learning_rate": 2.4299293200097492e-06, "loss": 1.0099, "step": 1995 }, { "epoch": 0.024374489659122762, "grad_norm": 2.2748048305511475, "learning_rate": 2.436022422617597e-06, "loss": 1.0229, "step": 2000 }, { "epoch": 0.024435425883270568, "grad_norm": 2.4717562198638916, "learning_rate": 2.4421155252254448e-06, "loss": 1.0012, "step": 2005 }, { "epoch": 0.024496362107418377, "grad_norm": 2.42574143409729, "learning_rate": 2.448208627833293e-06, "loss": 0.9924, "step": 2010 }, { "epoch": 0.024557298331566182, "grad_norm": 2.302643299102783, "learning_rate": 2.4543017304411408e-06, "loss": 1.0178, "step": 2015 }, { "epoch": 0.02461823455571399, "grad_norm": 1.9975855350494385, "learning_rate": 2.460394833048989e-06, "loss": 0.9582, "step": 2020 }, { "epoch": 0.024679170779861797, "grad_norm": 2.08790922164917, "learning_rate": 2.4664879356568368e-06, "loss": 1.0139, "step": 2025 }, { "epoch": 0.024740107004009602, "grad_norm": 2.1084280014038086, "learning_rate": 2.4725810382646845e-06, "loss": 1.0121, "step": 2030 }, { "epoch": 0.02480104322815741, "grad_norm": 1.9036120176315308, "learning_rate": 2.4786741408725328e-06, "loss": 1.0232, "step": 2035 }, { "epoch": 0.024861979452305217, "grad_norm": 2.0728657245635986, "learning_rate": 2.4847672434803805e-06, "loss": 1.028, "step": 2040 }, { "epoch": 0.024922915676453026, "grad_norm": 2.247019052505493, "learning_rate": 2.4908603460882283e-06, "loss": 1.0197, "step": 2045 }, { "epoch": 0.02498385190060083, "grad_norm": 1.9937857389450073, "learning_rate": 2.496953448696076e-06, "loss": 0.9958, "step": 2050 }, { "epoch": 0.025044788124748637, "grad_norm": 2.2359068393707275, "learning_rate": 2.5030465513039243e-06, "loss": 1.0056, "step": 2055 }, { "epoch": 0.025105724348896446, "grad_norm": 2.7611119747161865, "learning_rate": 2.5091396539117717e-06, "loss": 1.0512, "step": 2060 }, { "epoch": 0.02516666057304425, "grad_norm": 2.26347279548645, "learning_rate": 2.51523275651962e-06, "loss": 0.9868, "step": 2065 }, { "epoch": 0.02522759679719206, "grad_norm": 2.1319448947906494, "learning_rate": 2.521325859127468e-06, "loss": 1.0419, "step": 2070 }, { "epoch": 0.025288533021339866, "grad_norm": 2.161566734313965, "learning_rate": 2.5274189617353154e-06, "loss": 1.0449, "step": 2075 }, { "epoch": 0.02534946924548767, "grad_norm": 2.6198513507843018, "learning_rate": 2.5335120643431636e-06, "loss": 1.016, "step": 2080 }, { "epoch": 0.02541040546963548, "grad_norm": 2.075214385986328, "learning_rate": 2.539605166951012e-06, "loss": 0.9858, "step": 2085 }, { "epoch": 0.025471341693783286, "grad_norm": 1.9553757905960083, "learning_rate": 2.5456982695588596e-06, "loss": 0.9922, "step": 2090 }, { "epoch": 0.025532277917931095, "grad_norm": 2.2054555416107178, "learning_rate": 2.5517913721667074e-06, "loss": 1.0218, "step": 2095 }, { "epoch": 0.0255932141420789, "grad_norm": 2.459339141845703, "learning_rate": 2.557884474774555e-06, "loss": 1.086, "step": 2100 }, { "epoch": 0.025654150366226706, "grad_norm": 2.413055896759033, "learning_rate": 2.5639775773824034e-06, "loss": 1.0334, "step": 2105 }, { "epoch": 0.025715086590374515, "grad_norm": 2.164513111114502, "learning_rate": 2.570070679990251e-06, "loss": 0.9461, "step": 2110 }, { "epoch": 0.02577602281452232, "grad_norm": 2.196512460708618, "learning_rate": 2.576163782598099e-06, "loss": 0.9627, "step": 2115 }, { "epoch": 0.02583695903867013, "grad_norm": 2.5406501293182373, "learning_rate": 2.582256885205947e-06, "loss": 0.9803, "step": 2120 }, { "epoch": 0.025897895262817935, "grad_norm": 2.2885313034057617, "learning_rate": 2.5883499878137954e-06, "loss": 1.0012, "step": 2125 }, { "epoch": 0.02595883148696574, "grad_norm": 2.861323356628418, "learning_rate": 2.5944430904216428e-06, "loss": 1.009, "step": 2130 }, { "epoch": 0.02601976771111355, "grad_norm": 2.0133233070373535, "learning_rate": 2.600536193029491e-06, "loss": 0.9877, "step": 2135 }, { "epoch": 0.026080703935261355, "grad_norm": 2.3944664001464844, "learning_rate": 2.606629295637339e-06, "loss": 0.9398, "step": 2140 }, { "epoch": 0.026141640159409164, "grad_norm": 1.9328230619430542, "learning_rate": 2.6127223982451865e-06, "loss": 1.0294, "step": 2145 }, { "epoch": 0.02620257638355697, "grad_norm": 2.5191502571105957, "learning_rate": 2.6188155008530347e-06, "loss": 1.0483, "step": 2150 }, { "epoch": 0.026263512607704775, "grad_norm": 2.1865394115448, "learning_rate": 2.624908603460882e-06, "loss": 0.9844, "step": 2155 }, { "epoch": 0.026324448831852584, "grad_norm": 2.2650420665740967, "learning_rate": 2.6310017060687303e-06, "loss": 0.9868, "step": 2160 }, { "epoch": 0.02638538505600039, "grad_norm": 2.082469940185547, "learning_rate": 2.6370948086765785e-06, "loss": 0.9712, "step": 2165 }, { "epoch": 0.026446321280148198, "grad_norm": 1.6370609998703003, "learning_rate": 2.6431879112844263e-06, "loss": 1.0295, "step": 2170 }, { "epoch": 0.026507257504296004, "grad_norm": 2.450423240661621, "learning_rate": 2.649281013892274e-06, "loss": 0.9902, "step": 2175 }, { "epoch": 0.02656819372844381, "grad_norm": 2.084383487701416, "learning_rate": 2.6553741165001223e-06, "loss": 1.0391, "step": 2180 }, { "epoch": 0.026629129952591618, "grad_norm": 2.208310127258301, "learning_rate": 2.66146721910797e-06, "loss": 0.9392, "step": 2185 }, { "epoch": 0.026690066176739424, "grad_norm": 2.2593486309051514, "learning_rate": 2.667560321715818e-06, "loss": 0.9956, "step": 2190 }, { "epoch": 0.026751002400887233, "grad_norm": 2.1211230754852295, "learning_rate": 2.6736534243236656e-06, "loss": 1.0274, "step": 2195 }, { "epoch": 0.026811938625035038, "grad_norm": 2.053881883621216, "learning_rate": 2.679746526931514e-06, "loss": 0.9974, "step": 2200 }, { "epoch": 0.026872874849182844, "grad_norm": 2.007422685623169, "learning_rate": 2.685839629539362e-06, "loss": 0.9891, "step": 2205 }, { "epoch": 0.026933811073330653, "grad_norm": 1.9331860542297363, "learning_rate": 2.6919327321472094e-06, "loss": 1.0267, "step": 2210 }, { "epoch": 0.026994747297478458, "grad_norm": 2.2696406841278076, "learning_rate": 2.6980258347550576e-06, "loss": 1.0103, "step": 2215 }, { "epoch": 0.027055683521626267, "grad_norm": 2.225210666656494, "learning_rate": 2.704118937362906e-06, "loss": 0.953, "step": 2220 }, { "epoch": 0.027116619745774072, "grad_norm": 2.3060405254364014, "learning_rate": 2.710212039970753e-06, "loss": 0.9522, "step": 2225 }, { "epoch": 0.02717755596992188, "grad_norm": 2.1196227073669434, "learning_rate": 2.7163051425786014e-06, "loss": 0.9471, "step": 2230 }, { "epoch": 0.027238492194069687, "grad_norm": 2.9500374794006348, "learning_rate": 2.7223982451864487e-06, "loss": 1.0087, "step": 2235 }, { "epoch": 0.027299428418217492, "grad_norm": 2.12129807472229, "learning_rate": 2.728491347794297e-06, "loss": 0.9871, "step": 2240 }, { "epoch": 0.0273603646423653, "grad_norm": 2.5834388732910156, "learning_rate": 2.734584450402145e-06, "loss": 0.9116, "step": 2245 }, { "epoch": 0.027421300866513107, "grad_norm": 2.1289713382720947, "learning_rate": 2.7406775530099925e-06, "loss": 1.0013, "step": 2250 }, { "epoch": 0.027482237090660916, "grad_norm": 2.3514468669891357, "learning_rate": 2.7467706556178407e-06, "loss": 0.9569, "step": 2255 }, { "epoch": 0.02754317331480872, "grad_norm": 2.2730185985565186, "learning_rate": 2.752863758225689e-06, "loss": 0.9184, "step": 2260 }, { "epoch": 0.027604109538956527, "grad_norm": 2.072981595993042, "learning_rate": 2.7589568608335367e-06, "loss": 1.0159, "step": 2265 }, { "epoch": 0.027665045763104336, "grad_norm": 2.019702196121216, "learning_rate": 2.7650499634413845e-06, "loss": 1.0486, "step": 2270 }, { "epoch": 0.02772598198725214, "grad_norm": 1.993539810180664, "learning_rate": 2.7711430660492323e-06, "loss": 0.9554, "step": 2275 }, { "epoch": 0.02778691821139995, "grad_norm": 2.1764752864837646, "learning_rate": 2.7772361686570805e-06, "loss": 1.0287, "step": 2280 }, { "epoch": 0.027847854435547756, "grad_norm": 2.005216121673584, "learning_rate": 2.7833292712649283e-06, "loss": 1.0453, "step": 2285 }, { "epoch": 0.02790879065969556, "grad_norm": 2.33762788772583, "learning_rate": 2.789422373872776e-06, "loss": 0.998, "step": 2290 }, { "epoch": 0.02796972688384337, "grad_norm": 3.1347782611846924, "learning_rate": 2.7955154764806243e-06, "loss": 1.0677, "step": 2295 }, { "epoch": 0.028030663107991176, "grad_norm": 1.9946335554122925, "learning_rate": 2.8016085790884725e-06, "loss": 1.0027, "step": 2300 }, { "epoch": 0.028091599332138985, "grad_norm": 2.1208159923553467, "learning_rate": 2.80770168169632e-06, "loss": 0.9812, "step": 2305 }, { "epoch": 0.02815253555628679, "grad_norm": 1.9835519790649414, "learning_rate": 2.813794784304168e-06, "loss": 0.9598, "step": 2310 }, { "epoch": 0.028213471780434596, "grad_norm": 2.006824016571045, "learning_rate": 2.8198878869120162e-06, "loss": 0.9486, "step": 2315 }, { "epoch": 0.028274408004582405, "grad_norm": 2.0469579696655273, "learning_rate": 2.8259809895198636e-06, "loss": 1.1135, "step": 2320 }, { "epoch": 0.02833534422873021, "grad_norm": 2.447772264480591, "learning_rate": 2.832074092127712e-06, "loss": 0.9852, "step": 2325 }, { "epoch": 0.02839628045287802, "grad_norm": 2.177111864089966, "learning_rate": 2.838167194735559e-06, "loss": 0.9885, "step": 2330 }, { "epoch": 0.028457216677025825, "grad_norm": 2.1866142749786377, "learning_rate": 2.8442602973434074e-06, "loss": 0.999, "step": 2335 }, { "epoch": 0.02851815290117363, "grad_norm": 2.1724886894226074, "learning_rate": 2.8503533999512556e-06, "loss": 0.9771, "step": 2340 }, { "epoch": 0.02857908912532144, "grad_norm": 2.59372878074646, "learning_rate": 2.8564465025591034e-06, "loss": 0.9821, "step": 2345 }, { "epoch": 0.028640025349469245, "grad_norm": 2.251540184020996, "learning_rate": 2.862539605166951e-06, "loss": 1.0481, "step": 2350 }, { "epoch": 0.028700961573617054, "grad_norm": 2.024679660797119, "learning_rate": 2.8686327077747994e-06, "loss": 1.0369, "step": 2355 }, { "epoch": 0.02876189779776486, "grad_norm": 2.801973342895508, "learning_rate": 2.874725810382647e-06, "loss": 1.0202, "step": 2360 }, { "epoch": 0.028822834021912665, "grad_norm": 1.8953609466552734, "learning_rate": 2.880818912990495e-06, "loss": 1.0127, "step": 2365 }, { "epoch": 0.028883770246060474, "grad_norm": 2.4269275665283203, "learning_rate": 2.8869120155983427e-06, "loss": 1.0351, "step": 2370 }, { "epoch": 0.02894470647020828, "grad_norm": 2.4128365516662598, "learning_rate": 2.893005118206191e-06, "loss": 1.0943, "step": 2375 }, { "epoch": 0.029005642694356088, "grad_norm": 2.290492057800293, "learning_rate": 2.8990982208140387e-06, "loss": 0.9736, "step": 2380 }, { "epoch": 0.029066578918503894, "grad_norm": 2.244640588760376, "learning_rate": 2.9051913234218865e-06, "loss": 0.9575, "step": 2385 }, { "epoch": 0.0291275151426517, "grad_norm": 2.132124662399292, "learning_rate": 2.9112844260297347e-06, "loss": 1.099, "step": 2390 }, { "epoch": 0.029188451366799508, "grad_norm": 2.4371280670166016, "learning_rate": 2.917377528637583e-06, "loss": 0.9265, "step": 2395 }, { "epoch": 0.029249387590947314, "grad_norm": 2.1064815521240234, "learning_rate": 2.9234706312454303e-06, "loss": 0.9592, "step": 2400 }, { "epoch": 0.029310323815095123, "grad_norm": 1.7741284370422363, "learning_rate": 2.9295637338532785e-06, "loss": 0.9327, "step": 2405 }, { "epoch": 0.029371260039242928, "grad_norm": 1.9352706670761108, "learning_rate": 2.935656836461126e-06, "loss": 0.9826, "step": 2410 }, { "epoch": 0.029432196263390734, "grad_norm": 2.2833118438720703, "learning_rate": 2.941749939068974e-06, "loss": 1.0488, "step": 2415 }, { "epoch": 0.029493132487538543, "grad_norm": 2.496473789215088, "learning_rate": 2.9478430416768222e-06, "loss": 1.02, "step": 2420 }, { "epoch": 0.029554068711686348, "grad_norm": 2.04172420501709, "learning_rate": 2.9539361442846696e-06, "loss": 0.9785, "step": 2425 }, { "epoch": 0.029615004935834157, "grad_norm": 2.6524553298950195, "learning_rate": 2.960029246892518e-06, "loss": 0.9772, "step": 2430 }, { "epoch": 0.029675941159981963, "grad_norm": 2.5978951454162598, "learning_rate": 2.966122349500366e-06, "loss": 1.0005, "step": 2435 }, { "epoch": 0.02973687738412977, "grad_norm": 2.2039761543273926, "learning_rate": 2.972215452108214e-06, "loss": 0.9699, "step": 2440 }, { "epoch": 0.029797813608277577, "grad_norm": 2.4892992973327637, "learning_rate": 2.9783085547160616e-06, "loss": 1.0576, "step": 2445 }, { "epoch": 0.029858749832425382, "grad_norm": 2.1758835315704346, "learning_rate": 2.9844016573239098e-06, "loss": 1.001, "step": 2450 }, { "epoch": 0.02991968605657319, "grad_norm": 2.223872423171997, "learning_rate": 2.9904947599317576e-06, "loss": 0.9715, "step": 2455 }, { "epoch": 0.029980622280720997, "grad_norm": 2.140903949737549, "learning_rate": 2.9965878625396054e-06, "loss": 0.9753, "step": 2460 }, { "epoch": 0.030041558504868806, "grad_norm": 2.0109994411468506, "learning_rate": 3.002680965147453e-06, "loss": 0.9238, "step": 2465 }, { "epoch": 0.03010249472901661, "grad_norm": 2.1795551776885986, "learning_rate": 3.0087740677553013e-06, "loss": 1.0466, "step": 2470 }, { "epoch": 0.030163430953164417, "grad_norm": 2.6273350715637207, "learning_rate": 3.0148671703631495e-06, "loss": 0.921, "step": 2475 }, { "epoch": 0.030224367177312226, "grad_norm": 2.1251392364501953, "learning_rate": 3.020960272970997e-06, "loss": 0.9796, "step": 2480 }, { "epoch": 0.03028530340146003, "grad_norm": 1.9157634973526, "learning_rate": 3.027053375578845e-06, "loss": 0.9774, "step": 2485 }, { "epoch": 0.03034623962560784, "grad_norm": 2.1332757472991943, "learning_rate": 3.0331464781866933e-06, "loss": 1.0535, "step": 2490 }, { "epoch": 0.030407175849755646, "grad_norm": 2.014636993408203, "learning_rate": 3.0392395807945407e-06, "loss": 0.977, "step": 2495 }, { "epoch": 0.03046811207390345, "grad_norm": 1.8243730068206787, "learning_rate": 3.045332683402389e-06, "loss": 0.9672, "step": 2500 }, { "epoch": 0.03052904829805126, "grad_norm": 2.1456656455993652, "learning_rate": 3.0514257860102362e-06, "loss": 1.0901, "step": 2505 }, { "epoch": 0.030589984522199066, "grad_norm": 2.3844902515411377, "learning_rate": 3.0575188886180845e-06, "loss": 1.0052, "step": 2510 }, { "epoch": 0.030650920746346875, "grad_norm": 2.064275026321411, "learning_rate": 3.0636119912259327e-06, "loss": 1.0146, "step": 2515 }, { "epoch": 0.03071185697049468, "grad_norm": 2.1599464416503906, "learning_rate": 3.0697050938337804e-06, "loss": 1.0166, "step": 2520 }, { "epoch": 0.030772793194642486, "grad_norm": 2.661630153656006, "learning_rate": 3.0757981964416282e-06, "loss": 0.9599, "step": 2525 }, { "epoch": 0.030833729418790295, "grad_norm": 2.069490432739258, "learning_rate": 3.0818912990494764e-06, "loss": 1.0173, "step": 2530 }, { "epoch": 0.0308946656429381, "grad_norm": 2.640892267227173, "learning_rate": 3.0879844016573242e-06, "loss": 0.988, "step": 2535 }, { "epoch": 0.03095560186708591, "grad_norm": 2.03193736076355, "learning_rate": 3.094077504265172e-06, "loss": 1.0239, "step": 2540 }, { "epoch": 0.031016538091233715, "grad_norm": 2.377469301223755, "learning_rate": 3.1001706068730198e-06, "loss": 0.9354, "step": 2545 }, { "epoch": 0.03107747431538152, "grad_norm": 2.65142560005188, "learning_rate": 3.106263709480868e-06, "loss": 1.0717, "step": 2550 }, { "epoch": 0.03113841053952933, "grad_norm": 2.0322470664978027, "learning_rate": 3.1123568120887158e-06, "loss": 0.9518, "step": 2555 }, { "epoch": 0.031199346763677135, "grad_norm": 2.0424673557281494, "learning_rate": 3.1184499146965636e-06, "loss": 0.9967, "step": 2560 }, { "epoch": 0.03126028298782494, "grad_norm": 2.204331874847412, "learning_rate": 3.1245430173044118e-06, "loss": 0.9674, "step": 2565 }, { "epoch": 0.03132121921197275, "grad_norm": 1.9003984928131104, "learning_rate": 3.13063611991226e-06, "loss": 0.995, "step": 2570 }, { "epoch": 0.03138215543612056, "grad_norm": 2.1863222122192383, "learning_rate": 3.1367292225201073e-06, "loss": 0.9347, "step": 2575 }, { "epoch": 0.03144309166026836, "grad_norm": 2.2518444061279297, "learning_rate": 3.1428223251279555e-06, "loss": 0.9953, "step": 2580 }, { "epoch": 0.03150402788441617, "grad_norm": 2.4878015518188477, "learning_rate": 3.148915427735803e-06, "loss": 0.994, "step": 2585 }, { "epoch": 0.03156496410856398, "grad_norm": 2.2985970973968506, "learning_rate": 3.155008530343651e-06, "loss": 0.9518, "step": 2590 }, { "epoch": 0.03162590033271179, "grad_norm": 2.0420734882354736, "learning_rate": 3.1611016329514993e-06, "loss": 0.9676, "step": 2595 }, { "epoch": 0.03168683655685959, "grad_norm": 2.5416483879089355, "learning_rate": 3.1671947355593467e-06, "loss": 0.9706, "step": 2600 }, { "epoch": 0.0317477727810074, "grad_norm": 2.1827890872955322, "learning_rate": 3.173287838167195e-06, "loss": 0.9408, "step": 2605 }, { "epoch": 0.03180870900515521, "grad_norm": 2.0067992210388184, "learning_rate": 3.179380940775043e-06, "loss": 0.9842, "step": 2610 }, { "epoch": 0.03186964522930301, "grad_norm": 2.3223025798797607, "learning_rate": 3.185474043382891e-06, "loss": 0.982, "step": 2615 }, { "epoch": 0.03193058145345082, "grad_norm": 3.651122570037842, "learning_rate": 3.1915671459907387e-06, "loss": 0.9648, "step": 2620 }, { "epoch": 0.03199151767759863, "grad_norm": 2.686007022857666, "learning_rate": 3.197660248598587e-06, "loss": 1.034, "step": 2625 }, { "epoch": 0.03205245390174643, "grad_norm": 2.291522741317749, "learning_rate": 3.2037533512064346e-06, "loss": 0.9874, "step": 2630 }, { "epoch": 0.03211339012589424, "grad_norm": 2.33229923248291, "learning_rate": 3.2098464538142824e-06, "loss": 0.9914, "step": 2635 }, { "epoch": 0.03217432635004205, "grad_norm": 2.190917491912842, "learning_rate": 3.2159395564221302e-06, "loss": 1.0129, "step": 2640 }, { "epoch": 0.032235262574189856, "grad_norm": 2.2239022254943848, "learning_rate": 3.2220326590299784e-06, "loss": 0.999, "step": 2645 }, { "epoch": 0.03229619879833766, "grad_norm": 2.1455931663513184, "learning_rate": 3.2281257616378266e-06, "loss": 0.9349, "step": 2650 }, { "epoch": 0.03235713502248547, "grad_norm": 2.0242700576782227, "learning_rate": 3.234218864245674e-06, "loss": 0.9863, "step": 2655 }, { "epoch": 0.032418071246633276, "grad_norm": 2.432790994644165, "learning_rate": 3.240311966853522e-06, "loss": 0.9293, "step": 2660 }, { "epoch": 0.03247900747078108, "grad_norm": 2.1901357173919678, "learning_rate": 3.2464050694613704e-06, "loss": 0.9511, "step": 2665 }, { "epoch": 0.03253994369492889, "grad_norm": 2.3847720623016357, "learning_rate": 3.2524981720692178e-06, "loss": 1.0205, "step": 2670 }, { "epoch": 0.032600879919076696, "grad_norm": 1.8824608325958252, "learning_rate": 3.258591274677066e-06, "loss": 0.9679, "step": 2675 }, { "epoch": 0.0326618161432245, "grad_norm": 2.1413137912750244, "learning_rate": 3.2646843772849133e-06, "loss": 1.0113, "step": 2680 }, { "epoch": 0.03272275236737231, "grad_norm": 2.0156333446502686, "learning_rate": 3.2707774798927615e-06, "loss": 0.9764, "step": 2685 }, { "epoch": 0.032783688591520116, "grad_norm": 2.042227268218994, "learning_rate": 3.2768705825006097e-06, "loss": 1.0065, "step": 2690 }, { "epoch": 0.032844624815667925, "grad_norm": 2.3583436012268066, "learning_rate": 3.282963685108457e-06, "loss": 0.994, "step": 2695 }, { "epoch": 0.03290556103981573, "grad_norm": 2.2098002433776855, "learning_rate": 3.2890567877163053e-06, "loss": 0.9579, "step": 2700 }, { "epoch": 0.032966497263963536, "grad_norm": 2.1260464191436768, "learning_rate": 3.2951498903241535e-06, "loss": 1.1, "step": 2705 }, { "epoch": 0.033027433488111345, "grad_norm": 2.3926327228546143, "learning_rate": 3.3012429929320013e-06, "loss": 0.9708, "step": 2710 }, { "epoch": 0.03308836971225915, "grad_norm": 2.0064942836761475, "learning_rate": 3.307336095539849e-06, "loss": 1.0507, "step": 2715 }, { "epoch": 0.033149305936406956, "grad_norm": 2.268812894821167, "learning_rate": 3.313429198147697e-06, "loss": 0.9253, "step": 2720 }, { "epoch": 0.033210242160554765, "grad_norm": 2.023709774017334, "learning_rate": 3.319522300755545e-06, "loss": 1.0361, "step": 2725 }, { "epoch": 0.03327117838470257, "grad_norm": 2.151951313018799, "learning_rate": 3.325615403363393e-06, "loss": 1.0213, "step": 2730 }, { "epoch": 0.033332114608850376, "grad_norm": 1.8687371015548706, "learning_rate": 3.3317085059712406e-06, "loss": 1.0342, "step": 2735 }, { "epoch": 0.033393050832998185, "grad_norm": 2.1927433013916016, "learning_rate": 3.337801608579089e-06, "loss": 0.9852, "step": 2740 }, { "epoch": 0.033453987057145994, "grad_norm": 1.8720521926879883, "learning_rate": 3.343894711186937e-06, "loss": 0.9886, "step": 2745 }, { "epoch": 0.033514923281293796, "grad_norm": 2.1549158096313477, "learning_rate": 3.3499878137947844e-06, "loss": 1.0157, "step": 2750 }, { "epoch": 0.033575859505441605, "grad_norm": 2.244654893875122, "learning_rate": 3.3560809164026326e-06, "loss": 1.0517, "step": 2755 }, { "epoch": 0.033636795729589414, "grad_norm": 2.1617770195007324, "learning_rate": 3.362174019010481e-06, "loss": 0.9292, "step": 2760 }, { "epoch": 0.033697731953737216, "grad_norm": 2.66784405708313, "learning_rate": 3.368267121618328e-06, "loss": 0.9903, "step": 2765 }, { "epoch": 0.033758668177885025, "grad_norm": 1.7903568744659424, "learning_rate": 3.3743602242261764e-06, "loss": 0.9995, "step": 2770 }, { "epoch": 0.033819604402032834, "grad_norm": 2.0538747310638428, "learning_rate": 3.3804533268340238e-06, "loss": 0.9686, "step": 2775 }, { "epoch": 0.03388054062618064, "grad_norm": 2.491258382797241, "learning_rate": 3.386546429441872e-06, "loss": 0.984, "step": 2780 }, { "epoch": 0.033941476850328445, "grad_norm": 2.230109930038452, "learning_rate": 3.39263953204972e-06, "loss": 1.0304, "step": 2785 }, { "epoch": 0.034002413074476254, "grad_norm": 1.9637675285339355, "learning_rate": 3.398732634657568e-06, "loss": 0.9377, "step": 2790 }, { "epoch": 0.03406334929862406, "grad_norm": 2.4951913356781006, "learning_rate": 3.4048257372654157e-06, "loss": 0.974, "step": 2795 }, { "epoch": 0.034124285522771865, "grad_norm": 2.0110740661621094, "learning_rate": 3.410918839873264e-06, "loss": 0.9544, "step": 2800 }, { "epoch": 0.034185221746919674, "grad_norm": 2.1786813735961914, "learning_rate": 3.4170119424811117e-06, "loss": 1.039, "step": 2805 }, { "epoch": 0.03424615797106748, "grad_norm": 2.1144351959228516, "learning_rate": 3.4231050450889595e-06, "loss": 0.9352, "step": 2810 }, { "epoch": 0.034307094195215285, "grad_norm": 2.158630609512329, "learning_rate": 3.4291981476968073e-06, "loss": 0.9903, "step": 2815 }, { "epoch": 0.034368030419363094, "grad_norm": 2.352821111679077, "learning_rate": 3.4352912503046555e-06, "loss": 0.9823, "step": 2820 }, { "epoch": 0.0344289666435109, "grad_norm": 2.532902479171753, "learning_rate": 3.4413843529125037e-06, "loss": 1.0158, "step": 2825 }, { "epoch": 0.03448990286765871, "grad_norm": 2.298511505126953, "learning_rate": 3.447477455520351e-06, "loss": 0.9391, "step": 2830 }, { "epoch": 0.034550839091806514, "grad_norm": 2.0837512016296387, "learning_rate": 3.4535705581281993e-06, "loss": 1.0496, "step": 2835 }, { "epoch": 0.03461177531595432, "grad_norm": 2.0044009685516357, "learning_rate": 3.4596636607360475e-06, "loss": 0.9733, "step": 2840 }, { "epoch": 0.03467271154010213, "grad_norm": 2.34397554397583, "learning_rate": 3.465756763343895e-06, "loss": 0.9641, "step": 2845 }, { "epoch": 0.034733647764249934, "grad_norm": 2.3731324672698975, "learning_rate": 3.471849865951743e-06, "loss": 0.9813, "step": 2850 }, { "epoch": 0.03479458398839774, "grad_norm": 2.3944308757781982, "learning_rate": 3.4779429685595904e-06, "loss": 1.0385, "step": 2855 }, { "epoch": 0.03485552021254555, "grad_norm": 2.4269092082977295, "learning_rate": 3.4840360711674386e-06, "loss": 0.9797, "step": 2860 }, { "epoch": 0.034916456436693354, "grad_norm": 2.1041364669799805, "learning_rate": 3.490129173775287e-06, "loss": 1.0103, "step": 2865 }, { "epoch": 0.03497739266084116, "grad_norm": 2.58022141456604, "learning_rate": 3.496222276383134e-06, "loss": 0.9329, "step": 2870 }, { "epoch": 0.03503832888498897, "grad_norm": 2.234161615371704, "learning_rate": 3.5023153789909824e-06, "loss": 0.9818, "step": 2875 }, { "epoch": 0.03509926510913678, "grad_norm": 2.429107666015625, "learning_rate": 3.5084084815988306e-06, "loss": 0.9905, "step": 2880 }, { "epoch": 0.03516020133328458, "grad_norm": 2.108717918395996, "learning_rate": 3.5145015842066784e-06, "loss": 0.9603, "step": 2885 }, { "epoch": 0.03522113755743239, "grad_norm": 2.0085041522979736, "learning_rate": 3.520594686814526e-06, "loss": 0.9192, "step": 2890 }, { "epoch": 0.0352820737815802, "grad_norm": 1.8027620315551758, "learning_rate": 3.526687789422374e-06, "loss": 0.9575, "step": 2895 }, { "epoch": 0.035343010005728, "grad_norm": 2.303748369216919, "learning_rate": 3.532780892030222e-06, "loss": 0.9238, "step": 2900 }, { "epoch": 0.03540394622987581, "grad_norm": 2.056295871734619, "learning_rate": 3.53887399463807e-06, "loss": 0.9381, "step": 2905 }, { "epoch": 0.03546488245402362, "grad_norm": 2.2710018157958984, "learning_rate": 3.5449670972459177e-06, "loss": 0.9349, "step": 2910 }, { "epoch": 0.03552581867817142, "grad_norm": 1.9540404081344604, "learning_rate": 3.551060199853766e-06, "loss": 0.9291, "step": 2915 }, { "epoch": 0.03558675490231923, "grad_norm": 2.2747464179992676, "learning_rate": 3.557153302461614e-06, "loss": 1.0063, "step": 2920 }, { "epoch": 0.03564769112646704, "grad_norm": 2.1931777000427246, "learning_rate": 3.5632464050694615e-06, "loss": 0.9728, "step": 2925 }, { "epoch": 0.03570862735061485, "grad_norm": 2.314342498779297, "learning_rate": 3.5693395076773097e-06, "loss": 0.9876, "step": 2930 }, { "epoch": 0.03576956357476265, "grad_norm": 2.2723426818847656, "learning_rate": 3.575432610285158e-06, "loss": 1.0018, "step": 2935 }, { "epoch": 0.03583049979891046, "grad_norm": 1.9077463150024414, "learning_rate": 3.5815257128930053e-06, "loss": 0.9327, "step": 2940 }, { "epoch": 0.03589143602305827, "grad_norm": 2.585850477218628, "learning_rate": 3.5876188155008535e-06, "loss": 0.9303, "step": 2945 }, { "epoch": 0.03595237224720607, "grad_norm": 2.1230101585388184, "learning_rate": 3.593711918108701e-06, "loss": 0.9773, "step": 2950 }, { "epoch": 0.03601330847135388, "grad_norm": 2.7341465950012207, "learning_rate": 3.599805020716549e-06, "loss": 0.9863, "step": 2955 }, { "epoch": 0.03607424469550169, "grad_norm": 2.132169723510742, "learning_rate": 3.6058981233243972e-06, "loss": 0.9669, "step": 2960 }, { "epoch": 0.0361351809196495, "grad_norm": 2.360743761062622, "learning_rate": 3.611991225932245e-06, "loss": 0.9842, "step": 2965 }, { "epoch": 0.0361961171437973, "grad_norm": 2.1437182426452637, "learning_rate": 3.618084328540093e-06, "loss": 1.0245, "step": 2970 }, { "epoch": 0.03625705336794511, "grad_norm": 1.8826771974563599, "learning_rate": 3.624177431147941e-06, "loss": 0.9384, "step": 2975 }, { "epoch": 0.03631798959209292, "grad_norm": 2.406168222427368, "learning_rate": 3.630270533755789e-06, "loss": 0.9117, "step": 2980 }, { "epoch": 0.03637892581624072, "grad_norm": 2.2204556465148926, "learning_rate": 3.6363636363636366e-06, "loss": 0.9404, "step": 2985 }, { "epoch": 0.03643986204038853, "grad_norm": 2.2893309593200684, "learning_rate": 3.6424567389714844e-06, "loss": 0.9512, "step": 2990 }, { "epoch": 0.03650079826453634, "grad_norm": 2.4335930347442627, "learning_rate": 3.6485498415793326e-06, "loss": 1.036, "step": 2995 }, { "epoch": 0.03656173448868414, "grad_norm": 2.0619916915893555, "learning_rate": 3.6546429441871808e-06, "loss": 0.9966, "step": 3000 }, { "epoch": 0.03662267071283195, "grad_norm": 2.289220094680786, "learning_rate": 3.660736046795028e-06, "loss": 0.9981, "step": 3005 }, { "epoch": 0.03668360693697976, "grad_norm": 2.206705093383789, "learning_rate": 3.6668291494028763e-06, "loss": 0.9815, "step": 3010 }, { "epoch": 0.03674454316112757, "grad_norm": 2.141752004623413, "learning_rate": 3.6729222520107246e-06, "loss": 0.9383, "step": 3015 }, { "epoch": 0.03680547938527537, "grad_norm": 2.2266435623168945, "learning_rate": 3.679015354618572e-06, "loss": 0.9323, "step": 3020 }, { "epoch": 0.03686641560942318, "grad_norm": 2.0401322841644287, "learning_rate": 3.68510845722642e-06, "loss": 1.0154, "step": 3025 }, { "epoch": 0.03692735183357099, "grad_norm": 2.1146633625030518, "learning_rate": 3.6912015598342675e-06, "loss": 1.0059, "step": 3030 }, { "epoch": 0.03698828805771879, "grad_norm": 2.3096938133239746, "learning_rate": 3.6972946624421157e-06, "loss": 0.9887, "step": 3035 }, { "epoch": 0.0370492242818666, "grad_norm": 2.2103381156921387, "learning_rate": 3.703387765049964e-06, "loss": 1.0144, "step": 3040 }, { "epoch": 0.03711016050601441, "grad_norm": 2.1669387817382812, "learning_rate": 3.7094808676578113e-06, "loss": 0.9299, "step": 3045 }, { "epoch": 0.03717109673016221, "grad_norm": 2.164634943008423, "learning_rate": 3.7155739702656595e-06, "loss": 1.021, "step": 3050 }, { "epoch": 0.03723203295431002, "grad_norm": 2.0261154174804688, "learning_rate": 3.7216670728735077e-06, "loss": 0.9896, "step": 3055 }, { "epoch": 0.03729296917845783, "grad_norm": 1.8775279521942139, "learning_rate": 3.7277601754813555e-06, "loss": 0.9747, "step": 3060 }, { "epoch": 0.037353905402605636, "grad_norm": 2.473219633102417, "learning_rate": 3.7338532780892032e-06, "loss": 0.9242, "step": 3065 }, { "epoch": 0.03741484162675344, "grad_norm": 1.9203826189041138, "learning_rate": 3.7399463806970514e-06, "loss": 0.9374, "step": 3070 }, { "epoch": 0.03747577785090125, "grad_norm": 2.250211238861084, "learning_rate": 3.7460394833048992e-06, "loss": 1.0209, "step": 3075 }, { "epoch": 0.037536714075049056, "grad_norm": 2.055544137954712, "learning_rate": 3.752132585912747e-06, "loss": 0.9072, "step": 3080 }, { "epoch": 0.03759765029919686, "grad_norm": 2.0352957248687744, "learning_rate": 3.758225688520595e-06, "loss": 0.9192, "step": 3085 }, { "epoch": 0.03765858652334467, "grad_norm": 2.1021058559417725, "learning_rate": 3.764318791128443e-06, "loss": 1.0446, "step": 3090 }, { "epoch": 0.037719522747492476, "grad_norm": 2.660313844680786, "learning_rate": 3.770411893736291e-06, "loss": 0.9702, "step": 3095 }, { "epoch": 0.03778045897164028, "grad_norm": 2.2911903858184814, "learning_rate": 3.7765049963441386e-06, "loss": 0.9691, "step": 3100 }, { "epoch": 0.03784139519578809, "grad_norm": 2.2695930004119873, "learning_rate": 3.7825980989519868e-06, "loss": 0.9154, "step": 3105 }, { "epoch": 0.037902331419935896, "grad_norm": 1.9351710081100464, "learning_rate": 3.788691201559835e-06, "loss": 0.8786, "step": 3110 }, { "epoch": 0.037963267644083705, "grad_norm": 2.088778495788574, "learning_rate": 3.7947843041676823e-06, "loss": 1.0082, "step": 3115 }, { "epoch": 0.03802420386823151, "grad_norm": 2.285645008087158, "learning_rate": 3.8008774067755305e-06, "loss": 0.9845, "step": 3120 }, { "epoch": 0.038085140092379316, "grad_norm": 2.01654314994812, "learning_rate": 3.806970509383378e-06, "loss": 0.8997, "step": 3125 }, { "epoch": 0.038146076316527125, "grad_norm": 2.558713436126709, "learning_rate": 3.813063611991226e-06, "loss": 0.9875, "step": 3130 }, { "epoch": 0.03820701254067493, "grad_norm": 2.257852792739868, "learning_rate": 3.819156714599074e-06, "loss": 0.9713, "step": 3135 }, { "epoch": 0.038267948764822736, "grad_norm": 1.8502939939498901, "learning_rate": 3.825249817206922e-06, "loss": 0.8821, "step": 3140 }, { "epoch": 0.038328884988970545, "grad_norm": 2.228125810623169, "learning_rate": 3.83134291981477e-06, "loss": 1.0087, "step": 3145 }, { "epoch": 0.03838982121311835, "grad_norm": 3.0821213722229004, "learning_rate": 3.837436022422618e-06, "loss": 0.9881, "step": 3150 }, { "epoch": 0.038450757437266156, "grad_norm": 2.1935205459594727, "learning_rate": 3.8435291250304655e-06, "loss": 0.9673, "step": 3155 }, { "epoch": 0.038511693661413965, "grad_norm": 1.9775300025939941, "learning_rate": 3.849622227638314e-06, "loss": 0.9787, "step": 3160 }, { "epoch": 0.038572629885561774, "grad_norm": 2.480847120285034, "learning_rate": 3.855715330246161e-06, "loss": 1.012, "step": 3165 }, { "epoch": 0.038633566109709576, "grad_norm": 2.165637493133545, "learning_rate": 3.861808432854009e-06, "loss": 0.9519, "step": 3170 }, { "epoch": 0.038694502333857385, "grad_norm": 2.1376986503601074, "learning_rate": 3.8679015354618574e-06, "loss": 0.9286, "step": 3175 }, { "epoch": 0.038755438558005194, "grad_norm": 1.9706858396530151, "learning_rate": 3.873994638069705e-06, "loss": 1.0187, "step": 3180 }, { "epoch": 0.038816374782152996, "grad_norm": 2.641906499862671, "learning_rate": 3.880087740677553e-06, "loss": 0.9696, "step": 3185 }, { "epoch": 0.038877311006300805, "grad_norm": 1.7628661394119263, "learning_rate": 3.886180843285401e-06, "loss": 0.903, "step": 3190 }, { "epoch": 0.038938247230448614, "grad_norm": 2.5265820026397705, "learning_rate": 3.892273945893249e-06, "loss": 1.0017, "step": 3195 }, { "epoch": 0.03899918345459642, "grad_norm": 2.3875532150268555, "learning_rate": 3.898367048501097e-06, "loss": 1.0285, "step": 3200 }, { "epoch": 0.039060119678744225, "grad_norm": 2.105201244354248, "learning_rate": 3.904460151108945e-06, "loss": 0.9493, "step": 3205 }, { "epoch": 0.039121055902892034, "grad_norm": 2.154881000518799, "learning_rate": 3.910553253716793e-06, "loss": 1.0237, "step": 3210 }, { "epoch": 0.03918199212703984, "grad_norm": 2.0469555854797363, "learning_rate": 3.9166463563246405e-06, "loss": 0.9583, "step": 3215 }, { "epoch": 0.039242928351187645, "grad_norm": 2.2383177280426025, "learning_rate": 3.922739458932489e-06, "loss": 0.9783, "step": 3220 }, { "epoch": 0.039303864575335454, "grad_norm": 2.275247812271118, "learning_rate": 3.928832561540337e-06, "loss": 0.9776, "step": 3225 }, { "epoch": 0.03936480079948326, "grad_norm": 2.1496176719665527, "learning_rate": 3.934925664148185e-06, "loss": 0.9478, "step": 3230 }, { "epoch": 0.039425737023631065, "grad_norm": 2.1215627193450928, "learning_rate": 3.9410187667560325e-06, "loss": 0.9708, "step": 3235 }, { "epoch": 0.039486673247778874, "grad_norm": 2.2623047828674316, "learning_rate": 3.947111869363881e-06, "loss": 0.9225, "step": 3240 }, { "epoch": 0.03954760947192668, "grad_norm": 2.3199453353881836, "learning_rate": 3.953204971971729e-06, "loss": 0.9684, "step": 3245 }, { "epoch": 0.03960854569607449, "grad_norm": 2.0688130855560303, "learning_rate": 3.959298074579576e-06, "loss": 0.9094, "step": 3250 }, { "epoch": 0.039669481920222294, "grad_norm": 2.1390907764434814, "learning_rate": 3.9653911771874245e-06, "loss": 1.0492, "step": 3255 }, { "epoch": 0.0397304181443701, "grad_norm": 3.4336488246917725, "learning_rate": 3.971484279795272e-06, "loss": 0.9865, "step": 3260 }, { "epoch": 0.03979135436851791, "grad_norm": 2.2044265270233154, "learning_rate": 3.97757738240312e-06, "loss": 1.0054, "step": 3265 }, { "epoch": 0.039852290592665714, "grad_norm": 2.0768051147460938, "learning_rate": 3.983670485010968e-06, "loss": 0.9568, "step": 3270 }, { "epoch": 0.03991322681681352, "grad_norm": 2.945565700531006, "learning_rate": 3.989763587618816e-06, "loss": 1.0513, "step": 3275 }, { "epoch": 0.03997416304096133, "grad_norm": 2.451591730117798, "learning_rate": 3.995856690226664e-06, "loss": 0.9157, "step": 3280 }, { "epoch": 0.040035099265109134, "grad_norm": 2.8215837478637695, "learning_rate": 4.001949792834512e-06, "loss": 0.9347, "step": 3285 }, { "epoch": 0.04009603548925694, "grad_norm": 2.440276861190796, "learning_rate": 4.008042895442359e-06, "loss": 0.9701, "step": 3290 }, { "epoch": 0.04015697171340475, "grad_norm": 2.1598923206329346, "learning_rate": 4.014135998050208e-06, "loss": 1.0544, "step": 3295 }, { "epoch": 0.04021790793755256, "grad_norm": 2.3218212127685547, "learning_rate": 4.020229100658055e-06, "loss": 0.9719, "step": 3300 }, { "epoch": 0.04027884416170036, "grad_norm": 1.8723444938659668, "learning_rate": 4.026322203265903e-06, "loss": 1.0027, "step": 3305 }, { "epoch": 0.04033978038584817, "grad_norm": 1.8552546501159668, "learning_rate": 4.032415305873751e-06, "loss": 0.9363, "step": 3310 }, { "epoch": 0.04040071660999598, "grad_norm": 2.10253643989563, "learning_rate": 4.038508408481599e-06, "loss": 0.9209, "step": 3315 }, { "epoch": 0.04046165283414378, "grad_norm": 2.3745534420013428, "learning_rate": 4.044601511089447e-06, "loss": 0.9547, "step": 3320 }, { "epoch": 0.04052258905829159, "grad_norm": 2.439772129058838, "learning_rate": 4.050694613697295e-06, "loss": 0.8955, "step": 3325 }, { "epoch": 0.0405835252824394, "grad_norm": 2.256240129470825, "learning_rate": 4.0567877163051425e-06, "loss": 0.9474, "step": 3330 }, { "epoch": 0.0406444615065872, "grad_norm": 2.1677842140197754, "learning_rate": 4.062880818912991e-06, "loss": 0.9603, "step": 3335 }, { "epoch": 0.04070539773073501, "grad_norm": 2.069974184036255, "learning_rate": 4.068973921520838e-06, "loss": 0.9515, "step": 3340 }, { "epoch": 0.04076633395488282, "grad_norm": 2.3586792945861816, "learning_rate": 4.075067024128686e-06, "loss": 0.9773, "step": 3345 }, { "epoch": 0.04082727017903063, "grad_norm": 2.0715208053588867, "learning_rate": 4.0811601267365345e-06, "loss": 0.8471, "step": 3350 }, { "epoch": 0.04088820640317843, "grad_norm": 2.0356411933898926, "learning_rate": 4.087253229344382e-06, "loss": 0.9804, "step": 3355 }, { "epoch": 0.04094914262732624, "grad_norm": 2.269951343536377, "learning_rate": 4.09334633195223e-06, "loss": 1.0227, "step": 3360 }, { "epoch": 0.04101007885147405, "grad_norm": 2.079028367996216, "learning_rate": 4.099439434560078e-06, "loss": 0.9994, "step": 3365 }, { "epoch": 0.04107101507562185, "grad_norm": 1.918954610824585, "learning_rate": 4.1055325371679265e-06, "loss": 1.0055, "step": 3370 }, { "epoch": 0.04113195129976966, "grad_norm": 2.0549657344818115, "learning_rate": 4.111625639775774e-06, "loss": 0.9906, "step": 3375 }, { "epoch": 0.04119288752391747, "grad_norm": 1.8185443878173828, "learning_rate": 4.117718742383622e-06, "loss": 0.9213, "step": 3380 }, { "epoch": 0.04125382374806527, "grad_norm": 2.0990259647369385, "learning_rate": 4.12381184499147e-06, "loss": 1.0481, "step": 3385 }, { "epoch": 0.04131475997221308, "grad_norm": 2.401463747024536, "learning_rate": 4.129904947599318e-06, "loss": 0.9465, "step": 3390 }, { "epoch": 0.04137569619636089, "grad_norm": 2.0259780883789062, "learning_rate": 4.135998050207166e-06, "loss": 0.9387, "step": 3395 }, { "epoch": 0.0414366324205087, "grad_norm": 2.1218369007110596, "learning_rate": 4.142091152815014e-06, "loss": 0.987, "step": 3400 }, { "epoch": 0.0414975686446565, "grad_norm": 2.532149076461792, "learning_rate": 4.148184255422862e-06, "loss": 0.9658, "step": 3405 }, { "epoch": 0.04155850486880431, "grad_norm": 2.0243899822235107, "learning_rate": 4.15427735803071e-06, "loss": 0.9279, "step": 3410 }, { "epoch": 0.04161944109295212, "grad_norm": 2.139150857925415, "learning_rate": 4.160370460638558e-06, "loss": 0.9644, "step": 3415 }, { "epoch": 0.04168037731709992, "grad_norm": 2.559588670730591, "learning_rate": 4.166463563246406e-06, "loss": 1.0369, "step": 3420 }, { "epoch": 0.04174131354124773, "grad_norm": 2.229896068572998, "learning_rate": 4.172556665854253e-06, "loss": 0.9585, "step": 3425 }, { "epoch": 0.04180224976539554, "grad_norm": 2.276395559310913, "learning_rate": 4.178649768462102e-06, "loss": 0.9451, "step": 3430 }, { "epoch": 0.04186318598954335, "grad_norm": 2.401604652404785, "learning_rate": 4.184742871069949e-06, "loss": 0.9439, "step": 3435 }, { "epoch": 0.04192412221369115, "grad_norm": 2.007479429244995, "learning_rate": 4.190835973677797e-06, "loss": 0.9411, "step": 3440 }, { "epoch": 0.04198505843783896, "grad_norm": 1.9013057947158813, "learning_rate": 4.196929076285645e-06, "loss": 0.8485, "step": 3445 }, { "epoch": 0.04204599466198677, "grad_norm": 2.474655866622925, "learning_rate": 4.203022178893493e-06, "loss": 0.9546, "step": 3450 }, { "epoch": 0.04210693088613457, "grad_norm": 1.9308233261108398, "learning_rate": 4.209115281501341e-06, "loss": 0.8895, "step": 3455 }, { "epoch": 0.04216786711028238, "grad_norm": 2.2704524993896484, "learning_rate": 4.215208384109189e-06, "loss": 0.9328, "step": 3460 }, { "epoch": 0.04222880333443019, "grad_norm": 1.872929334640503, "learning_rate": 4.2213014867170365e-06, "loss": 0.9621, "step": 3465 }, { "epoch": 0.04228973955857799, "grad_norm": 2.5429117679595947, "learning_rate": 4.227394589324885e-06, "loss": 0.9638, "step": 3470 }, { "epoch": 0.0423506757827258, "grad_norm": 2.2186617851257324, "learning_rate": 4.233487691932732e-06, "loss": 1.0673, "step": 3475 }, { "epoch": 0.04241161200687361, "grad_norm": 2.8176636695861816, "learning_rate": 4.23958079454058e-06, "loss": 0.9134, "step": 3480 }, { "epoch": 0.042472548231021416, "grad_norm": 2.2745935916900635, "learning_rate": 4.2456738971484285e-06, "loss": 0.9246, "step": 3485 }, { "epoch": 0.04253348445516922, "grad_norm": 1.8617099523544312, "learning_rate": 4.251766999756276e-06, "loss": 1.0078, "step": 3490 }, { "epoch": 0.04259442067931703, "grad_norm": 2.209038496017456, "learning_rate": 4.257860102364124e-06, "loss": 0.9355, "step": 3495 }, { "epoch": 0.042655356903464836, "grad_norm": 2.273878335952759, "learning_rate": 4.263953204971972e-06, "loss": 0.9992, "step": 3500 }, { "epoch": 0.04271629312761264, "grad_norm": 2.066422462463379, "learning_rate": 4.27004630757982e-06, "loss": 0.9399, "step": 3505 }, { "epoch": 0.04277722935176045, "grad_norm": 2.0601515769958496, "learning_rate": 4.276139410187668e-06, "loss": 0.9943, "step": 3510 }, { "epoch": 0.042838165575908256, "grad_norm": 1.8384459018707275, "learning_rate": 4.282232512795515e-06, "loss": 0.9447, "step": 3515 }, { "epoch": 0.04289910180005606, "grad_norm": 1.9631686210632324, "learning_rate": 4.288325615403363e-06, "loss": 0.9973, "step": 3520 }, { "epoch": 0.04296003802420387, "grad_norm": 1.9385024309158325, "learning_rate": 4.294418718011212e-06, "loss": 1.0155, "step": 3525 }, { "epoch": 0.043020974248351676, "grad_norm": 1.882185935974121, "learning_rate": 4.300511820619059e-06, "loss": 0.8218, "step": 3530 }, { "epoch": 0.043081910472499485, "grad_norm": 2.090304374694824, "learning_rate": 4.306604923226907e-06, "loss": 0.9563, "step": 3535 }, { "epoch": 0.04314284669664729, "grad_norm": 1.9396369457244873, "learning_rate": 4.312698025834755e-06, "loss": 0.9794, "step": 3540 }, { "epoch": 0.043203782920795096, "grad_norm": 2.146796941757202, "learning_rate": 4.3187911284426036e-06, "loss": 0.9502, "step": 3545 }, { "epoch": 0.043264719144942905, "grad_norm": 1.952165126800537, "learning_rate": 4.324884231050451e-06, "loss": 1.0092, "step": 3550 }, { "epoch": 0.04332565536909071, "grad_norm": 2.328916072845459, "learning_rate": 4.330977333658299e-06, "loss": 0.8615, "step": 3555 }, { "epoch": 0.043386591593238516, "grad_norm": 2.469700574874878, "learning_rate": 4.337070436266147e-06, "loss": 0.9277, "step": 3560 }, { "epoch": 0.043447527817386325, "grad_norm": 2.1600749492645264, "learning_rate": 4.343163538873995e-06, "loss": 0.9856, "step": 3565 }, { "epoch": 0.04350846404153413, "grad_norm": 2.405890703201294, "learning_rate": 4.349256641481843e-06, "loss": 0.9306, "step": 3570 }, { "epoch": 0.043569400265681936, "grad_norm": 2.08870792388916, "learning_rate": 4.355349744089691e-06, "loss": 1.003, "step": 3575 }, { "epoch": 0.043630336489829745, "grad_norm": 2.2221837043762207, "learning_rate": 4.361442846697539e-06, "loss": 1.0031, "step": 3580 }, { "epoch": 0.043691272713977554, "grad_norm": 3.0048961639404297, "learning_rate": 4.367535949305387e-06, "loss": 0.9744, "step": 3585 }, { "epoch": 0.043752208938125356, "grad_norm": 2.6521992683410645, "learning_rate": 4.373629051913235e-06, "loss": 0.9481, "step": 3590 }, { "epoch": 0.043813145162273165, "grad_norm": 2.0681607723236084, "learning_rate": 4.379722154521083e-06, "loss": 1.0276, "step": 3595 }, { "epoch": 0.043874081386420974, "grad_norm": 2.2887091636657715, "learning_rate": 4.3858152571289305e-06, "loss": 0.9435, "step": 3600 }, { "epoch": 0.043935017610568776, "grad_norm": 2.272735357284546, "learning_rate": 4.391908359736779e-06, "loss": 0.9635, "step": 3605 }, { "epoch": 0.043995953834716585, "grad_norm": 2.1049270629882812, "learning_rate": 4.398001462344626e-06, "loss": 1.0501, "step": 3610 }, { "epoch": 0.044056890058864394, "grad_norm": 2.3685097694396973, "learning_rate": 4.404094564952474e-06, "loss": 0.9942, "step": 3615 }, { "epoch": 0.0441178262830122, "grad_norm": 1.8907880783081055, "learning_rate": 4.4101876675603224e-06, "loss": 0.9678, "step": 3620 }, { "epoch": 0.044178762507160005, "grad_norm": 2.6476292610168457, "learning_rate": 4.41628077016817e-06, "loss": 1.0011, "step": 3625 }, { "epoch": 0.044239698731307814, "grad_norm": 2.0885040760040283, "learning_rate": 4.422373872776018e-06, "loss": 0.9535, "step": 3630 }, { "epoch": 0.04430063495545562, "grad_norm": 1.9568232297897339, "learning_rate": 4.428466975383866e-06, "loss": 0.9886, "step": 3635 }, { "epoch": 0.044361571179603425, "grad_norm": 1.8715626001358032, "learning_rate": 4.4345600779917136e-06, "loss": 0.9594, "step": 3640 }, { "epoch": 0.044422507403751234, "grad_norm": 2.3019094467163086, "learning_rate": 4.440653180599562e-06, "loss": 1.0739, "step": 3645 }, { "epoch": 0.04448344362789904, "grad_norm": 1.8217363357543945, "learning_rate": 4.446746283207409e-06, "loss": 0.9356, "step": 3650 }, { "epoch": 0.044544379852046845, "grad_norm": 2.1607441902160645, "learning_rate": 4.452839385815257e-06, "loss": 0.9994, "step": 3655 }, { "epoch": 0.044605316076194654, "grad_norm": 2.000795841217041, "learning_rate": 4.4589324884231056e-06, "loss": 0.9906, "step": 3660 }, { "epoch": 0.04466625230034246, "grad_norm": 2.0633347034454346, "learning_rate": 4.465025591030953e-06, "loss": 1.0084, "step": 3665 }, { "epoch": 0.04472718852449027, "grad_norm": 2.169811964035034, "learning_rate": 4.471118693638801e-06, "loss": 0.9656, "step": 3670 }, { "epoch": 0.044788124748638074, "grad_norm": 2.228743314743042, "learning_rate": 4.477211796246649e-06, "loss": 1.0063, "step": 3675 }, { "epoch": 0.04484906097278588, "grad_norm": 2.2212271690368652, "learning_rate": 4.483304898854497e-06, "loss": 0.9923, "step": 3680 }, { "epoch": 0.04490999719693369, "grad_norm": 1.7021006345748901, "learning_rate": 4.489398001462345e-06, "loss": 0.9227, "step": 3685 }, { "epoch": 0.044970933421081494, "grad_norm": 2.326497793197632, "learning_rate": 4.495491104070193e-06, "loss": 1.0184, "step": 3690 }, { "epoch": 0.0450318696452293, "grad_norm": 2.181628942489624, "learning_rate": 4.5015842066780405e-06, "loss": 1.0272, "step": 3695 }, { "epoch": 0.04509280586937711, "grad_norm": 2.048267126083374, "learning_rate": 4.507677309285889e-06, "loss": 1.0355, "step": 3700 }, { "epoch": 0.045153742093524914, "grad_norm": 2.1785640716552734, "learning_rate": 4.513770411893736e-06, "loss": 0.9976, "step": 3705 }, { "epoch": 0.04521467831767272, "grad_norm": 2.026219606399536, "learning_rate": 4.519863514501584e-06, "loss": 0.9107, "step": 3710 }, { "epoch": 0.04527561454182053, "grad_norm": 2.070739507675171, "learning_rate": 4.5259566171094324e-06, "loss": 1.0247, "step": 3715 }, { "epoch": 0.04533655076596834, "grad_norm": 1.9066191911697388, "learning_rate": 4.532049719717281e-06, "loss": 0.9431, "step": 3720 }, { "epoch": 0.04539748699011614, "grad_norm": 2.221503734588623, "learning_rate": 4.538142822325128e-06, "loss": 0.9686, "step": 3725 }, { "epoch": 0.04545842321426395, "grad_norm": 2.0166754722595215, "learning_rate": 4.544235924932976e-06, "loss": 0.9065, "step": 3730 }, { "epoch": 0.04551935943841176, "grad_norm": 1.8901559114456177, "learning_rate": 4.550329027540824e-06, "loss": 0.9438, "step": 3735 }, { "epoch": 0.04558029566255956, "grad_norm": 2.1153724193573, "learning_rate": 4.556422130148672e-06, "loss": 0.8701, "step": 3740 }, { "epoch": 0.04564123188670737, "grad_norm": 2.3358066082000732, "learning_rate": 4.56251523275652e-06, "loss": 0.8961, "step": 3745 }, { "epoch": 0.04570216811085518, "grad_norm": 1.9369031190872192, "learning_rate": 4.568608335364368e-06, "loss": 1.006, "step": 3750 }, { "epoch": 0.04576310433500298, "grad_norm": 2.06307315826416, "learning_rate": 4.574701437972216e-06, "loss": 0.8988, "step": 3755 }, { "epoch": 0.04582404055915079, "grad_norm": 2.153454065322876, "learning_rate": 4.580794540580064e-06, "loss": 1.0502, "step": 3760 }, { "epoch": 0.0458849767832986, "grad_norm": 2.084298610687256, "learning_rate": 4.586887643187912e-06, "loss": 1.022, "step": 3765 }, { "epoch": 0.04594591300744641, "grad_norm": 2.174589157104492, "learning_rate": 4.59298074579576e-06, "loss": 0.9816, "step": 3770 }, { "epoch": 0.04600684923159421, "grad_norm": 1.9926854372024536, "learning_rate": 4.5990738484036075e-06, "loss": 0.9425, "step": 3775 }, { "epoch": 0.04606778545574202, "grad_norm": 2.3530213832855225, "learning_rate": 4.605166951011456e-06, "loss": 0.9703, "step": 3780 }, { "epoch": 0.04612872167988983, "grad_norm": 2.0010502338409424, "learning_rate": 4.611260053619303e-06, "loss": 0.9165, "step": 3785 }, { "epoch": 0.04618965790403763, "grad_norm": 2.1420202255249023, "learning_rate": 4.617353156227151e-06, "loss": 0.9953, "step": 3790 }, { "epoch": 0.04625059412818544, "grad_norm": 2.3444788455963135, "learning_rate": 4.6234462588349995e-06, "loss": 1.0119, "step": 3795 }, { "epoch": 0.04631153035233325, "grad_norm": 2.2890501022338867, "learning_rate": 4.629539361442847e-06, "loss": 0.9498, "step": 3800 }, { "epoch": 0.04637246657648105, "grad_norm": 2.58795166015625, "learning_rate": 4.635632464050695e-06, "loss": 0.927, "step": 3805 }, { "epoch": 0.04643340280062886, "grad_norm": 2.1497702598571777, "learning_rate": 4.641725566658543e-06, "loss": 0.9105, "step": 3810 }, { "epoch": 0.04649433902477667, "grad_norm": 2.007972240447998, "learning_rate": 4.647818669266391e-06, "loss": 0.9023, "step": 3815 }, { "epoch": 0.04655527524892448, "grad_norm": 2.433474540710449, "learning_rate": 4.653911771874239e-06, "loss": 0.9977, "step": 3820 }, { "epoch": 0.04661621147307228, "grad_norm": 2.161612033843994, "learning_rate": 4.660004874482086e-06, "loss": 0.9485, "step": 3825 }, { "epoch": 0.04667714769722009, "grad_norm": 2.275118112564087, "learning_rate": 4.6660979770899344e-06, "loss": 1.0115, "step": 3830 }, { "epoch": 0.0467380839213679, "grad_norm": 2.544887065887451, "learning_rate": 4.672191079697783e-06, "loss": 1.005, "step": 3835 }, { "epoch": 0.0467990201455157, "grad_norm": 2.0269901752471924, "learning_rate": 4.67828418230563e-06, "loss": 0.9471, "step": 3840 }, { "epoch": 0.04685995636966351, "grad_norm": 2.1125528812408447, "learning_rate": 4.684377284913478e-06, "loss": 1.0072, "step": 3845 }, { "epoch": 0.04692089259381132, "grad_norm": 1.9589868783950806, "learning_rate": 4.690470387521326e-06, "loss": 0.9447, "step": 3850 }, { "epoch": 0.04698182881795913, "grad_norm": 2.407726526260376, "learning_rate": 4.696563490129174e-06, "loss": 0.9709, "step": 3855 }, { "epoch": 0.04704276504210693, "grad_norm": 1.8213821649551392, "learning_rate": 4.702656592737022e-06, "loss": 0.9655, "step": 3860 }, { "epoch": 0.04710370126625474, "grad_norm": 1.8294352293014526, "learning_rate": 4.70874969534487e-06, "loss": 0.947, "step": 3865 }, { "epoch": 0.04716463749040255, "grad_norm": 2.4274585247039795, "learning_rate": 4.7148427979527175e-06, "loss": 0.9965, "step": 3870 }, { "epoch": 0.04722557371455035, "grad_norm": 1.9872039556503296, "learning_rate": 4.720935900560566e-06, "loss": 0.9075, "step": 3875 }, { "epoch": 0.04728650993869816, "grad_norm": 2.3701984882354736, "learning_rate": 4.727029003168413e-06, "loss": 0.9494, "step": 3880 }, { "epoch": 0.04734744616284597, "grad_norm": 2.450488328933716, "learning_rate": 4.733122105776261e-06, "loss": 1.0386, "step": 3885 }, { "epoch": 0.04740838238699377, "grad_norm": 1.8994296789169312, "learning_rate": 4.7392152083841095e-06, "loss": 1.0011, "step": 3890 }, { "epoch": 0.04746931861114158, "grad_norm": 1.8510147333145142, "learning_rate": 4.745308310991958e-06, "loss": 0.9531, "step": 3895 }, { "epoch": 0.04753025483528939, "grad_norm": 2.5481040477752686, "learning_rate": 4.751401413599805e-06, "loss": 0.9392, "step": 3900 }, { "epoch": 0.047591191059437196, "grad_norm": 1.9056538343429565, "learning_rate": 4.757494516207653e-06, "loss": 0.9494, "step": 3905 }, { "epoch": 0.047652127283585, "grad_norm": 2.315399646759033, "learning_rate": 4.7635876188155015e-06, "loss": 1.0087, "step": 3910 }, { "epoch": 0.04771306350773281, "grad_norm": 1.8936917781829834, "learning_rate": 4.769680721423349e-06, "loss": 0.9584, "step": 3915 }, { "epoch": 0.047773999731880616, "grad_norm": 1.870909571647644, "learning_rate": 4.775773824031197e-06, "loss": 0.8953, "step": 3920 }, { "epoch": 0.04783493595602842, "grad_norm": 2.1057283878326416, "learning_rate": 4.781866926639045e-06, "loss": 0.9187, "step": 3925 }, { "epoch": 0.04789587218017623, "grad_norm": 1.9904093742370605, "learning_rate": 4.7879600292468935e-06, "loss": 1.0177, "step": 3930 }, { "epoch": 0.047956808404324036, "grad_norm": 2.1990039348602295, "learning_rate": 4.794053131854741e-06, "loss": 0.9072, "step": 3935 }, { "epoch": 0.04801774462847184, "grad_norm": 2.465756416320801, "learning_rate": 4.800146234462589e-06, "loss": 1.0034, "step": 3940 }, { "epoch": 0.04807868085261965, "grad_norm": 1.9455868005752563, "learning_rate": 4.806239337070437e-06, "loss": 0.9354, "step": 3945 }, { "epoch": 0.048139617076767456, "grad_norm": 2.1364333629608154, "learning_rate": 4.812332439678285e-06, "loss": 0.904, "step": 3950 }, { "epoch": 0.048200553300915265, "grad_norm": 2.083496570587158, "learning_rate": 4.818425542286133e-06, "loss": 0.9865, "step": 3955 }, { "epoch": 0.04826148952506307, "grad_norm": 2.4452965259552, "learning_rate": 4.82451864489398e-06, "loss": 0.9535, "step": 3960 }, { "epoch": 0.048322425749210876, "grad_norm": 2.0080769062042236, "learning_rate": 4.830611747501828e-06, "loss": 1.0058, "step": 3965 }, { "epoch": 0.048383361973358685, "grad_norm": 2.161982774734497, "learning_rate": 4.836704850109677e-06, "loss": 1.0347, "step": 3970 }, { "epoch": 0.04844429819750649, "grad_norm": 2.450465679168701, "learning_rate": 4.842797952717524e-06, "loss": 1.0532, "step": 3975 }, { "epoch": 0.048505234421654296, "grad_norm": 2.3460447788238525, "learning_rate": 4.848891055325372e-06, "loss": 0.9696, "step": 3980 }, { "epoch": 0.048566170645802105, "grad_norm": 2.2660412788391113, "learning_rate": 4.85498415793322e-06, "loss": 0.8885, "step": 3985 }, { "epoch": 0.04862710686994991, "grad_norm": 2.0405964851379395, "learning_rate": 4.861077260541068e-06, "loss": 0.9495, "step": 3990 }, { "epoch": 0.048688043094097716, "grad_norm": 2.0638904571533203, "learning_rate": 4.867170363148916e-06, "loss": 0.9488, "step": 3995 }, { "epoch": 0.048748979318245525, "grad_norm": 1.833617091178894, "learning_rate": 4.873263465756764e-06, "loss": 0.9611, "step": 4000 }, { "epoch": 0.048809915542393334, "grad_norm": 2.1554882526397705, "learning_rate": 4.8793565683646115e-06, "loss": 0.8942, "step": 4005 }, { "epoch": 0.048870851766541136, "grad_norm": 2.4107930660247803, "learning_rate": 4.88544967097246e-06, "loss": 0.9151, "step": 4010 }, { "epoch": 0.048931787990688945, "grad_norm": 2.0286831855773926, "learning_rate": 4.891542773580307e-06, "loss": 0.9914, "step": 4015 }, { "epoch": 0.048992724214836754, "grad_norm": 2.016153335571289, "learning_rate": 4.897635876188155e-06, "loss": 0.9096, "step": 4020 }, { "epoch": 0.049053660438984556, "grad_norm": 2.7489840984344482, "learning_rate": 4.9037289787960035e-06, "loss": 0.9017, "step": 4025 }, { "epoch": 0.049114596663132365, "grad_norm": 1.993803858757019, "learning_rate": 4.909822081403851e-06, "loss": 0.9595, "step": 4030 }, { "epoch": 0.049175532887280174, "grad_norm": 2.1860127449035645, "learning_rate": 4.915915184011699e-06, "loss": 0.9417, "step": 4035 }, { "epoch": 0.04923646911142798, "grad_norm": 2.258742570877075, "learning_rate": 4.922008286619547e-06, "loss": 0.9522, "step": 4040 }, { "epoch": 0.049297405335575785, "grad_norm": 2.396272659301758, "learning_rate": 4.928101389227395e-06, "loss": 1.0001, "step": 4045 }, { "epoch": 0.049358341559723594, "grad_norm": 2.2153804302215576, "learning_rate": 4.934194491835243e-06, "loss": 0.9302, "step": 4050 }, { "epoch": 0.0494192777838714, "grad_norm": 2.5128464698791504, "learning_rate": 4.94028759444309e-06, "loss": 0.9623, "step": 4055 }, { "epoch": 0.049480214008019205, "grad_norm": 2.001112937927246, "learning_rate": 4.946380697050938e-06, "loss": 0.927, "step": 4060 }, { "epoch": 0.049541150232167014, "grad_norm": 2.112375259399414, "learning_rate": 4.952473799658787e-06, "loss": 1.0142, "step": 4065 }, { "epoch": 0.04960208645631482, "grad_norm": 2.051171064376831, "learning_rate": 4.958566902266635e-06, "loss": 0.9565, "step": 4070 }, { "epoch": 0.049663022680462625, "grad_norm": 3.4188027381896973, "learning_rate": 4.964660004874482e-06, "loss": 0.8889, "step": 4075 }, { "epoch": 0.049723958904610434, "grad_norm": 2.273369312286377, "learning_rate": 4.97075310748233e-06, "loss": 0.9656, "step": 4080 }, { "epoch": 0.04978489512875824, "grad_norm": 2.405642032623291, "learning_rate": 4.976846210090179e-06, "loss": 0.9012, "step": 4085 }, { "epoch": 0.04984583135290605, "grad_norm": 1.9596562385559082, "learning_rate": 4.982939312698026e-06, "loss": 0.9866, "step": 4090 }, { "epoch": 0.049906767577053854, "grad_norm": 2.4435882568359375, "learning_rate": 4.989032415305874e-06, "loss": 0.9225, "step": 4095 }, { "epoch": 0.04996770380120166, "grad_norm": 2.110245704650879, "learning_rate": 4.995125517913722e-06, "loss": 0.9469, "step": 4100 }, { "epoch": 0.05002864002534947, "grad_norm": 2.2324509620666504, "learning_rate": 4.999935856318153e-06, "loss": 0.9787, "step": 4105 }, { "epoch": 0.050089576249497274, "grad_norm": 2.2397258281707764, "learning_rate": 4.999615137908916e-06, "loss": 0.9869, "step": 4110 }, { "epoch": 0.05015051247364508, "grad_norm": 2.0199780464172363, "learning_rate": 4.99929441949968e-06, "loss": 0.9255, "step": 4115 }, { "epoch": 0.05021144869779289, "grad_norm": 2.2099974155426025, "learning_rate": 4.998973701090443e-06, "loss": 0.8974, "step": 4120 }, { "epoch": 0.050272384921940694, "grad_norm": 2.0019915103912354, "learning_rate": 4.998652982681207e-06, "loss": 0.9377, "step": 4125 }, { "epoch": 0.0503333211460885, "grad_norm": 2.0043535232543945, "learning_rate": 4.99833226427197e-06, "loss": 0.9737, "step": 4130 }, { "epoch": 0.05039425737023631, "grad_norm": 2.330782651901245, "learning_rate": 4.998011545862733e-06, "loss": 0.9366, "step": 4135 }, { "epoch": 0.05045519359438412, "grad_norm": 1.8246667385101318, "learning_rate": 4.997690827453497e-06, "loss": 0.9978, "step": 4140 }, { "epoch": 0.05051612981853192, "grad_norm": 2.3680670261383057, "learning_rate": 4.99737010904426e-06, "loss": 0.8696, "step": 4145 }, { "epoch": 0.05057706604267973, "grad_norm": 2.163390636444092, "learning_rate": 4.997049390635023e-06, "loss": 0.9095, "step": 4150 }, { "epoch": 0.05063800226682754, "grad_norm": 2.1428182125091553, "learning_rate": 4.9967286722257866e-06, "loss": 0.9024, "step": 4155 }, { "epoch": 0.05069893849097534, "grad_norm": 2.2488129138946533, "learning_rate": 4.99640795381655e-06, "loss": 1.0025, "step": 4160 }, { "epoch": 0.05075987471512315, "grad_norm": 2.0659704208374023, "learning_rate": 4.996087235407313e-06, "loss": 0.9854, "step": 4165 }, { "epoch": 0.05082081093927096, "grad_norm": 2.2170486450195312, "learning_rate": 4.9957665169980765e-06, "loss": 0.9753, "step": 4170 }, { "epoch": 0.05088174716341876, "grad_norm": 2.200618267059326, "learning_rate": 4.9954457985888395e-06, "loss": 0.9385, "step": 4175 }, { "epoch": 0.05094268338756657, "grad_norm": 2.074629306793213, "learning_rate": 4.9951250801796025e-06, "loss": 0.9759, "step": 4180 }, { "epoch": 0.05100361961171438, "grad_norm": 1.93681800365448, "learning_rate": 4.994804361770366e-06, "loss": 0.972, "step": 4185 }, { "epoch": 0.05106455583586219, "grad_norm": 2.075669288635254, "learning_rate": 4.994483643361129e-06, "loss": 0.9889, "step": 4190 }, { "epoch": 0.05112549206000999, "grad_norm": 1.929311752319336, "learning_rate": 4.994162924951892e-06, "loss": 1.0046, "step": 4195 }, { "epoch": 0.0511864282841578, "grad_norm": 2.1003100872039795, "learning_rate": 4.993842206542656e-06, "loss": 0.9045, "step": 4200 }, { "epoch": 0.05124736450830561, "grad_norm": 2.4804635047912598, "learning_rate": 4.993521488133419e-06, "loss": 0.941, "step": 4205 }, { "epoch": 0.05130830073245341, "grad_norm": 2.141833782196045, "learning_rate": 4.993200769724182e-06, "loss": 1.1923, "step": 4210 }, { "epoch": 0.05136923695660122, "grad_norm": 2.592484474182129, "learning_rate": 4.992880051314946e-06, "loss": 0.9061, "step": 4215 }, { "epoch": 0.05143017318074903, "grad_norm": 2.36490797996521, "learning_rate": 4.992559332905709e-06, "loss": 0.9541, "step": 4220 }, { "epoch": 0.05149110940489683, "grad_norm": 2.288130044937134, "learning_rate": 4.992238614496472e-06, "loss": 0.9101, "step": 4225 }, { "epoch": 0.05155204562904464, "grad_norm": 1.8853020668029785, "learning_rate": 4.991917896087236e-06, "loss": 0.9117, "step": 4230 }, { "epoch": 0.05161298185319245, "grad_norm": 2.1826367378234863, "learning_rate": 4.991597177677999e-06, "loss": 0.9149, "step": 4235 }, { "epoch": 0.05167391807734026, "grad_norm": 1.987215280532837, "learning_rate": 4.991276459268762e-06, "loss": 0.8813, "step": 4240 }, { "epoch": 0.05173485430148806, "grad_norm": 1.836188793182373, "learning_rate": 4.990955740859525e-06, "loss": 0.9313, "step": 4245 }, { "epoch": 0.05179579052563587, "grad_norm": 2.308166265487671, "learning_rate": 4.990635022450289e-06, "loss": 1.0083, "step": 4250 }, { "epoch": 0.05185672674978368, "grad_norm": 1.9143767356872559, "learning_rate": 4.990314304041052e-06, "loss": 0.939, "step": 4255 }, { "epoch": 0.05191766297393148, "grad_norm": 2.118182897567749, "learning_rate": 4.989993585631816e-06, "loss": 0.9664, "step": 4260 }, { "epoch": 0.05197859919807929, "grad_norm": 2.3367533683776855, "learning_rate": 4.989672867222579e-06, "loss": 1.0405, "step": 4265 }, { "epoch": 0.0520395354222271, "grad_norm": 1.8918825387954712, "learning_rate": 4.989352148813342e-06, "loss": 0.9372, "step": 4270 }, { "epoch": 0.05210047164637491, "grad_norm": 1.98777437210083, "learning_rate": 4.989031430404106e-06, "loss": 0.9298, "step": 4275 }, { "epoch": 0.05216140787052271, "grad_norm": 2.2459917068481445, "learning_rate": 4.988710711994869e-06, "loss": 0.9489, "step": 4280 }, { "epoch": 0.05222234409467052, "grad_norm": 2.9245643615722656, "learning_rate": 4.988389993585633e-06, "loss": 0.9441, "step": 4285 }, { "epoch": 0.05228328031881833, "grad_norm": 2.563006639480591, "learning_rate": 4.988069275176396e-06, "loss": 1.0456, "step": 4290 }, { "epoch": 0.05234421654296613, "grad_norm": 2.201111316680908, "learning_rate": 4.987748556767159e-06, "loss": 0.9675, "step": 4295 }, { "epoch": 0.05240515276711394, "grad_norm": 1.8550599813461304, "learning_rate": 4.9874278383579225e-06, "loss": 0.9792, "step": 4300 }, { "epoch": 0.05246608899126175, "grad_norm": 2.1303813457489014, "learning_rate": 4.9871071199486855e-06, "loss": 0.9424, "step": 4305 }, { "epoch": 0.05252702521540955, "grad_norm": 2.4361085891723633, "learning_rate": 4.986786401539449e-06, "loss": 0.9513, "step": 4310 }, { "epoch": 0.05258796143955736, "grad_norm": 2.151050090789795, "learning_rate": 4.986465683130212e-06, "loss": 0.9432, "step": 4315 }, { "epoch": 0.05264889766370517, "grad_norm": 2.457916259765625, "learning_rate": 4.9861449647209754e-06, "loss": 0.8928, "step": 4320 }, { "epoch": 0.052709833887852976, "grad_norm": 2.2687113285064697, "learning_rate": 4.9858242463117385e-06, "loss": 1.0136, "step": 4325 }, { "epoch": 0.05277077011200078, "grad_norm": 2.61336088180542, "learning_rate": 4.985503527902502e-06, "loss": 1.0638, "step": 4330 }, { "epoch": 0.05283170633614859, "grad_norm": 2.301290988922119, "learning_rate": 4.985182809493265e-06, "loss": 0.9274, "step": 4335 }, { "epoch": 0.052892642560296396, "grad_norm": 2.7419166564941406, "learning_rate": 4.984862091084028e-06, "loss": 0.9992, "step": 4340 }, { "epoch": 0.0529535787844442, "grad_norm": 2.2110538482666016, "learning_rate": 4.984541372674792e-06, "loss": 0.9681, "step": 4345 }, { "epoch": 0.05301451500859201, "grad_norm": 2.043732166290283, "learning_rate": 4.984220654265555e-06, "loss": 0.9906, "step": 4350 }, { "epoch": 0.053075451232739816, "grad_norm": 1.9240843057632446, "learning_rate": 4.983899935856318e-06, "loss": 0.9816, "step": 4355 }, { "epoch": 0.05313638745688762, "grad_norm": 1.982607126235962, "learning_rate": 4.983579217447082e-06, "loss": 0.955, "step": 4360 }, { "epoch": 0.05319732368103543, "grad_norm": 2.024993658065796, "learning_rate": 4.983258499037845e-06, "loss": 0.9569, "step": 4365 }, { "epoch": 0.053258259905183236, "grad_norm": 2.008641481399536, "learning_rate": 4.982937780628608e-06, "loss": 0.9171, "step": 4370 }, { "epoch": 0.053319196129331045, "grad_norm": 2.1363542079925537, "learning_rate": 4.982617062219372e-06, "loss": 1.0124, "step": 4375 }, { "epoch": 0.05338013235347885, "grad_norm": 2.3070123195648193, "learning_rate": 4.982296343810135e-06, "loss": 0.9613, "step": 4380 }, { "epoch": 0.053441068577626656, "grad_norm": 2.0939390659332275, "learning_rate": 4.981975625400898e-06, "loss": 0.9558, "step": 4385 }, { "epoch": 0.053502004801774465, "grad_norm": 2.575464963912964, "learning_rate": 4.981654906991662e-06, "loss": 0.8967, "step": 4390 }, { "epoch": 0.05356294102592227, "grad_norm": 2.090517520904541, "learning_rate": 4.981334188582425e-06, "loss": 0.8984, "step": 4395 }, { "epoch": 0.053623877250070076, "grad_norm": 2.107400894165039, "learning_rate": 4.981013470173188e-06, "loss": 0.9463, "step": 4400 }, { "epoch": 0.053684813474217885, "grad_norm": 2.210350751876831, "learning_rate": 4.980692751763952e-06, "loss": 0.9285, "step": 4405 }, { "epoch": 0.05374574969836569, "grad_norm": 1.9486621618270874, "learning_rate": 4.980372033354715e-06, "loss": 0.9239, "step": 4410 }, { "epoch": 0.053806685922513496, "grad_norm": 2.0503897666931152, "learning_rate": 4.980051314945478e-06, "loss": 0.8935, "step": 4415 }, { "epoch": 0.053867622146661305, "grad_norm": 2.5771706104278564, "learning_rate": 4.979730596536242e-06, "loss": 0.9835, "step": 4420 }, { "epoch": 0.053928558370809114, "grad_norm": 1.9543205499649048, "learning_rate": 4.979409878127005e-06, "loss": 0.8762, "step": 4425 }, { "epoch": 0.053989494594956916, "grad_norm": 2.064645290374756, "learning_rate": 4.979089159717769e-06, "loss": 0.9582, "step": 4430 }, { "epoch": 0.054050430819104725, "grad_norm": 2.4326469898223877, "learning_rate": 4.978768441308532e-06, "loss": 1.0108, "step": 4435 }, { "epoch": 0.054111367043252534, "grad_norm": 1.931441307067871, "learning_rate": 4.978447722899295e-06, "loss": 0.987, "step": 4440 }, { "epoch": 0.054172303267400336, "grad_norm": 1.9210056066513062, "learning_rate": 4.9781270044900585e-06, "loss": 0.9463, "step": 4445 }, { "epoch": 0.054233239491548145, "grad_norm": 2.239929437637329, "learning_rate": 4.9778062860808215e-06, "loss": 1.0271, "step": 4450 }, { "epoch": 0.054294175715695954, "grad_norm": 2.3973121643066406, "learning_rate": 4.977485567671585e-06, "loss": 0.9803, "step": 4455 }, { "epoch": 0.05435511193984376, "grad_norm": 2.026632070541382, "learning_rate": 4.977164849262348e-06, "loss": 0.9603, "step": 4460 }, { "epoch": 0.054416048163991565, "grad_norm": 2.2171881198883057, "learning_rate": 4.976844130853111e-06, "loss": 0.9752, "step": 4465 }, { "epoch": 0.054476984388139374, "grad_norm": 2.2971291542053223, "learning_rate": 4.976523412443875e-06, "loss": 1.0105, "step": 4470 }, { "epoch": 0.05453792061228718, "grad_norm": 2.215651512145996, "learning_rate": 4.976202694034638e-06, "loss": 1.0221, "step": 4475 }, { "epoch": 0.054598856836434985, "grad_norm": 2.1688013076782227, "learning_rate": 4.975881975625401e-06, "loss": 0.9278, "step": 4480 }, { "epoch": 0.054659793060582794, "grad_norm": 2.795686721801758, "learning_rate": 4.975561257216164e-06, "loss": 0.9396, "step": 4485 }, { "epoch": 0.0547207292847306, "grad_norm": 2.2553982734680176, "learning_rate": 4.975240538806928e-06, "loss": 0.9799, "step": 4490 }, { "epoch": 0.054781665508878405, "grad_norm": 2.2792389392852783, "learning_rate": 4.974919820397691e-06, "loss": 1.0558, "step": 4495 }, { "epoch": 0.054842601733026214, "grad_norm": 2.0480997562408447, "learning_rate": 4.974599101988454e-06, "loss": 0.88, "step": 4500 }, { "epoch": 0.05490353795717402, "grad_norm": 1.809685230255127, "learning_rate": 4.974278383579218e-06, "loss": 0.9053, "step": 4505 }, { "epoch": 0.05496447418132183, "grad_norm": 2.1256487369537354, "learning_rate": 4.973957665169981e-06, "loss": 0.9741, "step": 4510 }, { "epoch": 0.055025410405469634, "grad_norm": 2.200883388519287, "learning_rate": 4.973636946760744e-06, "loss": 0.9943, "step": 4515 }, { "epoch": 0.05508634662961744, "grad_norm": 2.4528164863586426, "learning_rate": 4.973316228351508e-06, "loss": 0.917, "step": 4520 }, { "epoch": 0.05514728285376525, "grad_norm": 1.7663525342941284, "learning_rate": 4.972995509942271e-06, "loss": 0.9905, "step": 4525 }, { "epoch": 0.055208219077913054, "grad_norm": 2.217694044113159, "learning_rate": 4.972674791533034e-06, "loss": 0.9689, "step": 4530 }, { "epoch": 0.05526915530206086, "grad_norm": 1.9925540685653687, "learning_rate": 4.972354073123798e-06, "loss": 0.9248, "step": 4535 }, { "epoch": 0.05533009152620867, "grad_norm": 2.16874623298645, "learning_rate": 4.972033354714561e-06, "loss": 0.9871, "step": 4540 }, { "epoch": 0.055391027750356474, "grad_norm": 2.3390684127807617, "learning_rate": 4.971712636305324e-06, "loss": 1.047, "step": 4545 }, { "epoch": 0.05545196397450428, "grad_norm": 2.1148860454559326, "learning_rate": 4.971391917896088e-06, "loss": 0.9799, "step": 4550 }, { "epoch": 0.05551290019865209, "grad_norm": 2.2171857357025146, "learning_rate": 4.971071199486851e-06, "loss": 0.9823, "step": 4555 }, { "epoch": 0.0555738364227999, "grad_norm": 2.3286259174346924, "learning_rate": 4.970750481077614e-06, "loss": 0.9218, "step": 4560 }, { "epoch": 0.0556347726469477, "grad_norm": 2.1417369842529297, "learning_rate": 4.970429762668378e-06, "loss": 0.9774, "step": 4565 }, { "epoch": 0.05569570887109551, "grad_norm": 2.3721773624420166, "learning_rate": 4.970109044259141e-06, "loss": 0.8849, "step": 4570 }, { "epoch": 0.05575664509524332, "grad_norm": 2.4369616508483887, "learning_rate": 4.969788325849904e-06, "loss": 0.8863, "step": 4575 }, { "epoch": 0.05581758131939112, "grad_norm": 2.6342060565948486, "learning_rate": 4.9694676074406676e-06, "loss": 0.9897, "step": 4580 }, { "epoch": 0.05587851754353893, "grad_norm": 2.029026985168457, "learning_rate": 4.9691468890314306e-06, "loss": 1.0591, "step": 4585 }, { "epoch": 0.05593945376768674, "grad_norm": 2.218843460083008, "learning_rate": 4.9688261706221944e-06, "loss": 0.9452, "step": 4590 }, { "epoch": 0.05600038999183454, "grad_norm": 1.9022529125213623, "learning_rate": 4.9685054522129575e-06, "loss": 0.9766, "step": 4595 }, { "epoch": 0.05606132621598235, "grad_norm": 2.2020909786224365, "learning_rate": 4.968184733803721e-06, "loss": 0.9751, "step": 4600 }, { "epoch": 0.05612226244013016, "grad_norm": 2.2495064735412598, "learning_rate": 4.967864015394484e-06, "loss": 0.9487, "step": 4605 }, { "epoch": 0.05618319866427797, "grad_norm": 2.1632957458496094, "learning_rate": 4.967543296985247e-06, "loss": 0.9183, "step": 4610 }, { "epoch": 0.05624413488842577, "grad_norm": 2.131624698638916, "learning_rate": 4.967222578576011e-06, "loss": 0.8551, "step": 4615 }, { "epoch": 0.05630507111257358, "grad_norm": 2.357882499694824, "learning_rate": 4.966901860166774e-06, "loss": 1.0006, "step": 4620 }, { "epoch": 0.05636600733672139, "grad_norm": 1.8777540922164917, "learning_rate": 4.966581141757537e-06, "loss": 0.9383, "step": 4625 }, { "epoch": 0.05642694356086919, "grad_norm": 2.0884621143341064, "learning_rate": 4.966260423348301e-06, "loss": 0.9594, "step": 4630 }, { "epoch": 0.056487879785017, "grad_norm": 2.1539218425750732, "learning_rate": 4.965939704939064e-06, "loss": 0.9687, "step": 4635 }, { "epoch": 0.05654881600916481, "grad_norm": 2.0321078300476074, "learning_rate": 4.965618986529827e-06, "loss": 0.9226, "step": 4640 }, { "epoch": 0.05660975223331261, "grad_norm": 2.178109884262085, "learning_rate": 4.965298268120591e-06, "loss": 0.9155, "step": 4645 }, { "epoch": 0.05667068845746042, "grad_norm": 2.267038583755493, "learning_rate": 4.964977549711354e-06, "loss": 0.9597, "step": 4650 }, { "epoch": 0.05673162468160823, "grad_norm": 2.3786604404449463, "learning_rate": 4.964656831302117e-06, "loss": 0.9498, "step": 4655 }, { "epoch": 0.05679256090575604, "grad_norm": 1.9528528451919556, "learning_rate": 4.96433611289288e-06, "loss": 0.9591, "step": 4660 }, { "epoch": 0.05685349712990384, "grad_norm": 2.7594730854034424, "learning_rate": 4.964015394483644e-06, "loss": 0.8953, "step": 4665 }, { "epoch": 0.05691443335405165, "grad_norm": 2.1950676441192627, "learning_rate": 4.963694676074407e-06, "loss": 0.9272, "step": 4670 }, { "epoch": 0.05697536957819946, "grad_norm": 2.099015235900879, "learning_rate": 4.96337395766517e-06, "loss": 1.0042, "step": 4675 }, { "epoch": 0.05703630580234726, "grad_norm": 2.5126829147338867, "learning_rate": 4.963053239255934e-06, "loss": 0.9105, "step": 4680 }, { "epoch": 0.05709724202649507, "grad_norm": 1.9535768032073975, "learning_rate": 4.962732520846697e-06, "loss": 0.9208, "step": 4685 }, { "epoch": 0.05715817825064288, "grad_norm": 2.4004063606262207, "learning_rate": 4.96241180243746e-06, "loss": 0.9582, "step": 4690 }, { "epoch": 0.05721911447479069, "grad_norm": 2.13843035697937, "learning_rate": 4.962091084028224e-06, "loss": 0.9684, "step": 4695 }, { "epoch": 0.05728005069893849, "grad_norm": 2.093946695327759, "learning_rate": 4.961770365618987e-06, "loss": 0.972, "step": 4700 }, { "epoch": 0.0573409869230863, "grad_norm": 1.9125750064849854, "learning_rate": 4.96144964720975e-06, "loss": 0.9449, "step": 4705 }, { "epoch": 0.05740192314723411, "grad_norm": 2.3880844116210938, "learning_rate": 4.961128928800514e-06, "loss": 0.9668, "step": 4710 }, { "epoch": 0.05746285937138191, "grad_norm": 1.988142490386963, "learning_rate": 4.960808210391277e-06, "loss": 0.9654, "step": 4715 }, { "epoch": 0.05752379559552972, "grad_norm": 2.188774824142456, "learning_rate": 4.96048749198204e-06, "loss": 0.914, "step": 4720 }, { "epoch": 0.05758473181967753, "grad_norm": 2.407693386077881, "learning_rate": 4.9601667735728035e-06, "loss": 0.9823, "step": 4725 }, { "epoch": 0.05764566804382533, "grad_norm": 2.300206422805786, "learning_rate": 4.9598460551635665e-06, "loss": 1.0106, "step": 4730 }, { "epoch": 0.05770660426797314, "grad_norm": 2.081063985824585, "learning_rate": 4.95952533675433e-06, "loss": 0.9845, "step": 4735 }, { "epoch": 0.05776754049212095, "grad_norm": 2.6710972785949707, "learning_rate": 4.959204618345093e-06, "loss": 0.9853, "step": 4740 }, { "epoch": 0.057828476716268756, "grad_norm": 2.4332940578460693, "learning_rate": 4.9588838999358564e-06, "loss": 0.9673, "step": 4745 }, { "epoch": 0.05788941294041656, "grad_norm": 1.959184169769287, "learning_rate": 4.95856318152662e-06, "loss": 0.9442, "step": 4750 }, { "epoch": 0.05795034916456437, "grad_norm": 1.9071435928344727, "learning_rate": 4.958242463117383e-06, "loss": 1.0052, "step": 4755 }, { "epoch": 0.058011285388712176, "grad_norm": 1.9966684579849243, "learning_rate": 4.957921744708147e-06, "loss": 0.9039, "step": 4760 }, { "epoch": 0.05807222161285998, "grad_norm": 2.154466152191162, "learning_rate": 4.95760102629891e-06, "loss": 0.9037, "step": 4765 }, { "epoch": 0.05813315783700779, "grad_norm": 2.2410995960235596, "learning_rate": 4.957280307889673e-06, "loss": 0.964, "step": 4770 }, { "epoch": 0.058194094061155596, "grad_norm": 2.212738037109375, "learning_rate": 4.956959589480437e-06, "loss": 0.9954, "step": 4775 }, { "epoch": 0.0582550302853034, "grad_norm": 2.03554105758667, "learning_rate": 4.9566388710712e-06, "loss": 0.9543, "step": 4780 }, { "epoch": 0.05831596650945121, "grad_norm": 1.8597582578659058, "learning_rate": 4.956318152661963e-06, "loss": 0.9977, "step": 4785 }, { "epoch": 0.058376902733599016, "grad_norm": 2.13322377204895, "learning_rate": 4.955997434252727e-06, "loss": 1.0273, "step": 4790 }, { "epoch": 0.058437838957746825, "grad_norm": 2.293912887573242, "learning_rate": 4.95567671584349e-06, "loss": 0.955, "step": 4795 }, { "epoch": 0.05849877518189463, "grad_norm": 2.494798421859741, "learning_rate": 4.955355997434253e-06, "loss": 0.9524, "step": 4800 }, { "epoch": 0.058559711406042436, "grad_norm": 2.8100998401641846, "learning_rate": 4.955035279025017e-06, "loss": 0.9809, "step": 4805 }, { "epoch": 0.058620647630190245, "grad_norm": 2.015406370162964, "learning_rate": 4.95471456061578e-06, "loss": 0.9359, "step": 4810 }, { "epoch": 0.05868158385433805, "grad_norm": 2.142071485519409, "learning_rate": 4.954393842206543e-06, "loss": 0.9551, "step": 4815 }, { "epoch": 0.058742520078485856, "grad_norm": 2.3174376487731934, "learning_rate": 4.954073123797306e-06, "loss": 1.0111, "step": 4820 }, { "epoch": 0.058803456302633665, "grad_norm": 1.9477663040161133, "learning_rate": 4.95375240538807e-06, "loss": 0.9527, "step": 4825 }, { "epoch": 0.05886439252678147, "grad_norm": 2.0411524772644043, "learning_rate": 4.953431686978833e-06, "loss": 0.9564, "step": 4830 }, { "epoch": 0.058925328750929276, "grad_norm": 2.079613208770752, "learning_rate": 4.953110968569596e-06, "loss": 0.9228, "step": 4835 }, { "epoch": 0.058986264975077085, "grad_norm": 2.0247299671173096, "learning_rate": 4.95279025016036e-06, "loss": 0.9394, "step": 4840 }, { "epoch": 0.059047201199224894, "grad_norm": 2.0431206226348877, "learning_rate": 4.952469531751123e-06, "loss": 0.9431, "step": 4845 }, { "epoch": 0.059108137423372696, "grad_norm": 1.9209715127944946, "learning_rate": 4.952148813341886e-06, "loss": 0.9395, "step": 4850 }, { "epoch": 0.059169073647520505, "grad_norm": 2.1755902767181396, "learning_rate": 4.9518280949326496e-06, "loss": 0.9362, "step": 4855 }, { "epoch": 0.059230009871668314, "grad_norm": 2.4584319591522217, "learning_rate": 4.951507376523413e-06, "loss": 0.9929, "step": 4860 }, { "epoch": 0.059290946095816116, "grad_norm": 2.467623472213745, "learning_rate": 4.951186658114176e-06, "loss": 0.9371, "step": 4865 }, { "epoch": 0.059351882319963925, "grad_norm": 2.024430990219116, "learning_rate": 4.9508659397049395e-06, "loss": 0.9466, "step": 4870 }, { "epoch": 0.059412818544111734, "grad_norm": 1.933137059211731, "learning_rate": 4.9505452212957025e-06, "loss": 0.9842, "step": 4875 }, { "epoch": 0.05947375476825954, "grad_norm": 2.067190647125244, "learning_rate": 4.950224502886466e-06, "loss": 0.9622, "step": 4880 }, { "epoch": 0.059534690992407345, "grad_norm": 2.126779556274414, "learning_rate": 4.949903784477229e-06, "loss": 0.9336, "step": 4885 }, { "epoch": 0.059595627216555154, "grad_norm": 2.2983028888702393, "learning_rate": 4.949583066067992e-06, "loss": 0.9415, "step": 4890 }, { "epoch": 0.05965656344070296, "grad_norm": 2.065775156021118, "learning_rate": 4.949262347658756e-06, "loss": 0.9981, "step": 4895 }, { "epoch": 0.059717499664850765, "grad_norm": 2.24076247215271, "learning_rate": 4.948941629249519e-06, "loss": 0.9744, "step": 4900 }, { "epoch": 0.059778435888998574, "grad_norm": 1.9521015882492065, "learning_rate": 4.948620910840283e-06, "loss": 0.9204, "step": 4905 }, { "epoch": 0.05983937211314638, "grad_norm": 2.200695276260376, "learning_rate": 4.948300192431046e-06, "loss": 0.9035, "step": 4910 }, { "epoch": 0.059900308337294185, "grad_norm": 2.0349855422973633, "learning_rate": 4.947979474021809e-06, "loss": 0.9777, "step": 4915 }, { "epoch": 0.059961244561441994, "grad_norm": 2.2277987003326416, "learning_rate": 4.947658755612573e-06, "loss": 0.9312, "step": 4920 }, { "epoch": 0.0600221807855898, "grad_norm": 2.3626112937927246, "learning_rate": 4.947338037203336e-06, "loss": 1.003, "step": 4925 }, { "epoch": 0.06008311700973761, "grad_norm": 2.156115770339966, "learning_rate": 4.947017318794099e-06, "loss": 0.9295, "step": 4930 }, { "epoch": 0.060144053233885414, "grad_norm": 2.7415244579315186, "learning_rate": 4.946696600384863e-06, "loss": 0.9928, "step": 4935 }, { "epoch": 0.06020498945803322, "grad_norm": 2.595630645751953, "learning_rate": 4.946375881975626e-06, "loss": 0.9956, "step": 4940 }, { "epoch": 0.06026592568218103, "grad_norm": 2.0903892517089844, "learning_rate": 4.946055163566389e-06, "loss": 0.9745, "step": 4945 }, { "epoch": 0.060326861906328834, "grad_norm": 2.0629918575286865, "learning_rate": 4.945734445157153e-06, "loss": 0.9533, "step": 4950 }, { "epoch": 0.06038779813047664, "grad_norm": 2.0302090644836426, "learning_rate": 4.945413726747916e-06, "loss": 0.967, "step": 4955 }, { "epoch": 0.06044873435462445, "grad_norm": 1.973815679550171, "learning_rate": 4.945093008338679e-06, "loss": 0.9701, "step": 4960 }, { "epoch": 0.060509670578772254, "grad_norm": 2.2228939533233643, "learning_rate": 4.944772289929443e-06, "loss": 0.8574, "step": 4965 }, { "epoch": 0.06057060680292006, "grad_norm": 2.0256011486053467, "learning_rate": 4.944451571520206e-06, "loss": 0.9085, "step": 4970 }, { "epoch": 0.06063154302706787, "grad_norm": 2.285141944885254, "learning_rate": 4.944130853110969e-06, "loss": 0.9714, "step": 4975 }, { "epoch": 0.06069247925121568, "grad_norm": 2.797790765762329, "learning_rate": 4.943810134701733e-06, "loss": 0.9573, "step": 4980 }, { "epoch": 0.06075341547536348, "grad_norm": 2.032802104949951, "learning_rate": 4.943489416292496e-06, "loss": 0.9492, "step": 4985 }, { "epoch": 0.06081435169951129, "grad_norm": 1.740852952003479, "learning_rate": 4.943168697883259e-06, "loss": 0.9656, "step": 4990 }, { "epoch": 0.0608752879236591, "grad_norm": 1.9527702331542969, "learning_rate": 4.942847979474022e-06, "loss": 0.9551, "step": 4995 }, { "epoch": 0.0609362241478069, "grad_norm": 2.3352224826812744, "learning_rate": 4.9425272610647855e-06, "loss": 0.9158, "step": 5000 }, { "epoch": 0.06099716037195471, "grad_norm": 2.2465903759002686, "learning_rate": 4.9422065426555485e-06, "loss": 0.9945, "step": 5005 }, { "epoch": 0.06105809659610252, "grad_norm": 2.3575856685638428, "learning_rate": 4.9418858242463116e-06, "loss": 1.0085, "step": 5010 }, { "epoch": 0.06111903282025032, "grad_norm": 2.126229763031006, "learning_rate": 4.9415651058370754e-06, "loss": 0.9389, "step": 5015 }, { "epoch": 0.06117996904439813, "grad_norm": 2.0068018436431885, "learning_rate": 4.9412443874278384e-06, "loss": 0.9047, "step": 5020 }, { "epoch": 0.06124090526854594, "grad_norm": 2.024073600769043, "learning_rate": 4.9409236690186015e-06, "loss": 0.956, "step": 5025 }, { "epoch": 0.06130184149269375, "grad_norm": 2.0306167602539062, "learning_rate": 4.940602950609365e-06, "loss": 0.9509, "step": 5030 }, { "epoch": 0.06136277771684155, "grad_norm": 2.153942346572876, "learning_rate": 4.940282232200128e-06, "loss": 0.9478, "step": 5035 }, { "epoch": 0.06142371394098936, "grad_norm": 2.1508092880249023, "learning_rate": 4.939961513790892e-06, "loss": 0.9657, "step": 5040 }, { "epoch": 0.06148465016513717, "grad_norm": 2.1936168670654297, "learning_rate": 4.939640795381655e-06, "loss": 0.8714, "step": 5045 }, { "epoch": 0.06154558638928497, "grad_norm": 1.9701260328292847, "learning_rate": 4.939320076972419e-06, "loss": 0.9709, "step": 5050 }, { "epoch": 0.06160652261343278, "grad_norm": 2.288543462753296, "learning_rate": 4.938999358563182e-06, "loss": 0.8993, "step": 5055 }, { "epoch": 0.06166745883758059, "grad_norm": 2.324033737182617, "learning_rate": 4.938678640153945e-06, "loss": 1.0535, "step": 5060 }, { "epoch": 0.06172839506172839, "grad_norm": 2.2949068546295166, "learning_rate": 4.938357921744709e-06, "loss": 0.9354, "step": 5065 }, { "epoch": 0.0617893312858762, "grad_norm": 1.9237924814224243, "learning_rate": 4.938037203335472e-06, "loss": 0.977, "step": 5070 }, { "epoch": 0.06185026751002401, "grad_norm": 2.191093683242798, "learning_rate": 4.937716484926235e-06, "loss": 0.9797, "step": 5075 }, { "epoch": 0.06191120373417182, "grad_norm": 2.018726348876953, "learning_rate": 4.937395766516999e-06, "loss": 0.8305, "step": 5080 }, { "epoch": 0.06197213995831962, "grad_norm": 2.0936644077301025, "learning_rate": 4.937075048107762e-06, "loss": 0.8771, "step": 5085 }, { "epoch": 0.06203307618246743, "grad_norm": 2.228210210800171, "learning_rate": 4.936754329698525e-06, "loss": 0.966, "step": 5090 }, { "epoch": 0.06209401240661524, "grad_norm": 2.765303373336792, "learning_rate": 4.936433611289289e-06, "loss": 0.9625, "step": 5095 }, { "epoch": 0.06215494863076304, "grad_norm": 1.950937271118164, "learning_rate": 4.936112892880052e-06, "loss": 0.9779, "step": 5100 }, { "epoch": 0.06221588485491085, "grad_norm": 2.4261159896850586, "learning_rate": 4.935792174470815e-06, "loss": 0.8844, "step": 5105 }, { "epoch": 0.06227682107905866, "grad_norm": 2.268289089202881, "learning_rate": 4.935471456061579e-06, "loss": 0.9669, "step": 5110 }, { "epoch": 0.06233775730320647, "grad_norm": 2.1374266147613525, "learning_rate": 4.935150737652342e-06, "loss": 0.9959, "step": 5115 }, { "epoch": 0.06239869352735427, "grad_norm": 2.6124653816223145, "learning_rate": 4.934830019243105e-06, "loss": 0.9243, "step": 5120 }, { "epoch": 0.06245962975150208, "grad_norm": 2.331411123275757, "learning_rate": 4.9345093008338686e-06, "loss": 0.8959, "step": 5125 }, { "epoch": 0.06252056597564988, "grad_norm": 1.8849228620529175, "learning_rate": 4.934188582424632e-06, "loss": 0.9663, "step": 5130 }, { "epoch": 0.06258150219979769, "grad_norm": 2.0511040687561035, "learning_rate": 4.933867864015395e-06, "loss": 0.9108, "step": 5135 }, { "epoch": 0.0626424384239455, "grad_norm": 2.2218616008758545, "learning_rate": 4.9335471456061585e-06, "loss": 0.9781, "step": 5140 }, { "epoch": 0.06270337464809331, "grad_norm": 1.9058868885040283, "learning_rate": 4.9332264271969215e-06, "loss": 0.9249, "step": 5145 }, { "epoch": 0.06276431087224112, "grad_norm": 1.926681399345398, "learning_rate": 4.9329057087876845e-06, "loss": 0.9113, "step": 5150 }, { "epoch": 0.06282524709638893, "grad_norm": 2.3974099159240723, "learning_rate": 4.932584990378448e-06, "loss": 1.0082, "step": 5155 }, { "epoch": 0.06288618332053672, "grad_norm": 2.058239221572876, "learning_rate": 4.932264271969211e-06, "loss": 1.0194, "step": 5160 }, { "epoch": 0.06294711954468453, "grad_norm": 2.0314316749572754, "learning_rate": 4.931943553559974e-06, "loss": 0.8919, "step": 5165 }, { "epoch": 0.06300805576883234, "grad_norm": 2.138688087463379, "learning_rate": 4.931622835150737e-06, "loss": 0.9033, "step": 5170 }, { "epoch": 0.06306899199298015, "grad_norm": 2.3355956077575684, "learning_rate": 4.931302116741501e-06, "loss": 0.9598, "step": 5175 }, { "epoch": 0.06312992821712796, "grad_norm": 2.079786539077759, "learning_rate": 4.930981398332264e-06, "loss": 0.9843, "step": 5180 }, { "epoch": 0.06319086444127577, "grad_norm": 2.1979329586029053, "learning_rate": 4.930660679923028e-06, "loss": 0.9825, "step": 5185 }, { "epoch": 0.06325180066542357, "grad_norm": 2.0374317169189453, "learning_rate": 4.930339961513791e-06, "loss": 0.9859, "step": 5190 }, { "epoch": 0.06331273688957137, "grad_norm": 2.1802268028259277, "learning_rate": 4.930019243104554e-06, "loss": 1.0284, "step": 5195 }, { "epoch": 0.06337367311371918, "grad_norm": 2.505406141281128, "learning_rate": 4.929698524695318e-06, "loss": 1.035, "step": 5200 }, { "epoch": 0.06343460933786699, "grad_norm": 2.008183717727661, "learning_rate": 4.929377806286081e-06, "loss": 0.9185, "step": 5205 }, { "epoch": 0.0634955455620148, "grad_norm": 1.922753930091858, "learning_rate": 4.929057087876845e-06, "loss": 0.9381, "step": 5210 }, { "epoch": 0.0635564817861626, "grad_norm": 2.649509906768799, "learning_rate": 4.928736369467608e-06, "loss": 0.957, "step": 5215 }, { "epoch": 0.06361741801031041, "grad_norm": 2.1096668243408203, "learning_rate": 4.928415651058371e-06, "loss": 0.9831, "step": 5220 }, { "epoch": 0.06367835423445821, "grad_norm": 2.0363025665283203, "learning_rate": 4.928094932649135e-06, "loss": 0.9459, "step": 5225 }, { "epoch": 0.06373929045860602, "grad_norm": 1.960835576057434, "learning_rate": 4.927774214239898e-06, "loss": 0.9633, "step": 5230 }, { "epoch": 0.06380022668275383, "grad_norm": 2.113628387451172, "learning_rate": 4.927453495830662e-06, "loss": 0.823, "step": 5235 }, { "epoch": 0.06386116290690164, "grad_norm": 2.7365033626556396, "learning_rate": 4.927132777421425e-06, "loss": 0.939, "step": 5240 }, { "epoch": 0.06392209913104945, "grad_norm": 2.2286596298217773, "learning_rate": 4.926812059012188e-06, "loss": 0.894, "step": 5245 }, { "epoch": 0.06398303535519725, "grad_norm": 2.0674309730529785, "learning_rate": 4.926491340602951e-06, "loss": 1.0177, "step": 5250 }, { "epoch": 0.06404397157934506, "grad_norm": 2.1436192989349365, "learning_rate": 4.926170622193715e-06, "loss": 0.9056, "step": 5255 }, { "epoch": 0.06410490780349286, "grad_norm": 2.2099034786224365, "learning_rate": 4.925849903784478e-06, "loss": 0.9407, "step": 5260 }, { "epoch": 0.06416584402764067, "grad_norm": 2.0297839641571045, "learning_rate": 4.925529185375241e-06, "loss": 0.9266, "step": 5265 }, { "epoch": 0.06422678025178848, "grad_norm": 2.0787642002105713, "learning_rate": 4.9252084669660045e-06, "loss": 0.9144, "step": 5270 }, { "epoch": 0.06428771647593629, "grad_norm": 2.0142362117767334, "learning_rate": 4.9248877485567675e-06, "loss": 0.9703, "step": 5275 }, { "epoch": 0.0643486527000841, "grad_norm": 2.2862629890441895, "learning_rate": 4.9245670301475306e-06, "loss": 0.9213, "step": 5280 }, { "epoch": 0.0644095889242319, "grad_norm": 1.9523248672485352, "learning_rate": 4.9242463117382944e-06, "loss": 0.9178, "step": 5285 }, { "epoch": 0.06447052514837971, "grad_norm": 1.8844631910324097, "learning_rate": 4.9239255933290574e-06, "loss": 0.9924, "step": 5290 }, { "epoch": 0.06453146137252751, "grad_norm": 2.016496419906616, "learning_rate": 4.9236048749198205e-06, "loss": 0.9051, "step": 5295 }, { "epoch": 0.06459239759667532, "grad_norm": 1.9929873943328857, "learning_rate": 4.923284156510584e-06, "loss": 0.853, "step": 5300 }, { "epoch": 0.06465333382082313, "grad_norm": 2.1102564334869385, "learning_rate": 4.922963438101347e-06, "loss": 0.9298, "step": 5305 }, { "epoch": 0.06471427004497093, "grad_norm": 2.2594616413116455, "learning_rate": 4.92264271969211e-06, "loss": 0.9309, "step": 5310 }, { "epoch": 0.06477520626911874, "grad_norm": 2.2996726036071777, "learning_rate": 4.922322001282874e-06, "loss": 0.9299, "step": 5315 }, { "epoch": 0.06483614249326655, "grad_norm": 2.2542150020599365, "learning_rate": 4.922001282873637e-06, "loss": 0.9684, "step": 5320 }, { "epoch": 0.06489707871741436, "grad_norm": 2.540433645248413, "learning_rate": 4.9216805644644e-06, "loss": 0.9528, "step": 5325 }, { "epoch": 0.06495801494156216, "grad_norm": 2.235123872756958, "learning_rate": 4.921359846055164e-06, "loss": 0.9139, "step": 5330 }, { "epoch": 0.06501895116570997, "grad_norm": 2.091385841369629, "learning_rate": 4.921039127645927e-06, "loss": 0.9584, "step": 5335 }, { "epoch": 0.06507988738985777, "grad_norm": 2.0263538360595703, "learning_rate": 4.92071840923669e-06, "loss": 0.8562, "step": 5340 }, { "epoch": 0.06514082361400558, "grad_norm": 2.0976107120513916, "learning_rate": 4.920397690827454e-06, "loss": 0.9549, "step": 5345 }, { "epoch": 0.06520175983815339, "grad_norm": 2.1109580993652344, "learning_rate": 4.920076972418217e-06, "loss": 0.9116, "step": 5350 }, { "epoch": 0.0652626960623012, "grad_norm": 1.7726367712020874, "learning_rate": 4.919756254008981e-06, "loss": 0.9618, "step": 5355 }, { "epoch": 0.065323632286449, "grad_norm": 2.1904642581939697, "learning_rate": 4.919435535599744e-06, "loss": 0.9005, "step": 5360 }, { "epoch": 0.0653845685105968, "grad_norm": 1.964765191078186, "learning_rate": 4.919114817190507e-06, "loss": 1.0087, "step": 5365 }, { "epoch": 0.06544550473474461, "grad_norm": 2.1579456329345703, "learning_rate": 4.918794098781271e-06, "loss": 1.0403, "step": 5370 }, { "epoch": 0.06550644095889242, "grad_norm": 2.0481858253479004, "learning_rate": 4.918473380372034e-06, "loss": 0.9446, "step": 5375 }, { "epoch": 0.06556737718304023, "grad_norm": 2.3253633975982666, "learning_rate": 4.918152661962798e-06, "loss": 0.9227, "step": 5380 }, { "epoch": 0.06562831340718804, "grad_norm": 1.9452900886535645, "learning_rate": 4.917831943553561e-06, "loss": 0.9762, "step": 5385 }, { "epoch": 0.06568924963133585, "grad_norm": 2.415825843811035, "learning_rate": 4.917511225144324e-06, "loss": 0.9889, "step": 5390 }, { "epoch": 0.06575018585548364, "grad_norm": 1.8791040182113647, "learning_rate": 4.9171905067350876e-06, "loss": 0.9035, "step": 5395 }, { "epoch": 0.06581112207963145, "grad_norm": 1.8688832521438599, "learning_rate": 4.916869788325851e-06, "loss": 0.9274, "step": 5400 }, { "epoch": 0.06587205830377926, "grad_norm": 1.9674782752990723, "learning_rate": 4.916549069916614e-06, "loss": 1.0067, "step": 5405 }, { "epoch": 0.06593299452792707, "grad_norm": 2.166707992553711, "learning_rate": 4.916228351507377e-06, "loss": 0.9969, "step": 5410 }, { "epoch": 0.06599393075207488, "grad_norm": 1.8928810358047485, "learning_rate": 4.9159076330981405e-06, "loss": 0.9521, "step": 5415 }, { "epoch": 0.06605486697622269, "grad_norm": 2.1687252521514893, "learning_rate": 4.9155869146889035e-06, "loss": 1.0406, "step": 5420 }, { "epoch": 0.0661158032003705, "grad_norm": 2.2276389598846436, "learning_rate": 4.9152661962796665e-06, "loss": 0.9251, "step": 5425 }, { "epoch": 0.0661767394245183, "grad_norm": 2.1028852462768555, "learning_rate": 4.91494547787043e-06, "loss": 0.9209, "step": 5430 }, { "epoch": 0.0662376756486661, "grad_norm": 2.2360053062438965, "learning_rate": 4.914624759461193e-06, "loss": 0.9795, "step": 5435 }, { "epoch": 0.06629861187281391, "grad_norm": 1.9014949798583984, "learning_rate": 4.914304041051956e-06, "loss": 0.938, "step": 5440 }, { "epoch": 0.06635954809696172, "grad_norm": 1.9601248502731323, "learning_rate": 4.91398332264272e-06, "loss": 0.9407, "step": 5445 }, { "epoch": 0.06642048432110953, "grad_norm": 1.917762041091919, "learning_rate": 4.913662604233483e-06, "loss": 0.8732, "step": 5450 }, { "epoch": 0.06648142054525734, "grad_norm": 2.511324405670166, "learning_rate": 4.913341885824246e-06, "loss": 0.9567, "step": 5455 }, { "epoch": 0.06654235676940513, "grad_norm": 2.1607937812805176, "learning_rate": 4.91302116741501e-06, "loss": 0.8701, "step": 5460 }, { "epoch": 0.06660329299355294, "grad_norm": 2.104475736618042, "learning_rate": 4.912700449005773e-06, "loss": 0.9667, "step": 5465 }, { "epoch": 0.06666422921770075, "grad_norm": 2.316772222518921, "learning_rate": 4.912379730596536e-06, "loss": 0.8953, "step": 5470 }, { "epoch": 0.06672516544184856, "grad_norm": 2.101846694946289, "learning_rate": 4.9120590121873e-06, "loss": 0.9365, "step": 5475 }, { "epoch": 0.06678610166599637, "grad_norm": 2.52949595451355, "learning_rate": 4.911738293778063e-06, "loss": 0.9113, "step": 5480 }, { "epoch": 0.06684703789014418, "grad_norm": 2.0682740211486816, "learning_rate": 4.911417575368826e-06, "loss": 0.8662, "step": 5485 }, { "epoch": 0.06690797411429199, "grad_norm": 2.409127712249756, "learning_rate": 4.91109685695959e-06, "loss": 0.8937, "step": 5490 }, { "epoch": 0.06696891033843978, "grad_norm": 2.0097084045410156, "learning_rate": 4.910776138550353e-06, "loss": 0.8619, "step": 5495 }, { "epoch": 0.06702984656258759, "grad_norm": 2.5570318698883057, "learning_rate": 4.910455420141116e-06, "loss": 0.9623, "step": 5500 }, { "epoch": 0.0670907827867354, "grad_norm": 2.0725317001342773, "learning_rate": 4.91013470173188e-06, "loss": 0.8931, "step": 5505 }, { "epoch": 0.06715171901088321, "grad_norm": 2.056048631668091, "learning_rate": 4.909813983322643e-06, "loss": 0.9094, "step": 5510 }, { "epoch": 0.06721265523503102, "grad_norm": 2.263329267501831, "learning_rate": 4.909493264913407e-06, "loss": 0.9619, "step": 5515 }, { "epoch": 0.06727359145917883, "grad_norm": 2.1455233097076416, "learning_rate": 4.90917254650417e-06, "loss": 0.9061, "step": 5520 }, { "epoch": 0.06733452768332664, "grad_norm": 2.113616943359375, "learning_rate": 4.908851828094934e-06, "loss": 1.0062, "step": 5525 }, { "epoch": 0.06739546390747443, "grad_norm": 2.1857852935791016, "learning_rate": 4.908531109685697e-06, "loss": 0.9361, "step": 5530 }, { "epoch": 0.06745640013162224, "grad_norm": 1.9813944101333618, "learning_rate": 4.90821039127646e-06, "loss": 0.9245, "step": 5535 }, { "epoch": 0.06751733635577005, "grad_norm": 2.4018540382385254, "learning_rate": 4.9078896728672235e-06, "loss": 0.9644, "step": 5540 }, { "epoch": 0.06757827257991786, "grad_norm": 2.608034372329712, "learning_rate": 4.9075689544579865e-06, "loss": 0.9805, "step": 5545 }, { "epoch": 0.06763920880406567, "grad_norm": 1.8872679471969604, "learning_rate": 4.9072482360487496e-06, "loss": 0.9583, "step": 5550 }, { "epoch": 0.06770014502821348, "grad_norm": 1.9081449508666992, "learning_rate": 4.9069275176395134e-06, "loss": 0.9781, "step": 5555 }, { "epoch": 0.06776108125236129, "grad_norm": 2.071246862411499, "learning_rate": 4.9066067992302764e-06, "loss": 0.9672, "step": 5560 }, { "epoch": 0.06782201747650908, "grad_norm": 2.547616481781006, "learning_rate": 4.9062860808210395e-06, "loss": 0.8491, "step": 5565 }, { "epoch": 0.06788295370065689, "grad_norm": 2.0076513290405273, "learning_rate": 4.905965362411803e-06, "loss": 0.9738, "step": 5570 }, { "epoch": 0.0679438899248047, "grad_norm": 2.02601957321167, "learning_rate": 4.905644644002566e-06, "loss": 0.9918, "step": 5575 }, { "epoch": 0.06800482614895251, "grad_norm": 1.9831821918487549, "learning_rate": 4.905323925593329e-06, "loss": 0.8933, "step": 5580 }, { "epoch": 0.06806576237310032, "grad_norm": 2.042637348175049, "learning_rate": 4.905003207184092e-06, "loss": 0.9285, "step": 5585 }, { "epoch": 0.06812669859724813, "grad_norm": 2.0547916889190674, "learning_rate": 4.904682488774856e-06, "loss": 0.9446, "step": 5590 }, { "epoch": 0.06818763482139592, "grad_norm": 2.579834461212158, "learning_rate": 4.904361770365619e-06, "loss": 0.9775, "step": 5595 }, { "epoch": 0.06824857104554373, "grad_norm": 2.182192087173462, "learning_rate": 4.904041051956382e-06, "loss": 0.9089, "step": 5600 }, { "epoch": 0.06830950726969154, "grad_norm": 2.3873093128204346, "learning_rate": 4.903720333547146e-06, "loss": 0.9544, "step": 5605 }, { "epoch": 0.06837044349383935, "grad_norm": 2.099276065826416, "learning_rate": 4.903399615137909e-06, "loss": 0.9969, "step": 5610 }, { "epoch": 0.06843137971798716, "grad_norm": 2.0704431533813477, "learning_rate": 4.903078896728672e-06, "loss": 0.9704, "step": 5615 }, { "epoch": 0.06849231594213497, "grad_norm": 1.9915403127670288, "learning_rate": 4.902758178319436e-06, "loss": 0.9233, "step": 5620 }, { "epoch": 0.06855325216628277, "grad_norm": 2.1143558025360107, "learning_rate": 4.902437459910199e-06, "loss": 0.9941, "step": 5625 }, { "epoch": 0.06861418839043057, "grad_norm": 1.9448161125183105, "learning_rate": 4.902116741500962e-06, "loss": 0.9344, "step": 5630 }, { "epoch": 0.06867512461457838, "grad_norm": 2.019564151763916, "learning_rate": 4.901796023091726e-06, "loss": 0.9865, "step": 5635 }, { "epoch": 0.06873606083872619, "grad_norm": 3.0411031246185303, "learning_rate": 4.901475304682489e-06, "loss": 0.8556, "step": 5640 }, { "epoch": 0.068796997062874, "grad_norm": 2.219921827316284, "learning_rate": 4.901154586273252e-06, "loss": 0.9713, "step": 5645 }, { "epoch": 0.0688579332870218, "grad_norm": 1.9028618335723877, "learning_rate": 4.900833867864016e-06, "loss": 0.918, "step": 5650 }, { "epoch": 0.06891886951116961, "grad_norm": 2.1054720878601074, "learning_rate": 4.900513149454779e-06, "loss": 0.9923, "step": 5655 }, { "epoch": 0.06897980573531742, "grad_norm": 1.9352818727493286, "learning_rate": 4.900192431045543e-06, "loss": 0.8786, "step": 5660 }, { "epoch": 0.06904074195946522, "grad_norm": 1.9749253988265991, "learning_rate": 4.899871712636306e-06, "loss": 0.9483, "step": 5665 }, { "epoch": 0.06910167818361303, "grad_norm": 2.0778286457061768, "learning_rate": 4.899550994227069e-06, "loss": 0.8813, "step": 5670 }, { "epoch": 0.06916261440776084, "grad_norm": 2.4423959255218506, "learning_rate": 4.899230275817833e-06, "loss": 0.912, "step": 5675 }, { "epoch": 0.06922355063190865, "grad_norm": 1.8516321182250977, "learning_rate": 4.898909557408596e-06, "loss": 0.9747, "step": 5680 }, { "epoch": 0.06928448685605645, "grad_norm": 1.8656874895095825, "learning_rate": 4.8985888389993595e-06, "loss": 0.8786, "step": 5685 }, { "epoch": 0.06934542308020426, "grad_norm": 2.3048288822174072, "learning_rate": 4.8982681205901225e-06, "loss": 1.0132, "step": 5690 }, { "epoch": 0.06940635930435206, "grad_norm": 2.263622760772705, "learning_rate": 4.8979474021808855e-06, "loss": 0.9884, "step": 5695 }, { "epoch": 0.06946729552849987, "grad_norm": 2.505897045135498, "learning_rate": 4.897626683771649e-06, "loss": 0.9426, "step": 5700 }, { "epoch": 0.06952823175264768, "grad_norm": 1.9871858358383179, "learning_rate": 4.897305965362412e-06, "loss": 0.9037, "step": 5705 }, { "epoch": 0.06958916797679549, "grad_norm": 2.029043436050415, "learning_rate": 4.896985246953175e-06, "loss": 0.9179, "step": 5710 }, { "epoch": 0.0696501042009433, "grad_norm": 2.4915287494659424, "learning_rate": 4.896664528543939e-06, "loss": 0.9219, "step": 5715 }, { "epoch": 0.0697110404250911, "grad_norm": 2.033275842666626, "learning_rate": 4.896343810134702e-06, "loss": 1.0468, "step": 5720 }, { "epoch": 0.06977197664923891, "grad_norm": 2.1157631874084473, "learning_rate": 4.896023091725465e-06, "loss": 0.9056, "step": 5725 }, { "epoch": 0.06983291287338671, "grad_norm": 1.7507747411727905, "learning_rate": 4.895702373316229e-06, "loss": 0.9514, "step": 5730 }, { "epoch": 0.06989384909753452, "grad_norm": 2.0126051902770996, "learning_rate": 4.895381654906992e-06, "loss": 0.9594, "step": 5735 }, { "epoch": 0.06995478532168233, "grad_norm": 2.042680263519287, "learning_rate": 4.895060936497755e-06, "loss": 0.9634, "step": 5740 }, { "epoch": 0.07001572154583013, "grad_norm": 3.187479257583618, "learning_rate": 4.894740218088519e-06, "loss": 0.9632, "step": 5745 }, { "epoch": 0.07007665776997794, "grad_norm": 2.4841887950897217, "learning_rate": 4.894419499679282e-06, "loss": 0.9268, "step": 5750 }, { "epoch": 0.07013759399412575, "grad_norm": 2.085435152053833, "learning_rate": 4.894098781270045e-06, "loss": 0.9858, "step": 5755 }, { "epoch": 0.07019853021827356, "grad_norm": 3.88295578956604, "learning_rate": 4.893778062860808e-06, "loss": 1.0013, "step": 5760 }, { "epoch": 0.07025946644242136, "grad_norm": 1.8874945640563965, "learning_rate": 4.893457344451572e-06, "loss": 0.9537, "step": 5765 }, { "epoch": 0.07032040266656917, "grad_norm": 2.056675910949707, "learning_rate": 4.893136626042335e-06, "loss": 0.8948, "step": 5770 }, { "epoch": 0.07038133889071697, "grad_norm": 1.96757173538208, "learning_rate": 4.892815907633098e-06, "loss": 0.8892, "step": 5775 }, { "epoch": 0.07044227511486478, "grad_norm": 2.1282758712768555, "learning_rate": 4.892495189223862e-06, "loss": 0.9271, "step": 5780 }, { "epoch": 0.07050321133901259, "grad_norm": 2.510052442550659, "learning_rate": 4.892174470814625e-06, "loss": 0.9595, "step": 5785 }, { "epoch": 0.0705641475631604, "grad_norm": 1.9504060745239258, "learning_rate": 4.891853752405388e-06, "loss": 0.8996, "step": 5790 }, { "epoch": 0.07062508378730821, "grad_norm": 2.377829074859619, "learning_rate": 4.891533033996152e-06, "loss": 0.9197, "step": 5795 }, { "epoch": 0.070686020011456, "grad_norm": 2.296762228012085, "learning_rate": 4.891212315586915e-06, "loss": 0.9186, "step": 5800 }, { "epoch": 0.07074695623560381, "grad_norm": 2.619567632675171, "learning_rate": 4.890891597177679e-06, "loss": 0.9934, "step": 5805 }, { "epoch": 0.07080789245975162, "grad_norm": 1.9482396841049194, "learning_rate": 4.890570878768442e-06, "loss": 0.8875, "step": 5810 }, { "epoch": 0.07086882868389943, "grad_norm": 2.069843053817749, "learning_rate": 4.890250160359205e-06, "loss": 0.9287, "step": 5815 }, { "epoch": 0.07092976490804724, "grad_norm": 1.6548014879226685, "learning_rate": 4.8899294419499686e-06, "loss": 0.8822, "step": 5820 }, { "epoch": 0.07099070113219505, "grad_norm": 2.100879430770874, "learning_rate": 4.889608723540732e-06, "loss": 0.9993, "step": 5825 }, { "epoch": 0.07105163735634284, "grad_norm": 2.66702938079834, "learning_rate": 4.8892880051314954e-06, "loss": 0.9634, "step": 5830 }, { "epoch": 0.07111257358049065, "grad_norm": 2.041637659072876, "learning_rate": 4.8889672867222585e-06, "loss": 1.0103, "step": 5835 }, { "epoch": 0.07117350980463846, "grad_norm": 2.2805373668670654, "learning_rate": 4.8886465683130215e-06, "loss": 0.9377, "step": 5840 }, { "epoch": 0.07123444602878627, "grad_norm": 1.9731310606002808, "learning_rate": 4.888325849903785e-06, "loss": 0.8971, "step": 5845 }, { "epoch": 0.07129538225293408, "grad_norm": 2.0691118240356445, "learning_rate": 4.888005131494548e-06, "loss": 0.9441, "step": 5850 }, { "epoch": 0.07135631847708189, "grad_norm": 2.4567580223083496, "learning_rate": 4.887684413085311e-06, "loss": 0.9827, "step": 5855 }, { "epoch": 0.0714172547012297, "grad_norm": 2.105822801589966, "learning_rate": 4.887363694676075e-06, "loss": 0.9002, "step": 5860 }, { "epoch": 0.0714781909253775, "grad_norm": 2.1354997158050537, "learning_rate": 4.887042976266838e-06, "loss": 0.9118, "step": 5865 }, { "epoch": 0.0715391271495253, "grad_norm": 1.95277738571167, "learning_rate": 4.886722257857601e-06, "loss": 0.8822, "step": 5870 }, { "epoch": 0.07160006337367311, "grad_norm": 2.1227755546569824, "learning_rate": 4.886401539448365e-06, "loss": 0.9344, "step": 5875 }, { "epoch": 0.07166099959782092, "grad_norm": 2.5037665367126465, "learning_rate": 4.886080821039128e-06, "loss": 1.0238, "step": 5880 }, { "epoch": 0.07172193582196873, "grad_norm": 2.9445648193359375, "learning_rate": 4.885760102629891e-06, "loss": 0.9954, "step": 5885 }, { "epoch": 0.07178287204611654, "grad_norm": 2.250157594680786, "learning_rate": 4.885439384220655e-06, "loss": 0.9299, "step": 5890 }, { "epoch": 0.07184380827026435, "grad_norm": 1.691392421722412, "learning_rate": 4.885118665811418e-06, "loss": 0.8741, "step": 5895 }, { "epoch": 0.07190474449441214, "grad_norm": 1.8591121435165405, "learning_rate": 4.884797947402181e-06, "loss": 0.9365, "step": 5900 }, { "epoch": 0.07196568071855995, "grad_norm": 2.3170573711395264, "learning_rate": 4.884477228992945e-06, "loss": 0.9754, "step": 5905 }, { "epoch": 0.07202661694270776, "grad_norm": 2.3220057487487793, "learning_rate": 4.884156510583708e-06, "loss": 0.9095, "step": 5910 }, { "epoch": 0.07208755316685557, "grad_norm": 2.4482905864715576, "learning_rate": 4.883835792174471e-06, "loss": 0.9448, "step": 5915 }, { "epoch": 0.07214848939100338, "grad_norm": 1.8132418394088745, "learning_rate": 4.883515073765234e-06, "loss": 0.949, "step": 5920 }, { "epoch": 0.07220942561515119, "grad_norm": 1.8171226978302002, "learning_rate": 4.883194355355998e-06, "loss": 0.8578, "step": 5925 }, { "epoch": 0.072270361839299, "grad_norm": 2.5577595233917236, "learning_rate": 4.882873636946761e-06, "loss": 0.9053, "step": 5930 }, { "epoch": 0.07233129806344679, "grad_norm": 2.2908802032470703, "learning_rate": 4.882552918537524e-06, "loss": 0.9132, "step": 5935 }, { "epoch": 0.0723922342875946, "grad_norm": 2.061593770980835, "learning_rate": 4.882232200128288e-06, "loss": 0.942, "step": 5940 }, { "epoch": 0.07245317051174241, "grad_norm": 1.8257638216018677, "learning_rate": 4.881911481719051e-06, "loss": 0.917, "step": 5945 }, { "epoch": 0.07251410673589022, "grad_norm": 1.8867508172988892, "learning_rate": 4.881590763309814e-06, "loss": 0.9091, "step": 5950 }, { "epoch": 0.07257504296003803, "grad_norm": 2.063039779663086, "learning_rate": 4.881270044900578e-06, "loss": 0.8948, "step": 5955 }, { "epoch": 0.07263597918418584, "grad_norm": 2.1148252487182617, "learning_rate": 4.880949326491341e-06, "loss": 0.8989, "step": 5960 }, { "epoch": 0.07269691540833363, "grad_norm": 2.3737564086914062, "learning_rate": 4.8806286080821045e-06, "loss": 0.9379, "step": 5965 }, { "epoch": 0.07275785163248144, "grad_norm": 2.0330874919891357, "learning_rate": 4.8803078896728675e-06, "loss": 0.8909, "step": 5970 }, { "epoch": 0.07281878785662925, "grad_norm": 2.3606760501861572, "learning_rate": 4.8799871712636306e-06, "loss": 0.9183, "step": 5975 }, { "epoch": 0.07287972408077706, "grad_norm": 2.4035112857818604, "learning_rate": 4.879666452854394e-06, "loss": 0.968, "step": 5980 }, { "epoch": 0.07294066030492487, "grad_norm": 1.5972390174865723, "learning_rate": 4.8793457344451574e-06, "loss": 0.8724, "step": 5985 }, { "epoch": 0.07300159652907268, "grad_norm": 1.852688193321228, "learning_rate": 4.879025016035921e-06, "loss": 0.862, "step": 5990 }, { "epoch": 0.07306253275322049, "grad_norm": 2.3141932487487793, "learning_rate": 4.878704297626684e-06, "loss": 0.9134, "step": 5995 }, { "epoch": 0.07312346897736828, "grad_norm": 2.7298431396484375, "learning_rate": 4.878383579217447e-06, "loss": 0.9314, "step": 6000 }, { "epoch": 0.07318440520151609, "grad_norm": 2.044787883758545, "learning_rate": 4.878062860808211e-06, "loss": 0.9747, "step": 6005 }, { "epoch": 0.0732453414256639, "grad_norm": 2.011598825454712, "learning_rate": 4.877742142398974e-06, "loss": 0.9663, "step": 6010 }, { "epoch": 0.07330627764981171, "grad_norm": 1.9038563966751099, "learning_rate": 4.877421423989737e-06, "loss": 0.9045, "step": 6015 }, { "epoch": 0.07336721387395952, "grad_norm": 1.9643322229385376, "learning_rate": 4.877100705580501e-06, "loss": 0.9028, "step": 6020 }, { "epoch": 0.07342815009810733, "grad_norm": 2.280993938446045, "learning_rate": 4.876779987171264e-06, "loss": 0.9214, "step": 6025 }, { "epoch": 0.07348908632225513, "grad_norm": 2.0054423809051514, "learning_rate": 4.876459268762027e-06, "loss": 0.8931, "step": 6030 }, { "epoch": 0.07355002254640293, "grad_norm": 2.2734999656677246, "learning_rate": 4.876138550352791e-06, "loss": 0.9034, "step": 6035 }, { "epoch": 0.07361095877055074, "grad_norm": 2.075723171234131, "learning_rate": 4.875817831943554e-06, "loss": 0.9707, "step": 6040 }, { "epoch": 0.07367189499469855, "grad_norm": 2.215965986251831, "learning_rate": 4.875497113534317e-06, "loss": 0.8916, "step": 6045 }, { "epoch": 0.07373283121884636, "grad_norm": 2.103832960128784, "learning_rate": 4.875176395125081e-06, "loss": 0.8927, "step": 6050 }, { "epoch": 0.07379376744299417, "grad_norm": 2.4160139560699463, "learning_rate": 4.874855676715844e-06, "loss": 0.977, "step": 6055 }, { "epoch": 0.07385470366714197, "grad_norm": 1.8669407367706299, "learning_rate": 4.874534958306607e-06, "loss": 0.9289, "step": 6060 }, { "epoch": 0.07391563989128977, "grad_norm": 1.6428558826446533, "learning_rate": 4.874214239897371e-06, "loss": 0.9357, "step": 6065 }, { "epoch": 0.07397657611543758, "grad_norm": 2.045236587524414, "learning_rate": 4.873893521488134e-06, "loss": 1.0226, "step": 6070 }, { "epoch": 0.07403751233958539, "grad_norm": 2.2175097465515137, "learning_rate": 4.873572803078897e-06, "loss": 0.9854, "step": 6075 }, { "epoch": 0.0740984485637332, "grad_norm": 2.3538291454315186, "learning_rate": 4.873252084669661e-06, "loss": 0.8974, "step": 6080 }, { "epoch": 0.074159384787881, "grad_norm": 2.0438039302825928, "learning_rate": 4.872931366260424e-06, "loss": 0.9737, "step": 6085 }, { "epoch": 0.07422032101202881, "grad_norm": 2.105994939804077, "learning_rate": 4.872610647851187e-06, "loss": 0.9199, "step": 6090 }, { "epoch": 0.07428125723617662, "grad_norm": 2.0574491024017334, "learning_rate": 4.87228992944195e-06, "loss": 0.8887, "step": 6095 }, { "epoch": 0.07434219346032442, "grad_norm": 1.754936933517456, "learning_rate": 4.871969211032714e-06, "loss": 0.9969, "step": 6100 }, { "epoch": 0.07440312968447223, "grad_norm": 1.9501475095748901, "learning_rate": 4.871648492623477e-06, "loss": 0.8802, "step": 6105 }, { "epoch": 0.07446406590862004, "grad_norm": 2.2370874881744385, "learning_rate": 4.8713277742142405e-06, "loss": 0.8972, "step": 6110 }, { "epoch": 0.07452500213276785, "grad_norm": 2.410597562789917, "learning_rate": 4.8710070558050035e-06, "loss": 0.9145, "step": 6115 }, { "epoch": 0.07458593835691565, "grad_norm": 2.155970573425293, "learning_rate": 4.8706863373957665e-06, "loss": 0.9388, "step": 6120 }, { "epoch": 0.07464687458106346, "grad_norm": 2.494917869567871, "learning_rate": 4.87036561898653e-06, "loss": 0.9503, "step": 6125 }, { "epoch": 0.07470781080521127, "grad_norm": 1.8307732343673706, "learning_rate": 4.870044900577293e-06, "loss": 0.9698, "step": 6130 }, { "epoch": 0.07476874702935907, "grad_norm": 2.125645875930786, "learning_rate": 4.869724182168057e-06, "loss": 0.9759, "step": 6135 }, { "epoch": 0.07482968325350688, "grad_norm": 2.3660974502563477, "learning_rate": 4.86940346375882e-06, "loss": 0.8708, "step": 6140 }, { "epoch": 0.07489061947765469, "grad_norm": 2.0266199111938477, "learning_rate": 4.869082745349583e-06, "loss": 0.9571, "step": 6145 }, { "epoch": 0.0749515557018025, "grad_norm": 2.2203450202941895, "learning_rate": 4.868762026940347e-06, "loss": 0.9645, "step": 6150 }, { "epoch": 0.0750124919259503, "grad_norm": 1.8021390438079834, "learning_rate": 4.86844130853111e-06, "loss": 0.9635, "step": 6155 }, { "epoch": 0.07507342815009811, "grad_norm": 1.8062379360198975, "learning_rate": 4.868120590121874e-06, "loss": 0.9415, "step": 6160 }, { "epoch": 0.07513436437424592, "grad_norm": 1.8886853456497192, "learning_rate": 4.867799871712637e-06, "loss": 0.9121, "step": 6165 }, { "epoch": 0.07519530059839372, "grad_norm": 2.335338592529297, "learning_rate": 4.8674791533034e-06, "loss": 0.8749, "step": 6170 }, { "epoch": 0.07525623682254153, "grad_norm": 2.214508533477783, "learning_rate": 4.867158434894163e-06, "loss": 0.9646, "step": 6175 }, { "epoch": 0.07531717304668933, "grad_norm": 2.2471530437469482, "learning_rate": 4.866837716484927e-06, "loss": 0.9532, "step": 6180 }, { "epoch": 0.07537810927083714, "grad_norm": 2.2621874809265137, "learning_rate": 4.86651699807569e-06, "loss": 0.9273, "step": 6185 }, { "epoch": 0.07543904549498495, "grad_norm": 2.0785531997680664, "learning_rate": 4.866196279666453e-06, "loss": 0.9623, "step": 6190 }, { "epoch": 0.07549998171913276, "grad_norm": 2.1352055072784424, "learning_rate": 4.865875561257217e-06, "loss": 0.9422, "step": 6195 }, { "epoch": 0.07556091794328056, "grad_norm": 2.0234503746032715, "learning_rate": 4.86555484284798e-06, "loss": 0.9267, "step": 6200 }, { "epoch": 0.07562185416742837, "grad_norm": 2.4233829975128174, "learning_rate": 4.865234124438743e-06, "loss": 0.9563, "step": 6205 }, { "epoch": 0.07568279039157617, "grad_norm": 1.9574421644210815, "learning_rate": 4.864913406029507e-06, "loss": 0.9908, "step": 6210 }, { "epoch": 0.07574372661572398, "grad_norm": 2.031392812728882, "learning_rate": 4.86459268762027e-06, "loss": 0.8936, "step": 6215 }, { "epoch": 0.07580466283987179, "grad_norm": 2.2192978858947754, "learning_rate": 4.864271969211033e-06, "loss": 0.9014, "step": 6220 }, { "epoch": 0.0758655990640196, "grad_norm": 2.249377727508545, "learning_rate": 4.863951250801797e-06, "loss": 0.9481, "step": 6225 }, { "epoch": 0.07592653528816741, "grad_norm": 1.9473501443862915, "learning_rate": 4.86363053239256e-06, "loss": 0.8906, "step": 6230 }, { "epoch": 0.0759874715123152, "grad_norm": 1.9548888206481934, "learning_rate": 4.863309813983323e-06, "loss": 1.0095, "step": 6235 }, { "epoch": 0.07604840773646301, "grad_norm": 2.1365411281585693, "learning_rate": 4.8629890955740865e-06, "loss": 0.9382, "step": 6240 }, { "epoch": 0.07610934396061082, "grad_norm": 1.8369001150131226, "learning_rate": 4.8626683771648496e-06, "loss": 0.9316, "step": 6245 }, { "epoch": 0.07617028018475863, "grad_norm": 2.339127540588379, "learning_rate": 4.8623476587556126e-06, "loss": 0.9374, "step": 6250 }, { "epoch": 0.07623121640890644, "grad_norm": 2.127126693725586, "learning_rate": 4.8620269403463764e-06, "loss": 0.8973, "step": 6255 }, { "epoch": 0.07629215263305425, "grad_norm": 2.194749593734741, "learning_rate": 4.8617062219371395e-06, "loss": 0.9472, "step": 6260 }, { "epoch": 0.07635308885720206, "grad_norm": 2.1761484146118164, "learning_rate": 4.8613855035279025e-06, "loss": 0.9692, "step": 6265 }, { "epoch": 0.07641402508134985, "grad_norm": 2.1947739124298096, "learning_rate": 4.861064785118666e-06, "loss": 1.0081, "step": 6270 }, { "epoch": 0.07647496130549766, "grad_norm": 1.9757435321807861, "learning_rate": 4.860744066709429e-06, "loss": 0.8762, "step": 6275 }, { "epoch": 0.07653589752964547, "grad_norm": 2.3992955684661865, "learning_rate": 4.860423348300193e-06, "loss": 0.9681, "step": 6280 }, { "epoch": 0.07659683375379328, "grad_norm": 1.8243988752365112, "learning_rate": 4.860102629890956e-06, "loss": 0.9121, "step": 6285 }, { "epoch": 0.07665776997794109, "grad_norm": 2.053520679473877, "learning_rate": 4.859781911481719e-06, "loss": 0.9312, "step": 6290 }, { "epoch": 0.0767187062020889, "grad_norm": 1.8739417791366577, "learning_rate": 4.859461193072483e-06, "loss": 0.9099, "step": 6295 }, { "epoch": 0.0767796424262367, "grad_norm": 2.114243507385254, "learning_rate": 4.859140474663246e-06, "loss": 0.9586, "step": 6300 }, { "epoch": 0.0768405786503845, "grad_norm": 2.1254639625549316, "learning_rate": 4.85881975625401e-06, "loss": 0.9706, "step": 6305 }, { "epoch": 0.07690151487453231, "grad_norm": 2.531479835510254, "learning_rate": 4.858499037844773e-06, "loss": 0.9826, "step": 6310 }, { "epoch": 0.07696245109868012, "grad_norm": 1.9453142881393433, "learning_rate": 4.858178319435536e-06, "loss": 0.977, "step": 6315 }, { "epoch": 0.07702338732282793, "grad_norm": 2.1653189659118652, "learning_rate": 4.8578576010263e-06, "loss": 0.9575, "step": 6320 }, { "epoch": 0.07708432354697574, "grad_norm": 1.990473747253418, "learning_rate": 4.857536882617063e-06, "loss": 0.997, "step": 6325 }, { "epoch": 0.07714525977112355, "grad_norm": 2.23555064201355, "learning_rate": 4.857216164207826e-06, "loss": 0.8522, "step": 6330 }, { "epoch": 0.07720619599527134, "grad_norm": 2.0460479259490967, "learning_rate": 4.856895445798589e-06, "loss": 0.9119, "step": 6335 }, { "epoch": 0.07726713221941915, "grad_norm": 1.996715784072876, "learning_rate": 4.856574727389353e-06, "loss": 0.9396, "step": 6340 }, { "epoch": 0.07732806844356696, "grad_norm": 1.8032236099243164, "learning_rate": 4.856254008980116e-06, "loss": 0.9111, "step": 6345 }, { "epoch": 0.07738900466771477, "grad_norm": 2.2316486835479736, "learning_rate": 4.855933290570879e-06, "loss": 0.9977, "step": 6350 }, { "epoch": 0.07744994089186258, "grad_norm": 2.190032958984375, "learning_rate": 4.855612572161643e-06, "loss": 0.9394, "step": 6355 }, { "epoch": 0.07751087711601039, "grad_norm": 2.118587017059326, "learning_rate": 4.855291853752406e-06, "loss": 0.9776, "step": 6360 }, { "epoch": 0.0775718133401582, "grad_norm": 2.060887336730957, "learning_rate": 4.854971135343169e-06, "loss": 0.9911, "step": 6365 }, { "epoch": 0.07763274956430599, "grad_norm": 2.7689716815948486, "learning_rate": 4.854650416933933e-06, "loss": 0.9732, "step": 6370 }, { "epoch": 0.0776936857884538, "grad_norm": 2.2243740558624268, "learning_rate": 4.854329698524696e-06, "loss": 0.9429, "step": 6375 }, { "epoch": 0.07775462201260161, "grad_norm": 2.2645113468170166, "learning_rate": 4.854008980115459e-06, "loss": 0.961, "step": 6380 }, { "epoch": 0.07781555823674942, "grad_norm": 1.9041110277175903, "learning_rate": 4.8536882617062225e-06, "loss": 0.9354, "step": 6385 }, { "epoch": 0.07787649446089723, "grad_norm": 2.13134503364563, "learning_rate": 4.8533675432969855e-06, "loss": 1.041, "step": 6390 }, { "epoch": 0.07793743068504504, "grad_norm": 2.1649110317230225, "learning_rate": 4.8530468248877485e-06, "loss": 0.9383, "step": 6395 }, { "epoch": 0.07799836690919285, "grad_norm": 1.9823123216629028, "learning_rate": 4.852726106478512e-06, "loss": 0.8977, "step": 6400 }, { "epoch": 0.07805930313334064, "grad_norm": 2.203291654586792, "learning_rate": 4.852405388069275e-06, "loss": 0.977, "step": 6405 }, { "epoch": 0.07812023935748845, "grad_norm": 1.7971915006637573, "learning_rate": 4.8520846696600384e-06, "loss": 0.9218, "step": 6410 }, { "epoch": 0.07818117558163626, "grad_norm": 2.1926238536834717, "learning_rate": 4.851763951250802e-06, "loss": 0.9368, "step": 6415 }, { "epoch": 0.07824211180578407, "grad_norm": 1.9904050827026367, "learning_rate": 4.851443232841565e-06, "loss": 0.9516, "step": 6420 }, { "epoch": 0.07830304802993188, "grad_norm": 1.9998832941055298, "learning_rate": 4.851122514432328e-06, "loss": 0.9411, "step": 6425 }, { "epoch": 0.07836398425407969, "grad_norm": 2.073882818222046, "learning_rate": 4.850801796023092e-06, "loss": 0.89, "step": 6430 }, { "epoch": 0.07842492047822748, "grad_norm": 2.128140687942505, "learning_rate": 4.850481077613855e-06, "loss": 0.9143, "step": 6435 }, { "epoch": 0.07848585670237529, "grad_norm": 2.195711135864258, "learning_rate": 4.850160359204619e-06, "loss": 0.8983, "step": 6440 }, { "epoch": 0.0785467929265231, "grad_norm": 2.0700173377990723, "learning_rate": 4.849839640795382e-06, "loss": 0.9628, "step": 6445 }, { "epoch": 0.07860772915067091, "grad_norm": 2.0421550273895264, "learning_rate": 4.849518922386145e-06, "loss": 0.9895, "step": 6450 }, { "epoch": 0.07866866537481872, "grad_norm": 1.8964378833770752, "learning_rate": 4.849198203976909e-06, "loss": 0.8588, "step": 6455 }, { "epoch": 0.07872960159896653, "grad_norm": 1.8768057823181152, "learning_rate": 4.848877485567672e-06, "loss": 0.93, "step": 6460 }, { "epoch": 0.07879053782311433, "grad_norm": 2.238158941268921, "learning_rate": 4.848556767158436e-06, "loss": 0.9286, "step": 6465 }, { "epoch": 0.07885147404726213, "grad_norm": 2.0766067504882812, "learning_rate": 4.848236048749199e-06, "loss": 0.9534, "step": 6470 }, { "epoch": 0.07891241027140994, "grad_norm": 2.1413984298706055, "learning_rate": 4.847915330339962e-06, "loss": 0.9166, "step": 6475 }, { "epoch": 0.07897334649555775, "grad_norm": 1.8705217838287354, "learning_rate": 4.847594611930726e-06, "loss": 0.9825, "step": 6480 }, { "epoch": 0.07903428271970556, "grad_norm": 1.7580273151397705, "learning_rate": 4.847273893521489e-06, "loss": 0.908, "step": 6485 }, { "epoch": 0.07909521894385337, "grad_norm": 2.0084216594696045, "learning_rate": 4.846953175112252e-06, "loss": 0.9545, "step": 6490 }, { "epoch": 0.07915615516800117, "grad_norm": 2.157071352005005, "learning_rate": 4.846632456703016e-06, "loss": 0.9637, "step": 6495 }, { "epoch": 0.07921709139214898, "grad_norm": 2.3469314575195312, "learning_rate": 4.846311738293779e-06, "loss": 0.9813, "step": 6500 }, { "epoch": 0.07927802761629678, "grad_norm": 2.0536954402923584, "learning_rate": 4.845991019884542e-06, "loss": 0.9297, "step": 6505 }, { "epoch": 0.07933896384044459, "grad_norm": 2.1143805980682373, "learning_rate": 4.845670301475305e-06, "loss": 0.8607, "step": 6510 }, { "epoch": 0.0793999000645924, "grad_norm": 1.9045823812484741, "learning_rate": 4.8453495830660686e-06, "loss": 0.9221, "step": 6515 }, { "epoch": 0.0794608362887402, "grad_norm": 2.1854519844055176, "learning_rate": 4.8450288646568316e-06, "loss": 0.8504, "step": 6520 }, { "epoch": 0.07952177251288801, "grad_norm": 2.0399694442749023, "learning_rate": 4.844708146247595e-06, "loss": 0.9412, "step": 6525 }, { "epoch": 0.07958270873703582, "grad_norm": 1.8649539947509766, "learning_rate": 4.8443874278383585e-06, "loss": 0.9825, "step": 6530 }, { "epoch": 0.07964364496118362, "grad_norm": 1.794996976852417, "learning_rate": 4.8440667094291215e-06, "loss": 0.9439, "step": 6535 }, { "epoch": 0.07970458118533143, "grad_norm": 2.0114805698394775, "learning_rate": 4.8437459910198845e-06, "loss": 0.8907, "step": 6540 }, { "epoch": 0.07976551740947924, "grad_norm": 2.1456496715545654, "learning_rate": 4.843425272610648e-06, "loss": 0.9474, "step": 6545 }, { "epoch": 0.07982645363362705, "grad_norm": 1.793847918510437, "learning_rate": 4.843104554201411e-06, "loss": 0.9066, "step": 6550 }, { "epoch": 0.07988738985777485, "grad_norm": 2.3565006256103516, "learning_rate": 4.842783835792174e-06, "loss": 0.9396, "step": 6555 }, { "epoch": 0.07994832608192266, "grad_norm": 1.9100393056869507, "learning_rate": 4.842463117382938e-06, "loss": 0.941, "step": 6560 }, { "epoch": 0.08000926230607047, "grad_norm": 2.1562438011169434, "learning_rate": 4.842142398973701e-06, "loss": 0.8769, "step": 6565 }, { "epoch": 0.08007019853021827, "grad_norm": 2.394960880279541, "learning_rate": 4.841821680564464e-06, "loss": 0.9752, "step": 6570 }, { "epoch": 0.08013113475436608, "grad_norm": 2.0163116455078125, "learning_rate": 4.841500962155228e-06, "loss": 0.8909, "step": 6575 }, { "epoch": 0.08019207097851389, "grad_norm": 2.048614025115967, "learning_rate": 4.841180243745991e-06, "loss": 0.9341, "step": 6580 }, { "epoch": 0.0802530072026617, "grad_norm": 1.998561978340149, "learning_rate": 4.840859525336755e-06, "loss": 0.9344, "step": 6585 }, { "epoch": 0.0803139434268095, "grad_norm": 2.284200668334961, "learning_rate": 4.840538806927518e-06, "loss": 0.9767, "step": 6590 }, { "epoch": 0.08037487965095731, "grad_norm": 2.6436097621917725, "learning_rate": 4.840218088518281e-06, "loss": 0.9704, "step": 6595 }, { "epoch": 0.08043581587510512, "grad_norm": 2.2672598361968994, "learning_rate": 4.839897370109045e-06, "loss": 0.9896, "step": 6600 }, { "epoch": 0.08049675209925292, "grad_norm": 2.0784504413604736, "learning_rate": 4.839576651699808e-06, "loss": 0.8664, "step": 6605 }, { "epoch": 0.08055768832340073, "grad_norm": 1.9849109649658203, "learning_rate": 4.839255933290572e-06, "loss": 1.0324, "step": 6610 }, { "epoch": 0.08061862454754853, "grad_norm": 2.285675287246704, "learning_rate": 4.838935214881335e-06, "loss": 0.9642, "step": 6615 }, { "epoch": 0.08067956077169634, "grad_norm": 2.031477689743042, "learning_rate": 4.838614496472098e-06, "loss": 0.891, "step": 6620 }, { "epoch": 0.08074049699584415, "grad_norm": 2.31709361076355, "learning_rate": 4.838293778062862e-06, "loss": 0.9535, "step": 6625 }, { "epoch": 0.08080143321999196, "grad_norm": 1.83560049533844, "learning_rate": 4.837973059653625e-06, "loss": 0.9062, "step": 6630 }, { "epoch": 0.08086236944413977, "grad_norm": 1.7588400840759277, "learning_rate": 4.837652341244388e-06, "loss": 0.8611, "step": 6635 }, { "epoch": 0.08092330566828757, "grad_norm": 2.2519514560699463, "learning_rate": 4.837331622835152e-06, "loss": 0.9843, "step": 6640 }, { "epoch": 0.08098424189243537, "grad_norm": 1.9540269374847412, "learning_rate": 4.837010904425915e-06, "loss": 0.8984, "step": 6645 }, { "epoch": 0.08104517811658318, "grad_norm": 1.9172619581222534, "learning_rate": 4.836690186016678e-06, "loss": 0.8871, "step": 6650 }, { "epoch": 0.08110611434073099, "grad_norm": 2.0083110332489014, "learning_rate": 4.8363694676074415e-06, "loss": 0.886, "step": 6655 }, { "epoch": 0.0811670505648788, "grad_norm": 2.222327947616577, "learning_rate": 4.8360487491982045e-06, "loss": 0.987, "step": 6660 }, { "epoch": 0.08122798678902661, "grad_norm": 2.073199510574341, "learning_rate": 4.8357280307889675e-06, "loss": 0.9381, "step": 6665 }, { "epoch": 0.0812889230131744, "grad_norm": 2.2343034744262695, "learning_rate": 4.835407312379731e-06, "loss": 0.9251, "step": 6670 }, { "epoch": 0.08134985923732221, "grad_norm": 2.130352258682251, "learning_rate": 4.835086593970494e-06, "loss": 0.9177, "step": 6675 }, { "epoch": 0.08141079546147002, "grad_norm": 2.0435919761657715, "learning_rate": 4.8347658755612574e-06, "loss": 0.9624, "step": 6680 }, { "epoch": 0.08147173168561783, "grad_norm": 2.066668748855591, "learning_rate": 4.8344451571520204e-06, "loss": 0.8904, "step": 6685 }, { "epoch": 0.08153266790976564, "grad_norm": 2.9028432369232178, "learning_rate": 4.834124438742784e-06, "loss": 0.9742, "step": 6690 }, { "epoch": 0.08159360413391345, "grad_norm": 1.832720398902893, "learning_rate": 4.833803720333547e-06, "loss": 0.9387, "step": 6695 }, { "epoch": 0.08165454035806126, "grad_norm": 1.8992513418197632, "learning_rate": 4.83348300192431e-06, "loss": 0.9588, "step": 6700 }, { "epoch": 0.08171547658220905, "grad_norm": 2.0749385356903076, "learning_rate": 4.833162283515074e-06, "loss": 0.9247, "step": 6705 }, { "epoch": 0.08177641280635686, "grad_norm": 2.207242250442505, "learning_rate": 4.832841565105837e-06, "loss": 0.9647, "step": 6710 }, { "epoch": 0.08183734903050467, "grad_norm": 2.0873661041259766, "learning_rate": 4.8325208466966e-06, "loss": 0.973, "step": 6715 }, { "epoch": 0.08189828525465248, "grad_norm": 2.1332056522369385, "learning_rate": 4.832200128287364e-06, "loss": 0.9398, "step": 6720 }, { "epoch": 0.08195922147880029, "grad_norm": 2.2976720333099365, "learning_rate": 4.831879409878127e-06, "loss": 0.9493, "step": 6725 }, { "epoch": 0.0820201577029481, "grad_norm": 2.6296842098236084, "learning_rate": 4.831558691468891e-06, "loss": 0.9164, "step": 6730 }, { "epoch": 0.08208109392709591, "grad_norm": 2.039426565170288, "learning_rate": 4.831237973059654e-06, "loss": 0.8993, "step": 6735 }, { "epoch": 0.0821420301512437, "grad_norm": 1.961220383644104, "learning_rate": 4.830917254650417e-06, "loss": 0.9186, "step": 6740 }, { "epoch": 0.08220296637539151, "grad_norm": 2.1695544719696045, "learning_rate": 4.830596536241181e-06, "loss": 0.9421, "step": 6745 }, { "epoch": 0.08226390259953932, "grad_norm": 2.3010709285736084, "learning_rate": 4.830275817831944e-06, "loss": 0.8014, "step": 6750 }, { "epoch": 0.08232483882368713, "grad_norm": 2.2804148197174072, "learning_rate": 4.829955099422708e-06, "loss": 0.9578, "step": 6755 }, { "epoch": 0.08238577504783494, "grad_norm": 2.1143274307250977, "learning_rate": 4.829634381013471e-06, "loss": 0.9166, "step": 6760 }, { "epoch": 0.08244671127198275, "grad_norm": 2.152611017227173, "learning_rate": 4.829313662604234e-06, "loss": 0.9448, "step": 6765 }, { "epoch": 0.08250764749613054, "grad_norm": 1.863962173461914, "learning_rate": 4.828992944194998e-06, "loss": 0.8968, "step": 6770 }, { "epoch": 0.08256858372027835, "grad_norm": 2.0519895553588867, "learning_rate": 4.828672225785761e-06, "loss": 0.8996, "step": 6775 }, { "epoch": 0.08262951994442616, "grad_norm": 2.1846346855163574, "learning_rate": 4.828351507376524e-06, "loss": 0.9267, "step": 6780 }, { "epoch": 0.08269045616857397, "grad_norm": 2.1790270805358887, "learning_rate": 4.8280307889672876e-06, "loss": 0.9482, "step": 6785 }, { "epoch": 0.08275139239272178, "grad_norm": 2.043017864227295, "learning_rate": 4.8277100705580506e-06, "loss": 0.9601, "step": 6790 }, { "epoch": 0.08281232861686959, "grad_norm": 2.3086771965026855, "learning_rate": 4.827389352148814e-06, "loss": 0.9577, "step": 6795 }, { "epoch": 0.0828732648410174, "grad_norm": 2.046595573425293, "learning_rate": 4.8270686337395774e-06, "loss": 0.8949, "step": 6800 }, { "epoch": 0.08293420106516519, "grad_norm": 2.8353049755096436, "learning_rate": 4.8267479153303405e-06, "loss": 0.9152, "step": 6805 }, { "epoch": 0.082995137289313, "grad_norm": 2.4530153274536133, "learning_rate": 4.8264271969211035e-06, "loss": 0.9275, "step": 6810 }, { "epoch": 0.08305607351346081, "grad_norm": 2.3794615268707275, "learning_rate": 4.826106478511867e-06, "loss": 0.9823, "step": 6815 }, { "epoch": 0.08311700973760862, "grad_norm": 1.8662110567092896, "learning_rate": 4.82578576010263e-06, "loss": 0.8793, "step": 6820 }, { "epoch": 0.08317794596175643, "grad_norm": 2.1208302974700928, "learning_rate": 4.825465041693393e-06, "loss": 0.8914, "step": 6825 }, { "epoch": 0.08323888218590424, "grad_norm": 2.164259672164917, "learning_rate": 4.825144323284157e-06, "loss": 0.8983, "step": 6830 }, { "epoch": 0.08329981841005205, "grad_norm": 1.9149514436721802, "learning_rate": 4.82482360487492e-06, "loss": 0.8891, "step": 6835 }, { "epoch": 0.08336075463419984, "grad_norm": 2.0124576091766357, "learning_rate": 4.824502886465683e-06, "loss": 0.8385, "step": 6840 }, { "epoch": 0.08342169085834765, "grad_norm": 2.0276834964752197, "learning_rate": 4.824182168056446e-06, "loss": 0.9874, "step": 6845 }, { "epoch": 0.08348262708249546, "grad_norm": 2.084742307662964, "learning_rate": 4.82386144964721e-06, "loss": 0.9164, "step": 6850 }, { "epoch": 0.08354356330664327, "grad_norm": 2.3387086391448975, "learning_rate": 4.823540731237973e-06, "loss": 0.9114, "step": 6855 }, { "epoch": 0.08360449953079108, "grad_norm": 2.335552930831909, "learning_rate": 4.823220012828736e-06, "loss": 1.0341, "step": 6860 }, { "epoch": 0.08366543575493889, "grad_norm": 1.8856899738311768, "learning_rate": 4.8228992944195e-06, "loss": 0.9899, "step": 6865 }, { "epoch": 0.0837263719790867, "grad_norm": 2.08134388923645, "learning_rate": 4.822578576010263e-06, "loss": 0.9433, "step": 6870 }, { "epoch": 0.08378730820323449, "grad_norm": 2.318678855895996, "learning_rate": 4.822257857601026e-06, "loss": 0.9841, "step": 6875 }, { "epoch": 0.0838482444273823, "grad_norm": 2.2905099391937256, "learning_rate": 4.82193713919179e-06, "loss": 0.9579, "step": 6880 }, { "epoch": 0.08390918065153011, "grad_norm": 2.11892032623291, "learning_rate": 4.821616420782553e-06, "loss": 1.0517, "step": 6885 }, { "epoch": 0.08397011687567792, "grad_norm": 1.7290277481079102, "learning_rate": 4.821295702373317e-06, "loss": 0.8158, "step": 6890 }, { "epoch": 0.08403105309982573, "grad_norm": 2.1645572185516357, "learning_rate": 4.82097498396408e-06, "loss": 0.9729, "step": 6895 }, { "epoch": 0.08409198932397353, "grad_norm": 2.315171003341675, "learning_rate": 4.820654265554843e-06, "loss": 0.9209, "step": 6900 }, { "epoch": 0.08415292554812133, "grad_norm": 2.2783122062683105, "learning_rate": 4.820333547145607e-06, "loss": 0.9126, "step": 6905 }, { "epoch": 0.08421386177226914, "grad_norm": 1.9721159934997559, "learning_rate": 4.82001282873637e-06, "loss": 0.9082, "step": 6910 }, { "epoch": 0.08427479799641695, "grad_norm": 2.634104013442993, "learning_rate": 4.819692110327134e-06, "loss": 0.8814, "step": 6915 }, { "epoch": 0.08433573422056476, "grad_norm": 1.9771839380264282, "learning_rate": 4.819371391917897e-06, "loss": 0.965, "step": 6920 }, { "epoch": 0.08439667044471257, "grad_norm": 2.013723373413086, "learning_rate": 4.81905067350866e-06, "loss": 0.8709, "step": 6925 }, { "epoch": 0.08445760666886037, "grad_norm": 2.351365804672241, "learning_rate": 4.8187299550994235e-06, "loss": 0.9043, "step": 6930 }, { "epoch": 0.08451854289300818, "grad_norm": 2.339461088180542, "learning_rate": 4.8184092366901865e-06, "loss": 0.9493, "step": 6935 }, { "epoch": 0.08457947911715598, "grad_norm": 1.9695179462432861, "learning_rate": 4.8180885182809495e-06, "loss": 0.9048, "step": 6940 }, { "epoch": 0.08464041534130379, "grad_norm": 1.876617431640625, "learning_rate": 4.817767799871713e-06, "loss": 0.9183, "step": 6945 }, { "epoch": 0.0847013515654516, "grad_norm": 2.405707597732544, "learning_rate": 4.8174470814624764e-06, "loss": 0.9969, "step": 6950 }, { "epoch": 0.0847622877895994, "grad_norm": 2.251875877380371, "learning_rate": 4.8171263630532394e-06, "loss": 0.9665, "step": 6955 }, { "epoch": 0.08482322401374721, "grad_norm": 2.1219818592071533, "learning_rate": 4.816805644644003e-06, "loss": 0.8937, "step": 6960 }, { "epoch": 0.08488416023789502, "grad_norm": 2.0210931301116943, "learning_rate": 4.816484926234766e-06, "loss": 0.8616, "step": 6965 }, { "epoch": 0.08494509646204283, "grad_norm": 2.6594650745391846, "learning_rate": 4.816164207825529e-06, "loss": 0.9432, "step": 6970 }, { "epoch": 0.08500603268619063, "grad_norm": 2.045768976211548, "learning_rate": 4.815843489416293e-06, "loss": 0.978, "step": 6975 }, { "epoch": 0.08506696891033844, "grad_norm": 2.024988889694214, "learning_rate": 4.815522771007056e-06, "loss": 0.9222, "step": 6980 }, { "epoch": 0.08512790513448625, "grad_norm": 2.04422926902771, "learning_rate": 4.815202052597819e-06, "loss": 0.9115, "step": 6985 }, { "epoch": 0.08518884135863405, "grad_norm": 2.4125137329101562, "learning_rate": 4.814881334188583e-06, "loss": 0.9548, "step": 6990 }, { "epoch": 0.08524977758278186, "grad_norm": 1.8670135736465454, "learning_rate": 4.814560615779346e-06, "loss": 0.9471, "step": 6995 }, { "epoch": 0.08531071380692967, "grad_norm": 2.4212558269500732, "learning_rate": 4.814239897370109e-06, "loss": 0.922, "step": 7000 }, { "epoch": 0.08537165003107748, "grad_norm": 2.085561513900757, "learning_rate": 4.813919178960873e-06, "loss": 0.8421, "step": 7005 }, { "epoch": 0.08543258625522528, "grad_norm": 1.9715999364852905, "learning_rate": 4.813598460551636e-06, "loss": 0.9142, "step": 7010 }, { "epoch": 0.08549352247937309, "grad_norm": 2.401533842086792, "learning_rate": 4.813277742142399e-06, "loss": 0.9342, "step": 7015 }, { "epoch": 0.0855544587035209, "grad_norm": 1.9141807556152344, "learning_rate": 4.812957023733162e-06, "loss": 0.9946, "step": 7020 }, { "epoch": 0.0856153949276687, "grad_norm": 2.627074956893921, "learning_rate": 4.812636305323926e-06, "loss": 1.027, "step": 7025 }, { "epoch": 0.08567633115181651, "grad_norm": 2.141732931137085, "learning_rate": 4.812315586914689e-06, "loss": 0.9631, "step": 7030 }, { "epoch": 0.08573726737596432, "grad_norm": 2.078641176223755, "learning_rate": 4.811994868505453e-06, "loss": 0.89, "step": 7035 }, { "epoch": 0.08579820360011212, "grad_norm": 2.4260966777801514, "learning_rate": 4.811674150096216e-06, "loss": 0.9632, "step": 7040 }, { "epoch": 0.08585913982425993, "grad_norm": 2.8582587242126465, "learning_rate": 4.811353431686979e-06, "loss": 0.9295, "step": 7045 }, { "epoch": 0.08592007604840773, "grad_norm": 1.9664297103881836, "learning_rate": 4.811032713277743e-06, "loss": 0.9118, "step": 7050 }, { "epoch": 0.08598101227255554, "grad_norm": 2.23750901222229, "learning_rate": 4.810711994868506e-06, "loss": 0.95, "step": 7055 }, { "epoch": 0.08604194849670335, "grad_norm": 1.870489478111267, "learning_rate": 4.8103912764592696e-06, "loss": 0.9472, "step": 7060 }, { "epoch": 0.08610288472085116, "grad_norm": 1.9752862453460693, "learning_rate": 4.810070558050033e-06, "loss": 0.8919, "step": 7065 }, { "epoch": 0.08616382094499897, "grad_norm": 2.2416093349456787, "learning_rate": 4.809749839640796e-06, "loss": 0.9174, "step": 7070 }, { "epoch": 0.08622475716914677, "grad_norm": 1.7908618450164795, "learning_rate": 4.8094291212315595e-06, "loss": 1.0152, "step": 7075 }, { "epoch": 0.08628569339329457, "grad_norm": 2.030872344970703, "learning_rate": 4.8091084028223225e-06, "loss": 0.9572, "step": 7080 }, { "epoch": 0.08634662961744238, "grad_norm": 2.695528745651245, "learning_rate": 4.808787684413086e-06, "loss": 0.9166, "step": 7085 }, { "epoch": 0.08640756584159019, "grad_norm": 1.7504909038543701, "learning_rate": 4.808466966003849e-06, "loss": 0.91, "step": 7090 }, { "epoch": 0.086468502065738, "grad_norm": 2.3087871074676514, "learning_rate": 4.808146247594612e-06, "loss": 0.9015, "step": 7095 }, { "epoch": 0.08652943828988581, "grad_norm": 2.6088685989379883, "learning_rate": 4.807825529185375e-06, "loss": 0.8748, "step": 7100 }, { "epoch": 0.08659037451403362, "grad_norm": 1.8509798049926758, "learning_rate": 4.807504810776139e-06, "loss": 0.9069, "step": 7105 }, { "epoch": 0.08665131073818141, "grad_norm": 1.9434236288070679, "learning_rate": 4.807184092366902e-06, "loss": 0.9012, "step": 7110 }, { "epoch": 0.08671224696232922, "grad_norm": 1.7816276550292969, "learning_rate": 4.806863373957665e-06, "loss": 0.911, "step": 7115 }, { "epoch": 0.08677318318647703, "grad_norm": 2.323180913925171, "learning_rate": 4.806542655548429e-06, "loss": 0.9725, "step": 7120 }, { "epoch": 0.08683411941062484, "grad_norm": 4.22190523147583, "learning_rate": 4.806221937139192e-06, "loss": 0.9816, "step": 7125 }, { "epoch": 0.08689505563477265, "grad_norm": 2.342141628265381, "learning_rate": 4.805901218729955e-06, "loss": 0.9119, "step": 7130 }, { "epoch": 0.08695599185892046, "grad_norm": 2.2150118350982666, "learning_rate": 4.805580500320719e-06, "loss": 0.9396, "step": 7135 }, { "epoch": 0.08701692808306825, "grad_norm": 1.6652734279632568, "learning_rate": 4.805259781911482e-06, "loss": 0.9247, "step": 7140 }, { "epoch": 0.08707786430721606, "grad_norm": 2.150707244873047, "learning_rate": 4.804939063502245e-06, "loss": 0.9647, "step": 7145 }, { "epoch": 0.08713880053136387, "grad_norm": 1.9644463062286377, "learning_rate": 4.804618345093009e-06, "loss": 0.9129, "step": 7150 }, { "epoch": 0.08719973675551168, "grad_norm": 2.0494484901428223, "learning_rate": 4.804297626683772e-06, "loss": 0.9149, "step": 7155 }, { "epoch": 0.08726067297965949, "grad_norm": 1.7859325408935547, "learning_rate": 4.803976908274535e-06, "loss": 0.9144, "step": 7160 }, { "epoch": 0.0873216092038073, "grad_norm": 2.063023805618286, "learning_rate": 4.803656189865299e-06, "loss": 0.9357, "step": 7165 }, { "epoch": 0.08738254542795511, "grad_norm": 2.240950584411621, "learning_rate": 4.803335471456062e-06, "loss": 0.9344, "step": 7170 }, { "epoch": 0.0874434816521029, "grad_norm": 2.1062870025634766, "learning_rate": 4.803014753046825e-06, "loss": 0.9765, "step": 7175 }, { "epoch": 0.08750441787625071, "grad_norm": 2.0267274379730225, "learning_rate": 4.802694034637588e-06, "loss": 0.895, "step": 7180 }, { "epoch": 0.08756535410039852, "grad_norm": 1.8818373680114746, "learning_rate": 4.802373316228352e-06, "loss": 0.8578, "step": 7185 }, { "epoch": 0.08762629032454633, "grad_norm": 1.9372528791427612, "learning_rate": 4.802052597819115e-06, "loss": 0.969, "step": 7190 }, { "epoch": 0.08768722654869414, "grad_norm": 1.7959895133972168, "learning_rate": 4.801731879409879e-06, "loss": 0.9913, "step": 7195 }, { "epoch": 0.08774816277284195, "grad_norm": 2.3128812313079834, "learning_rate": 4.801411161000642e-06, "loss": 0.9683, "step": 7200 }, { "epoch": 0.08780909899698976, "grad_norm": 2.301892042160034, "learning_rate": 4.8010904425914055e-06, "loss": 0.9433, "step": 7205 }, { "epoch": 0.08787003522113755, "grad_norm": 2.2926924228668213, "learning_rate": 4.8007697241821685e-06, "loss": 0.9545, "step": 7210 }, { "epoch": 0.08793097144528536, "grad_norm": 2.567042589187622, "learning_rate": 4.8004490057729316e-06, "loss": 0.8239, "step": 7215 }, { "epoch": 0.08799190766943317, "grad_norm": 6.152321815490723, "learning_rate": 4.800128287363695e-06, "loss": 0.9074, "step": 7220 }, { "epoch": 0.08805284389358098, "grad_norm": 1.9118752479553223, "learning_rate": 4.7998075689544584e-06, "loss": 0.8739, "step": 7225 }, { "epoch": 0.08811378011772879, "grad_norm": 2.3493921756744385, "learning_rate": 4.799486850545222e-06, "loss": 0.9299, "step": 7230 }, { "epoch": 0.0881747163418766, "grad_norm": 2.174729585647583, "learning_rate": 4.799166132135985e-06, "loss": 1.0237, "step": 7235 }, { "epoch": 0.0882356525660244, "grad_norm": 3.636669874191284, "learning_rate": 4.798845413726748e-06, "loss": 0.9874, "step": 7240 }, { "epoch": 0.0882965887901722, "grad_norm": 1.9855098724365234, "learning_rate": 4.798524695317512e-06, "loss": 0.9047, "step": 7245 }, { "epoch": 0.08835752501432001, "grad_norm": 2.0535855293273926, "learning_rate": 4.798203976908275e-06, "loss": 0.9029, "step": 7250 }, { "epoch": 0.08841846123846782, "grad_norm": 2.1381638050079346, "learning_rate": 4.797883258499038e-06, "loss": 0.8925, "step": 7255 }, { "epoch": 0.08847939746261563, "grad_norm": 2.517617702484131, "learning_rate": 4.797562540089802e-06, "loss": 0.9105, "step": 7260 }, { "epoch": 0.08854033368676344, "grad_norm": 1.9714499711990356, "learning_rate": 4.797241821680565e-06, "loss": 0.9646, "step": 7265 }, { "epoch": 0.08860126991091125, "grad_norm": 1.726201057434082, "learning_rate": 4.796921103271328e-06, "loss": 0.9218, "step": 7270 }, { "epoch": 0.08866220613505904, "grad_norm": 2.0243258476257324, "learning_rate": 4.796600384862091e-06, "loss": 0.8974, "step": 7275 }, { "epoch": 0.08872314235920685, "grad_norm": 1.903942346572876, "learning_rate": 4.796279666452855e-06, "loss": 0.9645, "step": 7280 }, { "epoch": 0.08878407858335466, "grad_norm": 1.9951122999191284, "learning_rate": 4.795958948043618e-06, "loss": 0.8906, "step": 7285 }, { "epoch": 0.08884501480750247, "grad_norm": 1.950618863105774, "learning_rate": 4.795638229634381e-06, "loss": 0.8706, "step": 7290 }, { "epoch": 0.08890595103165028, "grad_norm": 2.0396406650543213, "learning_rate": 4.795317511225145e-06, "loss": 0.8961, "step": 7295 }, { "epoch": 0.08896688725579809, "grad_norm": 1.833855390548706, "learning_rate": 4.794996792815908e-06, "loss": 0.8705, "step": 7300 }, { "epoch": 0.0890278234799459, "grad_norm": 2.03249454498291, "learning_rate": 4.794676074406671e-06, "loss": 0.9596, "step": 7305 }, { "epoch": 0.08908875970409369, "grad_norm": 1.9826524257659912, "learning_rate": 4.794355355997435e-06, "loss": 0.8695, "step": 7310 }, { "epoch": 0.0891496959282415, "grad_norm": 2.158702850341797, "learning_rate": 4.794034637588198e-06, "loss": 0.9437, "step": 7315 }, { "epoch": 0.08921063215238931, "grad_norm": 1.965775728225708, "learning_rate": 4.793713919178961e-06, "loss": 0.9232, "step": 7320 }, { "epoch": 0.08927156837653712, "grad_norm": 2.31723952293396, "learning_rate": 4.793393200769725e-06, "loss": 0.9574, "step": 7325 }, { "epoch": 0.08933250460068493, "grad_norm": 2.7622809410095215, "learning_rate": 4.793072482360488e-06, "loss": 0.9397, "step": 7330 }, { "epoch": 0.08939344082483273, "grad_norm": 2.228262186050415, "learning_rate": 4.792751763951251e-06, "loss": 0.8788, "step": 7335 }, { "epoch": 0.08945437704898054, "grad_norm": 1.8983056545257568, "learning_rate": 4.792431045542015e-06, "loss": 0.9213, "step": 7340 }, { "epoch": 0.08951531327312834, "grad_norm": 2.2601776123046875, "learning_rate": 4.792110327132778e-06, "loss": 0.9999, "step": 7345 }, { "epoch": 0.08957624949727615, "grad_norm": 1.8711588382720947, "learning_rate": 4.791789608723541e-06, "loss": 0.8972, "step": 7350 }, { "epoch": 0.08963718572142396, "grad_norm": 2.3431334495544434, "learning_rate": 4.7914688903143045e-06, "loss": 0.8863, "step": 7355 }, { "epoch": 0.08969812194557177, "grad_norm": 2.2549216747283936, "learning_rate": 4.7911481719050675e-06, "loss": 0.9001, "step": 7360 }, { "epoch": 0.08975905816971957, "grad_norm": 2.531609058380127, "learning_rate": 4.790827453495831e-06, "loss": 0.8944, "step": 7365 }, { "epoch": 0.08981999439386738, "grad_norm": 2.3974781036376953, "learning_rate": 4.790506735086594e-06, "loss": 0.9477, "step": 7370 }, { "epoch": 0.08988093061801518, "grad_norm": 2.268054485321045, "learning_rate": 4.790186016677357e-06, "loss": 0.9021, "step": 7375 }, { "epoch": 0.08994186684216299, "grad_norm": 2.088883638381958, "learning_rate": 4.789865298268121e-06, "loss": 0.9542, "step": 7380 }, { "epoch": 0.0900028030663108, "grad_norm": 2.3126916885375977, "learning_rate": 4.789544579858884e-06, "loss": 1.0022, "step": 7385 }, { "epoch": 0.0900637392904586, "grad_norm": 1.9775770902633667, "learning_rate": 4.789223861449648e-06, "loss": 1.0101, "step": 7390 }, { "epoch": 0.09012467551460641, "grad_norm": 1.9215114116668701, "learning_rate": 4.788903143040411e-06, "loss": 0.8863, "step": 7395 }, { "epoch": 0.09018561173875422, "grad_norm": 2.270382881164551, "learning_rate": 4.788582424631174e-06, "loss": 0.8891, "step": 7400 }, { "epoch": 0.09024654796290203, "grad_norm": 2.240250825881958, "learning_rate": 4.788261706221938e-06, "loss": 0.9096, "step": 7405 }, { "epoch": 0.09030748418704983, "grad_norm": 1.7566808462142944, "learning_rate": 4.787940987812701e-06, "loss": 0.8779, "step": 7410 }, { "epoch": 0.09036842041119764, "grad_norm": 2.0242221355438232, "learning_rate": 4.787620269403464e-06, "loss": 1.0233, "step": 7415 }, { "epoch": 0.09042935663534545, "grad_norm": 1.7889626026153564, "learning_rate": 4.787299550994228e-06, "loss": 0.882, "step": 7420 }, { "epoch": 0.09049029285949325, "grad_norm": 1.7876362800598145, "learning_rate": 4.786978832584991e-06, "loss": 0.9283, "step": 7425 }, { "epoch": 0.09055122908364106, "grad_norm": 1.8397997617721558, "learning_rate": 4.786658114175754e-06, "loss": 0.9707, "step": 7430 }, { "epoch": 0.09061216530778887, "grad_norm": 2.09940505027771, "learning_rate": 4.786337395766517e-06, "loss": 0.9491, "step": 7435 }, { "epoch": 0.09067310153193668, "grad_norm": 3.126763343811035, "learning_rate": 4.786016677357281e-06, "loss": 0.9679, "step": 7440 }, { "epoch": 0.09073403775608448, "grad_norm": 2.1460072994232178, "learning_rate": 4.785695958948044e-06, "loss": 0.9188, "step": 7445 }, { "epoch": 0.09079497398023229, "grad_norm": 2.048736572265625, "learning_rate": 4.785375240538807e-06, "loss": 0.8943, "step": 7450 }, { "epoch": 0.0908559102043801, "grad_norm": 1.8938684463500977, "learning_rate": 4.785054522129571e-06, "loss": 0.8733, "step": 7455 }, { "epoch": 0.0909168464285279, "grad_norm": 2.0592944622039795, "learning_rate": 4.784733803720334e-06, "loss": 0.8981, "step": 7460 }, { "epoch": 0.09097778265267571, "grad_norm": 2.1310739517211914, "learning_rate": 4.784413085311097e-06, "loss": 0.9925, "step": 7465 }, { "epoch": 0.09103871887682352, "grad_norm": 2.614304304122925, "learning_rate": 4.784092366901861e-06, "loss": 0.8562, "step": 7470 }, { "epoch": 0.09109965510097133, "grad_norm": 2.2887959480285645, "learning_rate": 4.783771648492624e-06, "loss": 0.8673, "step": 7475 }, { "epoch": 0.09116059132511913, "grad_norm": 2.027843952178955, "learning_rate": 4.783450930083387e-06, "loss": 0.876, "step": 7480 }, { "epoch": 0.09122152754926693, "grad_norm": 2.0919036865234375, "learning_rate": 4.7831302116741506e-06, "loss": 0.9327, "step": 7485 }, { "epoch": 0.09128246377341474, "grad_norm": 1.9272987842559814, "learning_rate": 4.7828094932649136e-06, "loss": 0.9172, "step": 7490 }, { "epoch": 0.09134339999756255, "grad_norm": 1.805300235748291, "learning_rate": 4.782488774855677e-06, "loss": 0.9376, "step": 7495 }, { "epoch": 0.09140433622171036, "grad_norm": 2.2065114974975586, "learning_rate": 4.7821680564464405e-06, "loss": 0.9204, "step": 7500 }, { "epoch": 0.09146527244585817, "grad_norm": 2.3646843433380127, "learning_rate": 4.7818473380372035e-06, "loss": 0.9129, "step": 7505 }, { "epoch": 0.09152620867000597, "grad_norm": 1.914488673210144, "learning_rate": 4.781526619627967e-06, "loss": 0.931, "step": 7510 }, { "epoch": 0.09158714489415377, "grad_norm": 2.414942741394043, "learning_rate": 4.78120590121873e-06, "loss": 0.9604, "step": 7515 }, { "epoch": 0.09164808111830158, "grad_norm": 2.023393154144287, "learning_rate": 4.780885182809493e-06, "loss": 0.8927, "step": 7520 }, { "epoch": 0.09170901734244939, "grad_norm": 1.8826125860214233, "learning_rate": 4.780564464400257e-06, "loss": 0.9144, "step": 7525 }, { "epoch": 0.0917699535665972, "grad_norm": 1.934658169746399, "learning_rate": 4.78024374599102e-06, "loss": 0.9238, "step": 7530 }, { "epoch": 0.09183088979074501, "grad_norm": 1.8769108057022095, "learning_rate": 4.779923027581784e-06, "loss": 0.9483, "step": 7535 }, { "epoch": 0.09189182601489282, "grad_norm": 1.8224563598632812, "learning_rate": 4.779602309172547e-06, "loss": 0.8715, "step": 7540 }, { "epoch": 0.09195276223904061, "grad_norm": 2.1903605461120605, "learning_rate": 4.77928159076331e-06, "loss": 0.9246, "step": 7545 }, { "epoch": 0.09201369846318842, "grad_norm": 1.99595046043396, "learning_rate": 4.778960872354074e-06, "loss": 0.9272, "step": 7550 }, { "epoch": 0.09207463468733623, "grad_norm": 1.8051209449768066, "learning_rate": 4.778640153944837e-06, "loss": 0.9377, "step": 7555 }, { "epoch": 0.09213557091148404, "grad_norm": 2.2034003734588623, "learning_rate": 4.7783194355356e-06, "loss": 0.9404, "step": 7560 }, { "epoch": 0.09219650713563185, "grad_norm": 1.847010850906372, "learning_rate": 4.777998717126364e-06, "loss": 0.931, "step": 7565 }, { "epoch": 0.09225744335977966, "grad_norm": 2.466892719268799, "learning_rate": 4.777677998717127e-06, "loss": 0.8602, "step": 7570 }, { "epoch": 0.09231837958392747, "grad_norm": 2.1136720180511475, "learning_rate": 4.77735728030789e-06, "loss": 0.88, "step": 7575 }, { "epoch": 0.09237931580807526, "grad_norm": 2.0207531452178955, "learning_rate": 4.777036561898654e-06, "loss": 0.8837, "step": 7580 }, { "epoch": 0.09244025203222307, "grad_norm": 2.416347026824951, "learning_rate": 4.776715843489417e-06, "loss": 0.9102, "step": 7585 }, { "epoch": 0.09250118825637088, "grad_norm": 1.981613278388977, "learning_rate": 4.77639512508018e-06, "loss": 0.9421, "step": 7590 }, { "epoch": 0.09256212448051869, "grad_norm": 2.661247730255127, "learning_rate": 4.776074406670944e-06, "loss": 0.9921, "step": 7595 }, { "epoch": 0.0926230607046665, "grad_norm": 2.143035411834717, "learning_rate": 4.775753688261707e-06, "loss": 0.9158, "step": 7600 }, { "epoch": 0.09268399692881431, "grad_norm": 1.9767389297485352, "learning_rate": 4.77543296985247e-06, "loss": 0.9446, "step": 7605 }, { "epoch": 0.0927449331529621, "grad_norm": 2.0826892852783203, "learning_rate": 4.775112251443233e-06, "loss": 0.8845, "step": 7610 }, { "epoch": 0.09280586937710991, "grad_norm": 2.3066442012786865, "learning_rate": 4.774791533033997e-06, "loss": 1.0629, "step": 7615 }, { "epoch": 0.09286680560125772, "grad_norm": 2.045722484588623, "learning_rate": 4.77447081462476e-06, "loss": 0.9392, "step": 7620 }, { "epoch": 0.09292774182540553, "grad_norm": 2.0314271450042725, "learning_rate": 4.774150096215523e-06, "loss": 0.9461, "step": 7625 }, { "epoch": 0.09298867804955334, "grad_norm": 1.970862627029419, "learning_rate": 4.7738293778062865e-06, "loss": 1.0239, "step": 7630 }, { "epoch": 0.09304961427370115, "grad_norm": 2.658550977706909, "learning_rate": 4.7735086593970495e-06, "loss": 0.9789, "step": 7635 }, { "epoch": 0.09311055049784896, "grad_norm": 1.9149727821350098, "learning_rate": 4.7731879409878125e-06, "loss": 0.8967, "step": 7640 }, { "epoch": 0.09317148672199675, "grad_norm": 2.236013174057007, "learning_rate": 4.772867222578576e-06, "loss": 0.952, "step": 7645 }, { "epoch": 0.09323242294614456, "grad_norm": 2.169174909591675, "learning_rate": 4.7725465041693394e-06, "loss": 0.9536, "step": 7650 }, { "epoch": 0.09329335917029237, "grad_norm": 2.192509174346924, "learning_rate": 4.7722257857601024e-06, "loss": 0.9428, "step": 7655 }, { "epoch": 0.09335429539444018, "grad_norm": 1.8955999612808228, "learning_rate": 4.771905067350866e-06, "loss": 0.9083, "step": 7660 }, { "epoch": 0.09341523161858799, "grad_norm": 1.9547942876815796, "learning_rate": 4.771584348941629e-06, "loss": 0.9, "step": 7665 }, { "epoch": 0.0934761678427358, "grad_norm": 2.127410411834717, "learning_rate": 4.771263630532393e-06, "loss": 0.9385, "step": 7670 }, { "epoch": 0.0935371040668836, "grad_norm": 2.138725996017456, "learning_rate": 4.770942912123156e-06, "loss": 0.9326, "step": 7675 }, { "epoch": 0.0935980402910314, "grad_norm": 2.3240411281585693, "learning_rate": 4.77062219371392e-06, "loss": 0.9091, "step": 7680 }, { "epoch": 0.09365897651517921, "grad_norm": 1.940781593322754, "learning_rate": 4.770301475304683e-06, "loss": 0.9227, "step": 7685 }, { "epoch": 0.09371991273932702, "grad_norm": 2.017167329788208, "learning_rate": 4.769980756895446e-06, "loss": 0.9786, "step": 7690 }, { "epoch": 0.09378084896347483, "grad_norm": 2.4298176765441895, "learning_rate": 4.76966003848621e-06, "loss": 0.9637, "step": 7695 }, { "epoch": 0.09384178518762264, "grad_norm": 2.1289358139038086, "learning_rate": 4.769339320076973e-06, "loss": 0.9765, "step": 7700 }, { "epoch": 0.09390272141177045, "grad_norm": 1.8138405084609985, "learning_rate": 4.769018601667736e-06, "loss": 0.8894, "step": 7705 }, { "epoch": 0.09396365763591825, "grad_norm": 2.644636631011963, "learning_rate": 4.7686978832585e-06, "loss": 0.9225, "step": 7710 }, { "epoch": 0.09402459386006605, "grad_norm": 1.9747201204299927, "learning_rate": 4.768377164849263e-06, "loss": 0.9753, "step": 7715 }, { "epoch": 0.09408553008421386, "grad_norm": 2.0535030364990234, "learning_rate": 4.768056446440026e-06, "loss": 0.9334, "step": 7720 }, { "epoch": 0.09414646630836167, "grad_norm": 2.1358063220977783, "learning_rate": 4.76773572803079e-06, "loss": 0.9816, "step": 7725 }, { "epoch": 0.09420740253250948, "grad_norm": 1.8958888053894043, "learning_rate": 4.767415009621553e-06, "loss": 0.9423, "step": 7730 }, { "epoch": 0.09426833875665729, "grad_norm": 2.046226978302002, "learning_rate": 4.767094291212316e-06, "loss": 0.9658, "step": 7735 }, { "epoch": 0.0943292749808051, "grad_norm": 1.9881007671356201, "learning_rate": 4.76677357280308e-06, "loss": 0.8991, "step": 7740 }, { "epoch": 0.09439021120495289, "grad_norm": 1.8895440101623535, "learning_rate": 4.766452854393843e-06, "loss": 0.9263, "step": 7745 }, { "epoch": 0.0944511474291007, "grad_norm": 2.2098195552825928, "learning_rate": 4.766132135984606e-06, "loss": 0.8773, "step": 7750 }, { "epoch": 0.09451208365324851, "grad_norm": 2.185847043991089, "learning_rate": 4.7658114175753696e-06, "loss": 0.9411, "step": 7755 }, { "epoch": 0.09457301987739632, "grad_norm": 1.879759430885315, "learning_rate": 4.7654906991661326e-06, "loss": 0.9602, "step": 7760 }, { "epoch": 0.09463395610154413, "grad_norm": 1.907683253288269, "learning_rate": 4.765169980756896e-06, "loss": 0.9231, "step": 7765 }, { "epoch": 0.09469489232569193, "grad_norm": 1.826238989830017, "learning_rate": 4.764849262347659e-06, "loss": 0.9246, "step": 7770 }, { "epoch": 0.09475582854983974, "grad_norm": 1.8469316959381104, "learning_rate": 4.7645285439384225e-06, "loss": 0.9175, "step": 7775 }, { "epoch": 0.09481676477398754, "grad_norm": 2.4672603607177734, "learning_rate": 4.7642078255291855e-06, "loss": 0.8716, "step": 7780 }, { "epoch": 0.09487770099813535, "grad_norm": 2.1861093044281006, "learning_rate": 4.7638871071199485e-06, "loss": 0.9412, "step": 7785 }, { "epoch": 0.09493863722228316, "grad_norm": 2.069005012512207, "learning_rate": 4.763566388710712e-06, "loss": 0.9771, "step": 7790 }, { "epoch": 0.09499957344643097, "grad_norm": 1.8024389743804932, "learning_rate": 4.763245670301475e-06, "loss": 0.9271, "step": 7795 }, { "epoch": 0.09506050967057877, "grad_norm": 2.185800790786743, "learning_rate": 4.762924951892238e-06, "loss": 0.8994, "step": 7800 }, { "epoch": 0.09512144589472658, "grad_norm": 1.9321203231811523, "learning_rate": 4.762604233483002e-06, "loss": 0.9442, "step": 7805 }, { "epoch": 0.09518238211887439, "grad_norm": 2.226386785507202, "learning_rate": 4.762283515073765e-06, "loss": 0.9254, "step": 7810 }, { "epoch": 0.09524331834302219, "grad_norm": 2.233790159225464, "learning_rate": 4.761962796664529e-06, "loss": 0.9652, "step": 7815 }, { "epoch": 0.09530425456717, "grad_norm": 2.2027385234832764, "learning_rate": 4.761642078255292e-06, "loss": 0.8329, "step": 7820 }, { "epoch": 0.0953651907913178, "grad_norm": 1.9457811117172241, "learning_rate": 4.761321359846055e-06, "loss": 0.9228, "step": 7825 }, { "epoch": 0.09542612701546561, "grad_norm": 1.869152545928955, "learning_rate": 4.761000641436819e-06, "loss": 0.9356, "step": 7830 }, { "epoch": 0.09548706323961342, "grad_norm": 1.8704944849014282, "learning_rate": 4.760679923027582e-06, "loss": 0.9313, "step": 7835 }, { "epoch": 0.09554799946376123, "grad_norm": 1.7948956489562988, "learning_rate": 4.760359204618346e-06, "loss": 0.8888, "step": 7840 }, { "epoch": 0.09560893568790904, "grad_norm": 2.5060970783233643, "learning_rate": 4.760038486209109e-06, "loss": 0.8745, "step": 7845 }, { "epoch": 0.09566987191205684, "grad_norm": 2.005239963531494, "learning_rate": 4.759717767799872e-06, "loss": 0.9075, "step": 7850 }, { "epoch": 0.09573080813620465, "grad_norm": 1.9032849073410034, "learning_rate": 4.759397049390636e-06, "loss": 0.8867, "step": 7855 }, { "epoch": 0.09579174436035245, "grad_norm": 2.545836925506592, "learning_rate": 4.759076330981399e-06, "loss": 0.9345, "step": 7860 }, { "epoch": 0.09585268058450026, "grad_norm": 2.016139030456543, "learning_rate": 4.758755612572162e-06, "loss": 0.908, "step": 7865 }, { "epoch": 0.09591361680864807, "grad_norm": 2.0335195064544678, "learning_rate": 4.758434894162926e-06, "loss": 0.9106, "step": 7870 }, { "epoch": 0.09597455303279588, "grad_norm": 2.0569303035736084, "learning_rate": 4.758114175753689e-06, "loss": 0.9349, "step": 7875 }, { "epoch": 0.09603548925694368, "grad_norm": 1.9056336879730225, "learning_rate": 4.757793457344452e-06, "loss": 0.9854, "step": 7880 }, { "epoch": 0.09609642548109149, "grad_norm": 2.3368544578552246, "learning_rate": 4.757472738935216e-06, "loss": 1.004, "step": 7885 }, { "epoch": 0.0961573617052393, "grad_norm": 2.0200326442718506, "learning_rate": 4.757152020525979e-06, "loss": 0.9578, "step": 7890 }, { "epoch": 0.0962182979293871, "grad_norm": 2.530238151550293, "learning_rate": 4.756831302116742e-06, "loss": 0.9356, "step": 7895 }, { "epoch": 0.09627923415353491, "grad_norm": 2.3082385063171387, "learning_rate": 4.7565105837075055e-06, "loss": 0.9237, "step": 7900 }, { "epoch": 0.09634017037768272, "grad_norm": 1.9801300764083862, "learning_rate": 4.7561898652982685e-06, "loss": 0.9487, "step": 7905 }, { "epoch": 0.09640110660183053, "grad_norm": 1.7842282056808472, "learning_rate": 4.7558691468890315e-06, "loss": 0.9684, "step": 7910 }, { "epoch": 0.09646204282597833, "grad_norm": 2.294867515563965, "learning_rate": 4.755548428479795e-06, "loss": 0.9915, "step": 7915 }, { "epoch": 0.09652297905012613, "grad_norm": 2.0797903537750244, "learning_rate": 4.7552277100705584e-06, "loss": 0.8556, "step": 7920 }, { "epoch": 0.09658391527427394, "grad_norm": 2.2383716106414795, "learning_rate": 4.7549069916613214e-06, "loss": 0.9331, "step": 7925 }, { "epoch": 0.09664485149842175, "grad_norm": 2.038297176361084, "learning_rate": 4.754586273252085e-06, "loss": 0.9107, "step": 7930 }, { "epoch": 0.09670578772256956, "grad_norm": 2.253730535507202, "learning_rate": 4.754265554842848e-06, "loss": 0.8909, "step": 7935 }, { "epoch": 0.09676672394671737, "grad_norm": 2.036807060241699, "learning_rate": 4.753944836433611e-06, "loss": 0.8417, "step": 7940 }, { "epoch": 0.09682766017086518, "grad_norm": 2.9657509326934814, "learning_rate": 4.753624118024374e-06, "loss": 0.9141, "step": 7945 }, { "epoch": 0.09688859639501297, "grad_norm": 2.286118268966675, "learning_rate": 4.753303399615138e-06, "loss": 0.9204, "step": 7950 }, { "epoch": 0.09694953261916078, "grad_norm": 2.162585973739624, "learning_rate": 4.752982681205901e-06, "loss": 0.8735, "step": 7955 }, { "epoch": 0.09701046884330859, "grad_norm": 1.873410940170288, "learning_rate": 4.752661962796665e-06, "loss": 0.9198, "step": 7960 }, { "epoch": 0.0970714050674564, "grad_norm": 2.0796713829040527, "learning_rate": 4.752341244387428e-06, "loss": 0.9197, "step": 7965 }, { "epoch": 0.09713234129160421, "grad_norm": 2.054398536682129, "learning_rate": 4.752020525978191e-06, "loss": 0.8567, "step": 7970 }, { "epoch": 0.09719327751575202, "grad_norm": 1.97373628616333, "learning_rate": 4.751699807568955e-06, "loss": 0.9414, "step": 7975 }, { "epoch": 0.09725421373989981, "grad_norm": 2.3761985301971436, "learning_rate": 4.751379089159718e-06, "loss": 0.9417, "step": 7980 }, { "epoch": 0.09731514996404762, "grad_norm": 2.0049057006835938, "learning_rate": 4.751058370750482e-06, "loss": 0.966, "step": 7985 }, { "epoch": 0.09737608618819543, "grad_norm": 2.0444600582122803, "learning_rate": 4.750737652341245e-06, "loss": 0.9469, "step": 7990 }, { "epoch": 0.09743702241234324, "grad_norm": 2.0737287998199463, "learning_rate": 4.750416933932008e-06, "loss": 0.9667, "step": 7995 }, { "epoch": 0.09749795863649105, "grad_norm": 1.962401270866394, "learning_rate": 4.750096215522772e-06, "loss": 0.9749, "step": 8000 }, { "epoch": 0.09755889486063886, "grad_norm": 1.8550528287887573, "learning_rate": 4.749775497113535e-06, "loss": 0.942, "step": 8005 }, { "epoch": 0.09761983108478667, "grad_norm": 2.1311745643615723, "learning_rate": 4.749454778704299e-06, "loss": 0.8951, "step": 8010 }, { "epoch": 0.09768076730893446, "grad_norm": 2.4035210609436035, "learning_rate": 4.749134060295062e-06, "loss": 0.9023, "step": 8015 }, { "epoch": 0.09774170353308227, "grad_norm": 2.1345503330230713, "learning_rate": 4.748813341885825e-06, "loss": 0.9452, "step": 8020 }, { "epoch": 0.09780263975723008, "grad_norm": 1.7475550174713135, "learning_rate": 4.748492623476588e-06, "loss": 0.9011, "step": 8025 }, { "epoch": 0.09786357598137789, "grad_norm": 2.3338751792907715, "learning_rate": 4.7481719050673516e-06, "loss": 0.9269, "step": 8030 }, { "epoch": 0.0979245122055257, "grad_norm": 1.961544394493103, "learning_rate": 4.747851186658115e-06, "loss": 0.8931, "step": 8035 }, { "epoch": 0.09798544842967351, "grad_norm": 1.9926375150680542, "learning_rate": 4.747530468248878e-06, "loss": 0.9683, "step": 8040 }, { "epoch": 0.09804638465382132, "grad_norm": 1.9739952087402344, "learning_rate": 4.7472097498396415e-06, "loss": 0.896, "step": 8045 }, { "epoch": 0.09810732087796911, "grad_norm": 1.8859587907791138, "learning_rate": 4.7468890314304045e-06, "loss": 0.8716, "step": 8050 }, { "epoch": 0.09816825710211692, "grad_norm": 2.1573235988616943, "learning_rate": 4.7465683130211675e-06, "loss": 0.894, "step": 8055 }, { "epoch": 0.09822919332626473, "grad_norm": 2.285555124282837, "learning_rate": 4.746247594611931e-06, "loss": 0.9457, "step": 8060 }, { "epoch": 0.09829012955041254, "grad_norm": 2.525275707244873, "learning_rate": 4.745926876202694e-06, "loss": 0.9924, "step": 8065 }, { "epoch": 0.09835106577456035, "grad_norm": 1.9987424612045288, "learning_rate": 4.745606157793457e-06, "loss": 0.9003, "step": 8070 }, { "epoch": 0.09841200199870816, "grad_norm": 1.968011736869812, "learning_rate": 4.745285439384221e-06, "loss": 0.9751, "step": 8075 }, { "epoch": 0.09847293822285597, "grad_norm": 2.045969247817993, "learning_rate": 4.744964720974984e-06, "loss": 0.9176, "step": 8080 }, { "epoch": 0.09853387444700376, "grad_norm": 1.8818403482437134, "learning_rate": 4.744644002565747e-06, "loss": 0.9637, "step": 8085 }, { "epoch": 0.09859481067115157, "grad_norm": 2.672522783279419, "learning_rate": 4.744323284156511e-06, "loss": 0.9141, "step": 8090 }, { "epoch": 0.09865574689529938, "grad_norm": 2.5961544513702393, "learning_rate": 4.744002565747274e-06, "loss": 0.9643, "step": 8095 }, { "epoch": 0.09871668311944719, "grad_norm": 2.6472041606903076, "learning_rate": 4.743681847338037e-06, "loss": 0.9114, "step": 8100 }, { "epoch": 0.098777619343595, "grad_norm": 1.7934584617614746, "learning_rate": 4.7433611289288e-06, "loss": 0.9978, "step": 8105 }, { "epoch": 0.0988385555677428, "grad_norm": 2.0736684799194336, "learning_rate": 4.743040410519564e-06, "loss": 0.9224, "step": 8110 }, { "epoch": 0.0988994917918906, "grad_norm": 2.200827121734619, "learning_rate": 4.742719692110327e-06, "loss": 0.9726, "step": 8115 }, { "epoch": 0.09896042801603841, "grad_norm": 2.045956611633301, "learning_rate": 4.742398973701091e-06, "loss": 0.9551, "step": 8120 }, { "epoch": 0.09902136424018622, "grad_norm": 2.7284739017486572, "learning_rate": 4.742078255291854e-06, "loss": 0.9698, "step": 8125 }, { "epoch": 0.09908230046433403, "grad_norm": 2.1364614963531494, "learning_rate": 4.741757536882618e-06, "loss": 0.8836, "step": 8130 }, { "epoch": 0.09914323668848184, "grad_norm": 2.243349313735962, "learning_rate": 4.741436818473381e-06, "loss": 0.929, "step": 8135 }, { "epoch": 0.09920417291262965, "grad_norm": 1.9796265363693237, "learning_rate": 4.741116100064144e-06, "loss": 0.9125, "step": 8140 }, { "epoch": 0.09926510913677745, "grad_norm": 2.062844753265381, "learning_rate": 4.740795381654908e-06, "loss": 0.9164, "step": 8145 }, { "epoch": 0.09932604536092525, "grad_norm": 2.149637222290039, "learning_rate": 4.740474663245671e-06, "loss": 1.0391, "step": 8150 }, { "epoch": 0.09938698158507306, "grad_norm": 2.1644644737243652, "learning_rate": 4.740153944836435e-06, "loss": 1.0344, "step": 8155 }, { "epoch": 0.09944791780922087, "grad_norm": 1.9899293184280396, "learning_rate": 4.739833226427198e-06, "loss": 0.9144, "step": 8160 }, { "epoch": 0.09950885403336868, "grad_norm": 1.7863998413085938, "learning_rate": 4.739512508017961e-06, "loss": 0.9897, "step": 8165 }, { "epoch": 0.09956979025751649, "grad_norm": 2.065837860107422, "learning_rate": 4.7391917896087245e-06, "loss": 0.9209, "step": 8170 }, { "epoch": 0.0996307264816643, "grad_norm": 2.0527403354644775, "learning_rate": 4.7388710711994875e-06, "loss": 0.9073, "step": 8175 }, { "epoch": 0.0996916627058121, "grad_norm": 2.4022738933563232, "learning_rate": 4.7385503527902505e-06, "loss": 0.9706, "step": 8180 }, { "epoch": 0.0997525989299599, "grad_norm": 2.247056007385254, "learning_rate": 4.738229634381014e-06, "loss": 0.9236, "step": 8185 }, { "epoch": 0.09981353515410771, "grad_norm": 2.23116397857666, "learning_rate": 4.7379089159717774e-06, "loss": 0.9009, "step": 8190 }, { "epoch": 0.09987447137825552, "grad_norm": 3.2064990997314453, "learning_rate": 4.7375881975625404e-06, "loss": 0.8531, "step": 8195 }, { "epoch": 0.09993540760240333, "grad_norm": 3.485119581222534, "learning_rate": 4.7372674791533035e-06, "loss": 0.9015, "step": 8200 }, { "epoch": 0.09999634382655113, "grad_norm": 2.0118627548217773, "learning_rate": 4.736946760744067e-06, "loss": 0.8835, "step": 8205 }, { "epoch": 0.10005728005069894, "grad_norm": 1.9517288208007812, "learning_rate": 4.73662604233483e-06, "loss": 0.8711, "step": 8210 }, { "epoch": 0.10011821627484674, "grad_norm": 2.3590734004974365, "learning_rate": 4.736305323925593e-06, "loss": 0.9296, "step": 8215 }, { "epoch": 0.10017915249899455, "grad_norm": 1.724117398262024, "learning_rate": 4.735984605516357e-06, "loss": 0.8688, "step": 8220 }, { "epoch": 0.10024008872314236, "grad_norm": 1.7765769958496094, "learning_rate": 4.73566388710712e-06, "loss": 0.9003, "step": 8225 }, { "epoch": 0.10030102494729017, "grad_norm": 1.9446302652359009, "learning_rate": 4.735343168697883e-06, "loss": 0.9354, "step": 8230 }, { "epoch": 0.10036196117143797, "grad_norm": 2.1893601417541504, "learning_rate": 4.735022450288647e-06, "loss": 0.9092, "step": 8235 }, { "epoch": 0.10042289739558578, "grad_norm": 1.98208749294281, "learning_rate": 4.73470173187941e-06, "loss": 0.9285, "step": 8240 }, { "epoch": 0.10048383361973359, "grad_norm": 2.1889150142669678, "learning_rate": 4.734381013470173e-06, "loss": 0.9773, "step": 8245 }, { "epoch": 0.10054476984388139, "grad_norm": 2.3524653911590576, "learning_rate": 4.734060295060937e-06, "loss": 0.9741, "step": 8250 }, { "epoch": 0.1006057060680292, "grad_norm": 2.309814214706421, "learning_rate": 4.7337395766517e-06, "loss": 0.9213, "step": 8255 }, { "epoch": 0.100666642292177, "grad_norm": 2.021740198135376, "learning_rate": 4.733418858242463e-06, "loss": 0.9163, "step": 8260 }, { "epoch": 0.10072757851632481, "grad_norm": 2.287910223007202, "learning_rate": 4.733098139833227e-06, "loss": 0.8975, "step": 8265 }, { "epoch": 0.10078851474047262, "grad_norm": 1.8899909257888794, "learning_rate": 4.73277742142399e-06, "loss": 0.8852, "step": 8270 }, { "epoch": 0.10084945096462043, "grad_norm": 1.9531868696212769, "learning_rate": 4.732456703014753e-06, "loss": 0.9584, "step": 8275 }, { "epoch": 0.10091038718876824, "grad_norm": 1.91768479347229, "learning_rate": 4.732135984605517e-06, "loss": 0.9062, "step": 8280 }, { "epoch": 0.10097132341291604, "grad_norm": 1.704755425453186, "learning_rate": 4.73181526619628e-06, "loss": 0.8767, "step": 8285 }, { "epoch": 0.10103225963706385, "grad_norm": 1.9756295680999756, "learning_rate": 4.731494547787044e-06, "loss": 0.8897, "step": 8290 }, { "epoch": 0.10109319586121165, "grad_norm": 1.9363298416137695, "learning_rate": 4.731173829377807e-06, "loss": 0.9503, "step": 8295 }, { "epoch": 0.10115413208535946, "grad_norm": 2.010800838470459, "learning_rate": 4.73085311096857e-06, "loss": 0.9379, "step": 8300 }, { "epoch": 0.10121506830950727, "grad_norm": 2.23850417137146, "learning_rate": 4.730532392559334e-06, "loss": 0.9278, "step": 8305 }, { "epoch": 0.10127600453365508, "grad_norm": 2.0575718879699707, "learning_rate": 4.730211674150097e-06, "loss": 0.9533, "step": 8310 }, { "epoch": 0.10133694075780289, "grad_norm": 2.1264286041259766, "learning_rate": 4.7298909557408605e-06, "loss": 0.9182, "step": 8315 }, { "epoch": 0.10139787698195069, "grad_norm": 1.9656238555908203, "learning_rate": 4.7295702373316235e-06, "loss": 0.9094, "step": 8320 }, { "epoch": 0.1014588132060985, "grad_norm": 1.9864729642868042, "learning_rate": 4.7292495189223865e-06, "loss": 0.938, "step": 8325 }, { "epoch": 0.1015197494302463, "grad_norm": 2.097409248352051, "learning_rate": 4.72892880051315e-06, "loss": 0.8651, "step": 8330 }, { "epoch": 0.10158068565439411, "grad_norm": 2.2983391284942627, "learning_rate": 4.728608082103913e-06, "loss": 0.9903, "step": 8335 }, { "epoch": 0.10164162187854192, "grad_norm": 2.095107078552246, "learning_rate": 4.728287363694676e-06, "loss": 0.9451, "step": 8340 }, { "epoch": 0.10170255810268973, "grad_norm": 2.521742582321167, "learning_rate": 4.72796664528544e-06, "loss": 0.984, "step": 8345 }, { "epoch": 0.10176349432683753, "grad_norm": 1.9460004568099976, "learning_rate": 4.727645926876203e-06, "loss": 0.9795, "step": 8350 }, { "epoch": 0.10182443055098533, "grad_norm": 2.0656237602233887, "learning_rate": 4.727325208466966e-06, "loss": 0.9141, "step": 8355 }, { "epoch": 0.10188536677513314, "grad_norm": 1.9521057605743408, "learning_rate": 4.727004490057729e-06, "loss": 0.903, "step": 8360 }, { "epoch": 0.10194630299928095, "grad_norm": 1.9395917654037476, "learning_rate": 4.726683771648493e-06, "loss": 0.8849, "step": 8365 }, { "epoch": 0.10200723922342876, "grad_norm": 1.8056466579437256, "learning_rate": 4.726363053239256e-06, "loss": 0.8404, "step": 8370 }, { "epoch": 0.10206817544757657, "grad_norm": 1.8896183967590332, "learning_rate": 4.726042334830019e-06, "loss": 0.8683, "step": 8375 }, { "epoch": 0.10212911167172438, "grad_norm": 1.8382140398025513, "learning_rate": 4.725721616420783e-06, "loss": 0.881, "step": 8380 }, { "epoch": 0.10219004789587217, "grad_norm": 2.0544352531433105, "learning_rate": 4.725400898011546e-06, "loss": 0.8865, "step": 8385 }, { "epoch": 0.10225098412001998, "grad_norm": 2.1450517177581787, "learning_rate": 4.725080179602309e-06, "loss": 0.9895, "step": 8390 }, { "epoch": 0.10231192034416779, "grad_norm": 2.016580820083618, "learning_rate": 4.724759461193073e-06, "loss": 0.971, "step": 8395 }, { "epoch": 0.1023728565683156, "grad_norm": 2.044877529144287, "learning_rate": 4.724438742783836e-06, "loss": 0.9845, "step": 8400 }, { "epoch": 0.10243379279246341, "grad_norm": 2.0411667823791504, "learning_rate": 4.724118024374599e-06, "loss": 0.8451, "step": 8405 }, { "epoch": 0.10249472901661122, "grad_norm": 2.2532529830932617, "learning_rate": 4.723797305965363e-06, "loss": 0.9764, "step": 8410 }, { "epoch": 0.10255566524075903, "grad_norm": 2.1185994148254395, "learning_rate": 4.723476587556126e-06, "loss": 0.9369, "step": 8415 }, { "epoch": 0.10261660146490682, "grad_norm": 2.1737475395202637, "learning_rate": 4.723155869146889e-06, "loss": 0.9156, "step": 8420 }, { "epoch": 0.10267753768905463, "grad_norm": 2.0115549564361572, "learning_rate": 4.722835150737653e-06, "loss": 0.8861, "step": 8425 }, { "epoch": 0.10273847391320244, "grad_norm": 2.234729766845703, "learning_rate": 4.722514432328416e-06, "loss": 0.9401, "step": 8430 }, { "epoch": 0.10279941013735025, "grad_norm": 2.271008014678955, "learning_rate": 4.72219371391918e-06, "loss": 0.9071, "step": 8435 }, { "epoch": 0.10286034636149806, "grad_norm": 2.230844020843506, "learning_rate": 4.721872995509943e-06, "loss": 0.9055, "step": 8440 }, { "epoch": 0.10292128258564587, "grad_norm": 1.827703833580017, "learning_rate": 4.721552277100706e-06, "loss": 0.9137, "step": 8445 }, { "epoch": 0.10298221880979366, "grad_norm": 2.3895797729492188, "learning_rate": 4.7212315586914695e-06, "loss": 0.8656, "step": 8450 }, { "epoch": 0.10304315503394147, "grad_norm": 1.956121802330017, "learning_rate": 4.7209108402822326e-06, "loss": 0.898, "step": 8455 }, { "epoch": 0.10310409125808928, "grad_norm": 1.945845127105713, "learning_rate": 4.7205901218729964e-06, "loss": 1.0013, "step": 8460 }, { "epoch": 0.10316502748223709, "grad_norm": 2.2670669555664062, "learning_rate": 4.7202694034637594e-06, "loss": 0.8881, "step": 8465 }, { "epoch": 0.1032259637063849, "grad_norm": 2.0311732292175293, "learning_rate": 4.7199486850545225e-06, "loss": 0.9265, "step": 8470 }, { "epoch": 0.10328689993053271, "grad_norm": 2.1455936431884766, "learning_rate": 4.719627966645286e-06, "loss": 0.9296, "step": 8475 }, { "epoch": 0.10334783615468052, "grad_norm": 1.8654755353927612, "learning_rate": 4.719307248236049e-06, "loss": 0.9562, "step": 8480 }, { "epoch": 0.10340877237882831, "grad_norm": 2.132781505584717, "learning_rate": 4.718986529826812e-06, "loss": 0.8607, "step": 8485 }, { "epoch": 0.10346970860297612, "grad_norm": 1.9701842069625854, "learning_rate": 4.718665811417576e-06, "loss": 0.9575, "step": 8490 }, { "epoch": 0.10353064482712393, "grad_norm": 2.15618896484375, "learning_rate": 4.718345093008339e-06, "loss": 0.9043, "step": 8495 }, { "epoch": 0.10359158105127174, "grad_norm": 4.281879901885986, "learning_rate": 4.718024374599102e-06, "loss": 0.9258, "step": 8500 }, { "epoch": 0.10365251727541955, "grad_norm": 1.7965959310531616, "learning_rate": 4.717703656189866e-06, "loss": 0.9275, "step": 8505 }, { "epoch": 0.10371345349956736, "grad_norm": 2.3310492038726807, "learning_rate": 4.717382937780629e-06, "loss": 0.8582, "step": 8510 }, { "epoch": 0.10377438972371517, "grad_norm": 1.8000808954238892, "learning_rate": 4.717062219371392e-06, "loss": 0.9879, "step": 8515 }, { "epoch": 0.10383532594786296, "grad_norm": 1.8764945268630981, "learning_rate": 4.716741500962156e-06, "loss": 0.9242, "step": 8520 }, { "epoch": 0.10389626217201077, "grad_norm": 2.3416976928710938, "learning_rate": 4.716420782552919e-06, "loss": 0.8634, "step": 8525 }, { "epoch": 0.10395719839615858, "grad_norm": 2.060760021209717, "learning_rate": 4.716100064143682e-06, "loss": 0.9273, "step": 8530 }, { "epoch": 0.10401813462030639, "grad_norm": 1.8268070220947266, "learning_rate": 4.715779345734445e-06, "loss": 0.8877, "step": 8535 }, { "epoch": 0.1040790708444542, "grad_norm": 2.185181140899658, "learning_rate": 4.715458627325209e-06, "loss": 0.9272, "step": 8540 }, { "epoch": 0.104140007068602, "grad_norm": 2.1770846843719482, "learning_rate": 4.715137908915972e-06, "loss": 0.9742, "step": 8545 }, { "epoch": 0.10420094329274981, "grad_norm": 1.6140451431274414, "learning_rate": 4.714817190506735e-06, "loss": 0.9305, "step": 8550 }, { "epoch": 0.10426187951689761, "grad_norm": 1.86104416847229, "learning_rate": 4.714496472097499e-06, "loss": 0.8539, "step": 8555 }, { "epoch": 0.10432281574104542, "grad_norm": 1.8754287958145142, "learning_rate": 4.714175753688262e-06, "loss": 0.8858, "step": 8560 }, { "epoch": 0.10438375196519323, "grad_norm": 1.9611334800720215, "learning_rate": 4.713855035279025e-06, "loss": 0.8936, "step": 8565 }, { "epoch": 0.10444468818934104, "grad_norm": 2.235750436782837, "learning_rate": 4.713534316869789e-06, "loss": 0.9073, "step": 8570 }, { "epoch": 0.10450562441348885, "grad_norm": 2.1212968826293945, "learning_rate": 4.713213598460552e-06, "loss": 0.9055, "step": 8575 }, { "epoch": 0.10456656063763665, "grad_norm": 2.2166996002197266, "learning_rate": 4.712892880051315e-06, "loss": 0.8923, "step": 8580 }, { "epoch": 0.10462749686178445, "grad_norm": 2.3961360454559326, "learning_rate": 4.712572161642079e-06, "loss": 0.9136, "step": 8585 }, { "epoch": 0.10468843308593226, "grad_norm": 1.9156235456466675, "learning_rate": 4.712251443232842e-06, "loss": 0.9595, "step": 8590 }, { "epoch": 0.10474936931008007, "grad_norm": 2.2935519218444824, "learning_rate": 4.7119307248236055e-06, "loss": 0.9303, "step": 8595 }, { "epoch": 0.10481030553422788, "grad_norm": 1.7534023523330688, "learning_rate": 4.7116100064143685e-06, "loss": 0.9209, "step": 8600 }, { "epoch": 0.10487124175837569, "grad_norm": 1.9270128011703491, "learning_rate": 4.711289288005132e-06, "loss": 0.9352, "step": 8605 }, { "epoch": 0.1049321779825235, "grad_norm": 1.9519741535186768, "learning_rate": 4.710968569595895e-06, "loss": 0.9205, "step": 8610 }, { "epoch": 0.1049931142066713, "grad_norm": 2.0212526321411133, "learning_rate": 4.710647851186658e-06, "loss": 0.9366, "step": 8615 }, { "epoch": 0.1050540504308191, "grad_norm": 1.8140945434570312, "learning_rate": 4.710327132777422e-06, "loss": 0.9339, "step": 8620 }, { "epoch": 0.10511498665496691, "grad_norm": 1.9228378534317017, "learning_rate": 4.710006414368185e-06, "loss": 0.9716, "step": 8625 }, { "epoch": 0.10517592287911472, "grad_norm": 1.8767502307891846, "learning_rate": 4.709685695958948e-06, "loss": 1.0028, "step": 8630 }, { "epoch": 0.10523685910326253, "grad_norm": 1.9442917108535767, "learning_rate": 4.709364977549712e-06, "loss": 0.8923, "step": 8635 }, { "epoch": 0.10529779532741033, "grad_norm": 1.858862280845642, "learning_rate": 4.709044259140475e-06, "loss": 0.9829, "step": 8640 }, { "epoch": 0.10535873155155814, "grad_norm": 2.3148512840270996, "learning_rate": 4.708723540731238e-06, "loss": 0.9777, "step": 8645 }, { "epoch": 0.10541966777570595, "grad_norm": 1.8866088390350342, "learning_rate": 4.708402822322002e-06, "loss": 0.9443, "step": 8650 }, { "epoch": 0.10548060399985375, "grad_norm": 2.024811029434204, "learning_rate": 4.708082103912765e-06, "loss": 0.9051, "step": 8655 }, { "epoch": 0.10554154022400156, "grad_norm": 2.06909441947937, "learning_rate": 4.707761385503528e-06, "loss": 0.9613, "step": 8660 }, { "epoch": 0.10560247644814937, "grad_norm": 1.9369755983352661, "learning_rate": 4.707440667094292e-06, "loss": 0.8865, "step": 8665 }, { "epoch": 0.10566341267229717, "grad_norm": 2.350677728652954, "learning_rate": 4.707119948685055e-06, "loss": 0.8787, "step": 8670 }, { "epoch": 0.10572434889644498, "grad_norm": 1.9142334461212158, "learning_rate": 4.706799230275818e-06, "loss": 0.8762, "step": 8675 }, { "epoch": 0.10578528512059279, "grad_norm": 1.87430739402771, "learning_rate": 4.706478511866582e-06, "loss": 0.9254, "step": 8680 }, { "epoch": 0.1058462213447406, "grad_norm": 2.2262284755706787, "learning_rate": 4.706157793457345e-06, "loss": 0.974, "step": 8685 }, { "epoch": 0.1059071575688884, "grad_norm": 2.2499752044677734, "learning_rate": 4.705837075048108e-06, "loss": 0.9673, "step": 8690 }, { "epoch": 0.1059680937930362, "grad_norm": 2.3272907733917236, "learning_rate": 4.705516356638871e-06, "loss": 0.9233, "step": 8695 }, { "epoch": 0.10602903001718401, "grad_norm": 2.205979347229004, "learning_rate": 4.705195638229635e-06, "loss": 0.9337, "step": 8700 }, { "epoch": 0.10608996624133182, "grad_norm": 1.9032402038574219, "learning_rate": 4.704874919820398e-06, "loss": 0.8696, "step": 8705 }, { "epoch": 0.10615090246547963, "grad_norm": 1.9556834697723389, "learning_rate": 4.704554201411161e-06, "loss": 0.9006, "step": 8710 }, { "epoch": 0.10621183868962744, "grad_norm": 2.5231869220733643, "learning_rate": 4.704233483001925e-06, "loss": 0.9282, "step": 8715 }, { "epoch": 0.10627277491377524, "grad_norm": 2.279545545578003, "learning_rate": 4.703912764592688e-06, "loss": 0.9105, "step": 8720 }, { "epoch": 0.10633371113792305, "grad_norm": 1.877699613571167, "learning_rate": 4.703592046183451e-06, "loss": 0.8999, "step": 8725 }, { "epoch": 0.10639464736207085, "grad_norm": 2.6014528274536133, "learning_rate": 4.703271327774215e-06, "loss": 0.9243, "step": 8730 }, { "epoch": 0.10645558358621866, "grad_norm": 2.200237512588501, "learning_rate": 4.702950609364978e-06, "loss": 0.998, "step": 8735 }, { "epoch": 0.10651651981036647, "grad_norm": 2.009072780609131, "learning_rate": 4.7026298909557415e-06, "loss": 0.8751, "step": 8740 }, { "epoch": 0.10657745603451428, "grad_norm": 1.9965664148330688, "learning_rate": 4.7023091725465045e-06, "loss": 0.9193, "step": 8745 }, { "epoch": 0.10663839225866209, "grad_norm": 1.7639437913894653, "learning_rate": 4.7019884541372675e-06, "loss": 0.9077, "step": 8750 }, { "epoch": 0.10669932848280989, "grad_norm": 2.0843584537506104, "learning_rate": 4.701667735728031e-06, "loss": 0.9351, "step": 8755 }, { "epoch": 0.1067602647069577, "grad_norm": 2.2543606758117676, "learning_rate": 4.701347017318794e-06, "loss": 0.931, "step": 8760 }, { "epoch": 0.1068212009311055, "grad_norm": 2.2969906330108643, "learning_rate": 4.701026298909558e-06, "loss": 0.9107, "step": 8765 }, { "epoch": 0.10688213715525331, "grad_norm": 2.3141603469848633, "learning_rate": 4.700705580500321e-06, "loss": 0.9292, "step": 8770 }, { "epoch": 0.10694307337940112, "grad_norm": 2.161503553390503, "learning_rate": 4.700384862091084e-06, "loss": 0.9111, "step": 8775 }, { "epoch": 0.10700400960354893, "grad_norm": 1.777173638343811, "learning_rate": 4.700064143681848e-06, "loss": 0.9365, "step": 8780 }, { "epoch": 0.10706494582769674, "grad_norm": 2.056325674057007, "learning_rate": 4.699743425272611e-06, "loss": 0.8783, "step": 8785 }, { "epoch": 0.10712588205184453, "grad_norm": 2.1908090114593506, "learning_rate": 4.699422706863374e-06, "loss": 0.9291, "step": 8790 }, { "epoch": 0.10718681827599234, "grad_norm": 1.8705945014953613, "learning_rate": 4.699101988454138e-06, "loss": 0.9027, "step": 8795 }, { "epoch": 0.10724775450014015, "grad_norm": 2.3036882877349854, "learning_rate": 4.698781270044901e-06, "loss": 0.9586, "step": 8800 }, { "epoch": 0.10730869072428796, "grad_norm": 2.74810528755188, "learning_rate": 4.698460551635664e-06, "loss": 0.871, "step": 8805 }, { "epoch": 0.10736962694843577, "grad_norm": 2.1349401473999023, "learning_rate": 4.698139833226428e-06, "loss": 0.913, "step": 8810 }, { "epoch": 0.10743056317258358, "grad_norm": 2.048353910446167, "learning_rate": 4.697819114817191e-06, "loss": 0.9204, "step": 8815 }, { "epoch": 0.10749149939673137, "grad_norm": 2.3483352661132812, "learning_rate": 4.697498396407954e-06, "loss": 0.9744, "step": 8820 }, { "epoch": 0.10755243562087918, "grad_norm": 1.9844880104064941, "learning_rate": 4.697177677998718e-06, "loss": 0.9532, "step": 8825 }, { "epoch": 0.10761337184502699, "grad_norm": 2.1558332443237305, "learning_rate": 4.696856959589481e-06, "loss": 0.8866, "step": 8830 }, { "epoch": 0.1076743080691748, "grad_norm": 1.74855375289917, "learning_rate": 4.696536241180244e-06, "loss": 0.9649, "step": 8835 }, { "epoch": 0.10773524429332261, "grad_norm": 2.264493942260742, "learning_rate": 4.696215522771008e-06, "loss": 0.9282, "step": 8840 }, { "epoch": 0.10779618051747042, "grad_norm": 2.1755261421203613, "learning_rate": 4.695894804361771e-06, "loss": 0.896, "step": 8845 }, { "epoch": 0.10785711674161823, "grad_norm": 1.7317098379135132, "learning_rate": 4.695574085952534e-06, "loss": 0.9071, "step": 8850 }, { "epoch": 0.10791805296576602, "grad_norm": 2.106553077697754, "learning_rate": 4.695253367543298e-06, "loss": 0.897, "step": 8855 }, { "epoch": 0.10797898918991383, "grad_norm": 1.962265968322754, "learning_rate": 4.694932649134061e-06, "loss": 0.8905, "step": 8860 }, { "epoch": 0.10803992541406164, "grad_norm": 2.0523712635040283, "learning_rate": 4.694611930724824e-06, "loss": 0.9488, "step": 8865 }, { "epoch": 0.10810086163820945, "grad_norm": 2.051715135574341, "learning_rate": 4.694291212315587e-06, "loss": 0.9656, "step": 8870 }, { "epoch": 0.10816179786235726, "grad_norm": 1.6873265504837036, "learning_rate": 4.6939704939063505e-06, "loss": 0.8941, "step": 8875 }, { "epoch": 0.10822273408650507, "grad_norm": 2.1399500370025635, "learning_rate": 4.6936497754971136e-06, "loss": 0.9453, "step": 8880 }, { "epoch": 0.10828367031065288, "grad_norm": 2.615262031555176, "learning_rate": 4.693329057087877e-06, "loss": 0.9031, "step": 8885 }, { "epoch": 0.10834460653480067, "grad_norm": 1.7952336072921753, "learning_rate": 4.6930083386786404e-06, "loss": 0.9271, "step": 8890 }, { "epoch": 0.10840554275894848, "grad_norm": 2.0220141410827637, "learning_rate": 4.6926876202694035e-06, "loss": 0.9029, "step": 8895 }, { "epoch": 0.10846647898309629, "grad_norm": 1.6992799043655396, "learning_rate": 4.692366901860167e-06, "loss": 0.9181, "step": 8900 }, { "epoch": 0.1085274152072441, "grad_norm": 2.025951385498047, "learning_rate": 4.69204618345093e-06, "loss": 0.9448, "step": 8905 }, { "epoch": 0.10858835143139191, "grad_norm": 1.9102294445037842, "learning_rate": 4.691725465041694e-06, "loss": 0.9498, "step": 8910 }, { "epoch": 0.10864928765553972, "grad_norm": 2.1527469158172607, "learning_rate": 4.691404746632457e-06, "loss": 0.9357, "step": 8915 }, { "epoch": 0.10871022387968753, "grad_norm": 1.9369832277297974, "learning_rate": 4.69108402822322e-06, "loss": 0.8654, "step": 8920 }, { "epoch": 0.10877116010383532, "grad_norm": 1.971448302268982, "learning_rate": 4.690763309813984e-06, "loss": 0.8914, "step": 8925 }, { "epoch": 0.10883209632798313, "grad_norm": 3.750746965408325, "learning_rate": 4.690442591404747e-06, "loss": 0.9067, "step": 8930 }, { "epoch": 0.10889303255213094, "grad_norm": 2.0822577476501465, "learning_rate": 4.690121872995511e-06, "loss": 0.9246, "step": 8935 }, { "epoch": 0.10895396877627875, "grad_norm": 2.384821891784668, "learning_rate": 4.689801154586274e-06, "loss": 0.948, "step": 8940 }, { "epoch": 0.10901490500042656, "grad_norm": 2.0038516521453857, "learning_rate": 4.689480436177037e-06, "loss": 1.0063, "step": 8945 }, { "epoch": 0.10907584122457437, "grad_norm": 2.2540628910064697, "learning_rate": 4.6891597177678e-06, "loss": 0.954, "step": 8950 }, { "epoch": 0.10913677744872216, "grad_norm": 2.0362226963043213, "learning_rate": 4.688838999358564e-06, "loss": 0.9487, "step": 8955 }, { "epoch": 0.10919771367286997, "grad_norm": 1.992284893989563, "learning_rate": 4.688518280949327e-06, "loss": 0.9291, "step": 8960 }, { "epoch": 0.10925864989701778, "grad_norm": 1.8786481618881226, "learning_rate": 4.68819756254009e-06, "loss": 0.9179, "step": 8965 }, { "epoch": 0.10931958612116559, "grad_norm": 2.043250322341919, "learning_rate": 4.687876844130854e-06, "loss": 0.9659, "step": 8970 }, { "epoch": 0.1093805223453134, "grad_norm": 2.1624746322631836, "learning_rate": 4.687556125721617e-06, "loss": 0.957, "step": 8975 }, { "epoch": 0.1094414585694612, "grad_norm": 2.0456440448760986, "learning_rate": 4.68723540731238e-06, "loss": 0.8512, "step": 8980 }, { "epoch": 0.10950239479360901, "grad_norm": 1.9821300506591797, "learning_rate": 4.686914688903144e-06, "loss": 0.9041, "step": 8985 }, { "epoch": 0.10956333101775681, "grad_norm": 1.8892862796783447, "learning_rate": 4.686593970493907e-06, "loss": 0.8819, "step": 8990 }, { "epoch": 0.10962426724190462, "grad_norm": 1.8665733337402344, "learning_rate": 4.68627325208467e-06, "loss": 0.9314, "step": 8995 }, { "epoch": 0.10968520346605243, "grad_norm": 2.0965452194213867, "learning_rate": 4.685952533675434e-06, "loss": 0.8393, "step": 9000 }, { "epoch": 0.10974613969020024, "grad_norm": 1.838534951210022, "learning_rate": 4.685631815266197e-06, "loss": 0.886, "step": 9005 }, { "epoch": 0.10980707591434805, "grad_norm": 1.906435251235962, "learning_rate": 4.68531109685696e-06, "loss": 0.8858, "step": 9010 }, { "epoch": 0.10986801213849585, "grad_norm": 2.672081470489502, "learning_rate": 4.6849903784477235e-06, "loss": 0.8428, "step": 9015 }, { "epoch": 0.10992894836264366, "grad_norm": 1.6699012517929077, "learning_rate": 4.6846696600384865e-06, "loss": 0.9045, "step": 9020 }, { "epoch": 0.10998988458679146, "grad_norm": 2.1404736042022705, "learning_rate": 4.6843489416292495e-06, "loss": 0.9075, "step": 9025 }, { "epoch": 0.11005082081093927, "grad_norm": 2.05534029006958, "learning_rate": 4.6840282232200125e-06, "loss": 0.8623, "step": 9030 }, { "epoch": 0.11011175703508708, "grad_norm": 2.1334147453308105, "learning_rate": 4.683707504810776e-06, "loss": 0.9504, "step": 9035 }, { "epoch": 0.11017269325923489, "grad_norm": 2.0963804721832275, "learning_rate": 4.683386786401539e-06, "loss": 0.9161, "step": 9040 }, { "epoch": 0.1102336294833827, "grad_norm": 2.3499741554260254, "learning_rate": 4.683066067992303e-06, "loss": 0.916, "step": 9045 }, { "epoch": 0.1102945657075305, "grad_norm": 2.1723697185516357, "learning_rate": 4.682745349583066e-06, "loss": 0.9381, "step": 9050 }, { "epoch": 0.1103555019316783, "grad_norm": 1.8632113933563232, "learning_rate": 4.682424631173829e-06, "loss": 0.8757, "step": 9055 }, { "epoch": 0.11041643815582611, "grad_norm": 2.424755811691284, "learning_rate": 4.682103912764593e-06, "loss": 0.9079, "step": 9060 }, { "epoch": 0.11047737437997392, "grad_norm": 1.8842517137527466, "learning_rate": 4.681783194355356e-06, "loss": 0.8794, "step": 9065 }, { "epoch": 0.11053831060412173, "grad_norm": 1.6885448694229126, "learning_rate": 4.68146247594612e-06, "loss": 0.9607, "step": 9070 }, { "epoch": 0.11059924682826953, "grad_norm": 2.1095540523529053, "learning_rate": 4.681141757536883e-06, "loss": 0.971, "step": 9075 }, { "epoch": 0.11066018305241734, "grad_norm": 1.7267647981643677, "learning_rate": 4.680821039127647e-06, "loss": 0.948, "step": 9080 }, { "epoch": 0.11072111927656515, "grad_norm": 2.2864420413970947, "learning_rate": 4.68050032071841e-06, "loss": 0.9533, "step": 9085 }, { "epoch": 0.11078205550071295, "grad_norm": 2.173121929168701, "learning_rate": 4.680179602309173e-06, "loss": 0.897, "step": 9090 }, { "epoch": 0.11084299172486076, "grad_norm": 2.1385483741760254, "learning_rate": 4.679858883899937e-06, "loss": 0.8899, "step": 9095 }, { "epoch": 0.11090392794900857, "grad_norm": 2.5654196739196777, "learning_rate": 4.6795381654907e-06, "loss": 0.9246, "step": 9100 }, { "epoch": 0.11096486417315637, "grad_norm": 1.9192054271697998, "learning_rate": 4.679217447081463e-06, "loss": 0.9548, "step": 9105 }, { "epoch": 0.11102580039730418, "grad_norm": 2.1049418449401855, "learning_rate": 4.678896728672227e-06, "loss": 0.9152, "step": 9110 }, { "epoch": 0.11108673662145199, "grad_norm": 1.9079850912094116, "learning_rate": 4.67857601026299e-06, "loss": 0.8295, "step": 9115 }, { "epoch": 0.1111476728455998, "grad_norm": 2.147994041442871, "learning_rate": 4.678255291853753e-06, "loss": 0.9252, "step": 9120 }, { "epoch": 0.1112086090697476, "grad_norm": 2.4565110206604004, "learning_rate": 4.677934573444516e-06, "loss": 0.8791, "step": 9125 }, { "epoch": 0.1112695452938954, "grad_norm": 2.336663007736206, "learning_rate": 4.67761385503528e-06, "loss": 0.8796, "step": 9130 }, { "epoch": 0.11133048151804321, "grad_norm": 2.1483585834503174, "learning_rate": 4.677293136626043e-06, "loss": 0.8675, "step": 9135 }, { "epoch": 0.11139141774219102, "grad_norm": 2.1321468353271484, "learning_rate": 4.676972418216806e-06, "loss": 0.9285, "step": 9140 }, { "epoch": 0.11145235396633883, "grad_norm": 2.293458938598633, "learning_rate": 4.6766516998075695e-06, "loss": 0.8695, "step": 9145 }, { "epoch": 0.11151329019048664, "grad_norm": 1.7089438438415527, "learning_rate": 4.6763309813983326e-06, "loss": 0.8554, "step": 9150 }, { "epoch": 0.11157422641463445, "grad_norm": 2.184054374694824, "learning_rate": 4.6760102629890956e-06, "loss": 0.9122, "step": 9155 }, { "epoch": 0.11163516263878225, "grad_norm": 2.0118095874786377, "learning_rate": 4.6756895445798594e-06, "loss": 0.9396, "step": 9160 }, { "epoch": 0.11169609886293005, "grad_norm": 1.8393096923828125, "learning_rate": 4.6753688261706225e-06, "loss": 0.8927, "step": 9165 }, { "epoch": 0.11175703508707786, "grad_norm": 2.3533570766448975, "learning_rate": 4.6750481077613855e-06, "loss": 0.9643, "step": 9170 }, { "epoch": 0.11181797131122567, "grad_norm": 1.9598495960235596, "learning_rate": 4.674727389352149e-06, "loss": 0.9194, "step": 9175 }, { "epoch": 0.11187890753537348, "grad_norm": 2.043760061264038, "learning_rate": 4.674406670942912e-06, "loss": 1.0063, "step": 9180 }, { "epoch": 0.11193984375952129, "grad_norm": 2.073265552520752, "learning_rate": 4.674085952533675e-06, "loss": 0.984, "step": 9185 }, { "epoch": 0.11200077998366909, "grad_norm": 1.9879851341247559, "learning_rate": 4.673765234124439e-06, "loss": 0.8395, "step": 9190 }, { "epoch": 0.1120617162078169, "grad_norm": 2.063283920288086, "learning_rate": 4.673444515715202e-06, "loss": 0.8851, "step": 9195 }, { "epoch": 0.1121226524319647, "grad_norm": 2.0307514667510986, "learning_rate": 4.673123797305965e-06, "loss": 0.961, "step": 9200 }, { "epoch": 0.11218358865611251, "grad_norm": 2.070291757583618, "learning_rate": 4.672803078896729e-06, "loss": 0.9, "step": 9205 }, { "epoch": 0.11224452488026032, "grad_norm": 2.375070571899414, "learning_rate": 4.672482360487492e-06, "loss": 0.9415, "step": 9210 }, { "epoch": 0.11230546110440813, "grad_norm": 1.9787760972976685, "learning_rate": 4.672161642078256e-06, "loss": 0.9501, "step": 9215 }, { "epoch": 0.11236639732855594, "grad_norm": 1.7267186641693115, "learning_rate": 4.671840923669019e-06, "loss": 0.911, "step": 9220 }, { "epoch": 0.11242733355270373, "grad_norm": 2.1844167709350586, "learning_rate": 4.671520205259782e-06, "loss": 0.9478, "step": 9225 }, { "epoch": 0.11248826977685154, "grad_norm": 2.526386260986328, "learning_rate": 4.671199486850546e-06, "loss": 0.8736, "step": 9230 }, { "epoch": 0.11254920600099935, "grad_norm": 1.9954948425292969, "learning_rate": 4.670878768441309e-06, "loss": 0.9118, "step": 9235 }, { "epoch": 0.11261014222514716, "grad_norm": 1.9163306951522827, "learning_rate": 4.670558050032073e-06, "loss": 0.9543, "step": 9240 }, { "epoch": 0.11267107844929497, "grad_norm": 2.0058135986328125, "learning_rate": 4.670237331622836e-06, "loss": 0.8944, "step": 9245 }, { "epoch": 0.11273201467344278, "grad_norm": 2.143951177597046, "learning_rate": 4.669916613213599e-06, "loss": 1.0007, "step": 9250 }, { "epoch": 0.11279295089759059, "grad_norm": 2.159233331680298, "learning_rate": 4.669595894804363e-06, "loss": 0.9296, "step": 9255 }, { "epoch": 0.11285388712173838, "grad_norm": 1.678851842880249, "learning_rate": 4.669275176395126e-06, "loss": 0.9125, "step": 9260 }, { "epoch": 0.11291482334588619, "grad_norm": 2.405348777770996, "learning_rate": 4.668954457985889e-06, "loss": 0.9186, "step": 9265 }, { "epoch": 0.112975759570034, "grad_norm": 2.525873899459839, "learning_rate": 4.668633739576653e-06, "loss": 0.9628, "step": 9270 }, { "epoch": 0.11303669579418181, "grad_norm": 2.5430819988250732, "learning_rate": 4.668313021167416e-06, "loss": 0.9633, "step": 9275 }, { "epoch": 0.11309763201832962, "grad_norm": 2.1415746212005615, "learning_rate": 4.667992302758179e-06, "loss": 0.9202, "step": 9280 }, { "epoch": 0.11315856824247743, "grad_norm": 2.178382396697998, "learning_rate": 4.667671584348942e-06, "loss": 0.9169, "step": 9285 }, { "epoch": 0.11321950446662522, "grad_norm": 1.8480912446975708, "learning_rate": 4.6673508659397055e-06, "loss": 0.795, "step": 9290 }, { "epoch": 0.11328044069077303, "grad_norm": 2.1824989318847656, "learning_rate": 4.6670301475304685e-06, "loss": 0.9043, "step": 9295 }, { "epoch": 0.11334137691492084, "grad_norm": 1.8799386024475098, "learning_rate": 4.6667094291212315e-06, "loss": 0.9219, "step": 9300 }, { "epoch": 0.11340231313906865, "grad_norm": 2.4293413162231445, "learning_rate": 4.666388710711995e-06, "loss": 0.8669, "step": 9305 }, { "epoch": 0.11346324936321646, "grad_norm": 1.866790771484375, "learning_rate": 4.666067992302758e-06, "loss": 0.9053, "step": 9310 }, { "epoch": 0.11352418558736427, "grad_norm": 2.1105306148529053, "learning_rate": 4.6657472738935214e-06, "loss": 0.8912, "step": 9315 }, { "epoch": 0.11358512181151208, "grad_norm": 1.818466067314148, "learning_rate": 4.665426555484285e-06, "loss": 0.9073, "step": 9320 }, { "epoch": 0.11364605803565987, "grad_norm": 1.7119413614273071, "learning_rate": 4.665105837075048e-06, "loss": 0.9163, "step": 9325 }, { "epoch": 0.11370699425980768, "grad_norm": 2.101445198059082, "learning_rate": 4.664785118665811e-06, "loss": 0.8657, "step": 9330 }, { "epoch": 0.11376793048395549, "grad_norm": 2.1395227909088135, "learning_rate": 4.664464400256575e-06, "loss": 0.9298, "step": 9335 }, { "epoch": 0.1138288667081033, "grad_norm": 2.1712231636047363, "learning_rate": 4.664143681847338e-06, "loss": 0.9246, "step": 9340 }, { "epoch": 0.11388980293225111, "grad_norm": 2.0400936603546143, "learning_rate": 4.663822963438101e-06, "loss": 0.8986, "step": 9345 }, { "epoch": 0.11395073915639892, "grad_norm": 2.23528790473938, "learning_rate": 4.663502245028865e-06, "loss": 0.888, "step": 9350 }, { "epoch": 0.11401167538054673, "grad_norm": 2.017927646636963, "learning_rate": 4.663181526619628e-06, "loss": 0.8614, "step": 9355 }, { "epoch": 0.11407261160469452, "grad_norm": 2.0851283073425293, "learning_rate": 4.662860808210392e-06, "loss": 0.8885, "step": 9360 }, { "epoch": 0.11413354782884233, "grad_norm": 2.0836682319641113, "learning_rate": 4.662540089801155e-06, "loss": 0.9525, "step": 9365 }, { "epoch": 0.11419448405299014, "grad_norm": 1.9184179306030273, "learning_rate": 4.662219371391918e-06, "loss": 0.9269, "step": 9370 }, { "epoch": 0.11425542027713795, "grad_norm": 2.576378345489502, "learning_rate": 4.661898652982682e-06, "loss": 0.9407, "step": 9375 }, { "epoch": 0.11431635650128576, "grad_norm": 1.9286080598831177, "learning_rate": 4.661577934573445e-06, "loss": 0.9512, "step": 9380 }, { "epoch": 0.11437729272543357, "grad_norm": 1.7680518627166748, "learning_rate": 4.661257216164209e-06, "loss": 0.8981, "step": 9385 }, { "epoch": 0.11443822894958137, "grad_norm": 2.049729824066162, "learning_rate": 4.660936497754972e-06, "loss": 0.849, "step": 9390 }, { "epoch": 0.11449916517372917, "grad_norm": 2.244316577911377, "learning_rate": 4.660615779345735e-06, "loss": 0.8983, "step": 9395 }, { "epoch": 0.11456010139787698, "grad_norm": 2.1047325134277344, "learning_rate": 4.660295060936499e-06, "loss": 0.9626, "step": 9400 }, { "epoch": 0.11462103762202479, "grad_norm": 1.8950092792510986, "learning_rate": 4.659974342527262e-06, "loss": 0.8627, "step": 9405 }, { "epoch": 0.1146819738461726, "grad_norm": 2.211398124694824, "learning_rate": 4.659653624118025e-06, "loss": 0.9209, "step": 9410 }, { "epoch": 0.1147429100703204, "grad_norm": 2.256730079650879, "learning_rate": 4.6593329057087885e-06, "loss": 0.9723, "step": 9415 }, { "epoch": 0.11480384629446821, "grad_norm": 1.7487398386001587, "learning_rate": 4.6590121872995516e-06, "loss": 0.8253, "step": 9420 }, { "epoch": 0.11486478251861601, "grad_norm": 2.223905324935913, "learning_rate": 4.6586914688903146e-06, "loss": 0.9588, "step": 9425 }, { "epoch": 0.11492571874276382, "grad_norm": 2.3652546405792236, "learning_rate": 4.6583707504810784e-06, "loss": 0.958, "step": 9430 }, { "epoch": 0.11498665496691163, "grad_norm": 2.719996690750122, "learning_rate": 4.6580500320718415e-06, "loss": 0.9, "step": 9435 }, { "epoch": 0.11504759119105944, "grad_norm": 3.0332083702087402, "learning_rate": 4.6577293136626045e-06, "loss": 0.8397, "step": 9440 }, { "epoch": 0.11510852741520725, "grad_norm": 1.7605419158935547, "learning_rate": 4.657408595253368e-06, "loss": 0.9083, "step": 9445 }, { "epoch": 0.11516946363935505, "grad_norm": 1.8795024156570435, "learning_rate": 4.657087876844131e-06, "loss": 0.9027, "step": 9450 }, { "epoch": 0.11523039986350286, "grad_norm": 1.8881696462631226, "learning_rate": 4.656767158434894e-06, "loss": 0.9232, "step": 9455 }, { "epoch": 0.11529133608765066, "grad_norm": 1.9369388818740845, "learning_rate": 4.656446440025657e-06, "loss": 0.9111, "step": 9460 }, { "epoch": 0.11535227231179847, "grad_norm": 2.161135673522949, "learning_rate": 4.656125721616421e-06, "loss": 0.867, "step": 9465 }, { "epoch": 0.11541320853594628, "grad_norm": 1.9206912517547607, "learning_rate": 4.655805003207184e-06, "loss": 0.9427, "step": 9470 }, { "epoch": 0.11547414476009409, "grad_norm": 1.7462095022201538, "learning_rate": 4.655484284797947e-06, "loss": 0.8947, "step": 9475 }, { "epoch": 0.1155350809842419, "grad_norm": 1.7770577669143677, "learning_rate": 4.655163566388711e-06, "loss": 0.9491, "step": 9480 }, { "epoch": 0.1155960172083897, "grad_norm": 2.6394453048706055, "learning_rate": 4.654842847979474e-06, "loss": 0.8961, "step": 9485 }, { "epoch": 0.11565695343253751, "grad_norm": 1.8177051544189453, "learning_rate": 4.654522129570237e-06, "loss": 0.9092, "step": 9490 }, { "epoch": 0.11571788965668531, "grad_norm": 1.9360734224319458, "learning_rate": 4.654201411161001e-06, "loss": 0.9023, "step": 9495 }, { "epoch": 0.11577882588083312, "grad_norm": 2.319483757019043, "learning_rate": 4.653880692751764e-06, "loss": 0.9383, "step": 9500 }, { "epoch": 0.11583976210498093, "grad_norm": 2.04402756690979, "learning_rate": 4.653559974342527e-06, "loss": 0.9082, "step": 9505 }, { "epoch": 0.11590069832912873, "grad_norm": 2.0216212272644043, "learning_rate": 4.653239255933291e-06, "loss": 0.9571, "step": 9510 }, { "epoch": 0.11596163455327654, "grad_norm": 2.1013104915618896, "learning_rate": 4.652918537524054e-06, "loss": 0.9173, "step": 9515 }, { "epoch": 0.11602257077742435, "grad_norm": 2.2218899726867676, "learning_rate": 4.652597819114818e-06, "loss": 0.9335, "step": 9520 }, { "epoch": 0.11608350700157216, "grad_norm": 2.2199811935424805, "learning_rate": 4.652277100705581e-06, "loss": 0.9519, "step": 9525 }, { "epoch": 0.11614444322571996, "grad_norm": 2.556572437286377, "learning_rate": 4.651956382296344e-06, "loss": 0.9446, "step": 9530 }, { "epoch": 0.11620537944986777, "grad_norm": 1.956648588180542, "learning_rate": 4.651635663887108e-06, "loss": 0.8802, "step": 9535 }, { "epoch": 0.11626631567401557, "grad_norm": 1.777643084526062, "learning_rate": 4.651314945477871e-06, "loss": 0.8442, "step": 9540 }, { "epoch": 0.11632725189816338, "grad_norm": 1.9250733852386475, "learning_rate": 4.650994227068635e-06, "loss": 0.9181, "step": 9545 }, { "epoch": 0.11638818812231119, "grad_norm": 1.8679815530776978, "learning_rate": 4.650673508659398e-06, "loss": 0.9044, "step": 9550 }, { "epoch": 0.116449124346459, "grad_norm": 1.8844469785690308, "learning_rate": 4.650352790250161e-06, "loss": 0.9995, "step": 9555 }, { "epoch": 0.1165100605706068, "grad_norm": 2.136380910873413, "learning_rate": 4.6500320718409245e-06, "loss": 0.9901, "step": 9560 }, { "epoch": 0.1165709967947546, "grad_norm": 2.304107189178467, "learning_rate": 4.6497113534316875e-06, "loss": 0.9757, "step": 9565 }, { "epoch": 0.11663193301890241, "grad_norm": 1.9518758058547974, "learning_rate": 4.6493906350224505e-06, "loss": 0.9919, "step": 9570 }, { "epoch": 0.11669286924305022, "grad_norm": 2.042341470718384, "learning_rate": 4.649069916613214e-06, "loss": 0.9288, "step": 9575 }, { "epoch": 0.11675380546719803, "grad_norm": 2.1850762367248535, "learning_rate": 4.648749198203977e-06, "loss": 0.9166, "step": 9580 }, { "epoch": 0.11681474169134584, "grad_norm": 1.8227448463439941, "learning_rate": 4.6484284797947404e-06, "loss": 0.9525, "step": 9585 }, { "epoch": 0.11687567791549365, "grad_norm": 1.874144434928894, "learning_rate": 4.648107761385504e-06, "loss": 0.937, "step": 9590 }, { "epoch": 0.11693661413964145, "grad_norm": 2.1504154205322266, "learning_rate": 4.647787042976267e-06, "loss": 0.9054, "step": 9595 }, { "epoch": 0.11699755036378925, "grad_norm": 2.1471428871154785, "learning_rate": 4.64746632456703e-06, "loss": 0.9714, "step": 9600 }, { "epoch": 0.11705848658793706, "grad_norm": 1.8822869062423706, "learning_rate": 4.647145606157794e-06, "loss": 0.8724, "step": 9605 }, { "epoch": 0.11711942281208487, "grad_norm": 2.0268375873565674, "learning_rate": 4.646824887748557e-06, "loss": 0.8835, "step": 9610 }, { "epoch": 0.11718035903623268, "grad_norm": 2.103949546813965, "learning_rate": 4.64650416933932e-06, "loss": 0.8897, "step": 9615 }, { "epoch": 0.11724129526038049, "grad_norm": 2.3728103637695312, "learning_rate": 4.646183450930083e-06, "loss": 0.9395, "step": 9620 }, { "epoch": 0.1173022314845283, "grad_norm": 2.127617120742798, "learning_rate": 4.645862732520847e-06, "loss": 0.9441, "step": 9625 }, { "epoch": 0.1173631677086761, "grad_norm": 2.371605634689331, "learning_rate": 4.64554201411161e-06, "loss": 0.9015, "step": 9630 }, { "epoch": 0.1174241039328239, "grad_norm": 2.022836446762085, "learning_rate": 4.645221295702373e-06, "loss": 0.9277, "step": 9635 }, { "epoch": 0.11748504015697171, "grad_norm": 1.9181504249572754, "learning_rate": 4.644900577293137e-06, "loss": 0.9197, "step": 9640 }, { "epoch": 0.11754597638111952, "grad_norm": 2.1872823238372803, "learning_rate": 4.6445798588839e-06, "loss": 0.9078, "step": 9645 }, { "epoch": 0.11760691260526733, "grad_norm": 1.9300191402435303, "learning_rate": 4.644259140474663e-06, "loss": 0.8486, "step": 9650 }, { "epoch": 0.11766784882941514, "grad_norm": 1.861207127571106, "learning_rate": 4.643938422065427e-06, "loss": 0.9238, "step": 9655 }, { "epoch": 0.11772878505356293, "grad_norm": 2.2306766510009766, "learning_rate": 4.64361770365619e-06, "loss": 0.9206, "step": 9660 }, { "epoch": 0.11778972127771074, "grad_norm": 2.461223602294922, "learning_rate": 4.643296985246954e-06, "loss": 0.9197, "step": 9665 }, { "epoch": 0.11785065750185855, "grad_norm": 2.4389171600341797, "learning_rate": 4.642976266837717e-06, "loss": 0.8818, "step": 9670 }, { "epoch": 0.11791159372600636, "grad_norm": 2.1864981651306152, "learning_rate": 4.64265554842848e-06, "loss": 0.8989, "step": 9675 }, { "epoch": 0.11797252995015417, "grad_norm": 2.155989646911621, "learning_rate": 4.642334830019244e-06, "loss": 0.9618, "step": 9680 }, { "epoch": 0.11803346617430198, "grad_norm": 1.9232611656188965, "learning_rate": 4.642014111610007e-06, "loss": 0.8696, "step": 9685 }, { "epoch": 0.11809440239844979, "grad_norm": 2.014681339263916, "learning_rate": 4.6416933932007706e-06, "loss": 0.9087, "step": 9690 }, { "epoch": 0.11815533862259758, "grad_norm": 1.8253693580627441, "learning_rate": 4.6413726747915336e-06, "loss": 0.873, "step": 9695 }, { "epoch": 0.11821627484674539, "grad_norm": 1.7862703800201416, "learning_rate": 4.641051956382297e-06, "loss": 0.8141, "step": 9700 }, { "epoch": 0.1182772110708932, "grad_norm": 2.280820369720459, "learning_rate": 4.6407312379730605e-06, "loss": 0.8957, "step": 9705 }, { "epoch": 0.11833814729504101, "grad_norm": 1.7991114854812622, "learning_rate": 4.6404105195638235e-06, "loss": 0.9388, "step": 9710 }, { "epoch": 0.11839908351918882, "grad_norm": 1.893432855606079, "learning_rate": 4.6400898011545865e-06, "loss": 0.9303, "step": 9715 }, { "epoch": 0.11846001974333663, "grad_norm": 2.350494861602783, "learning_rate": 4.63976908274535e-06, "loss": 0.9522, "step": 9720 }, { "epoch": 0.11852095596748444, "grad_norm": 2.246748685836792, "learning_rate": 4.639448364336113e-06, "loss": 0.8676, "step": 9725 }, { "epoch": 0.11858189219163223, "grad_norm": 2.2126729488372803, "learning_rate": 4.639127645926876e-06, "loss": 0.9426, "step": 9730 }, { "epoch": 0.11864282841578004, "grad_norm": 2.0616490840911865, "learning_rate": 4.63880692751764e-06, "loss": 0.8475, "step": 9735 }, { "epoch": 0.11870376463992785, "grad_norm": 2.2822535037994385, "learning_rate": 4.638486209108403e-06, "loss": 0.9015, "step": 9740 }, { "epoch": 0.11876470086407566, "grad_norm": 2.1444993019104004, "learning_rate": 4.638165490699166e-06, "loss": 0.9514, "step": 9745 }, { "epoch": 0.11882563708822347, "grad_norm": 2.174046754837036, "learning_rate": 4.63784477228993e-06, "loss": 0.8837, "step": 9750 }, { "epoch": 0.11888657331237128, "grad_norm": 2.21126389503479, "learning_rate": 4.637524053880693e-06, "loss": 0.9441, "step": 9755 }, { "epoch": 0.11894750953651909, "grad_norm": 1.9274104833602905, "learning_rate": 4.637203335471456e-06, "loss": 0.8925, "step": 9760 }, { "epoch": 0.11900844576066688, "grad_norm": 2.140052556991577, "learning_rate": 4.63688261706222e-06, "loss": 0.9321, "step": 9765 }, { "epoch": 0.11906938198481469, "grad_norm": 2.37093186378479, "learning_rate": 4.636561898652983e-06, "loss": 0.9313, "step": 9770 }, { "epoch": 0.1191303182089625, "grad_norm": 1.9722331762313843, "learning_rate": 4.636241180243746e-06, "loss": 0.9113, "step": 9775 }, { "epoch": 0.11919125443311031, "grad_norm": 2.0205488204956055, "learning_rate": 4.63592046183451e-06, "loss": 0.852, "step": 9780 }, { "epoch": 0.11925219065725812, "grad_norm": 1.912983775138855, "learning_rate": 4.635599743425273e-06, "loss": 0.9161, "step": 9785 }, { "epoch": 0.11931312688140593, "grad_norm": 2.1738970279693604, "learning_rate": 4.635279025016036e-06, "loss": 0.9293, "step": 9790 }, { "epoch": 0.11937406310555372, "grad_norm": 2.2822821140289307, "learning_rate": 4.634958306606799e-06, "loss": 0.9219, "step": 9795 }, { "epoch": 0.11943499932970153, "grad_norm": 1.9737863540649414, "learning_rate": 4.634637588197563e-06, "loss": 0.9587, "step": 9800 }, { "epoch": 0.11949593555384934, "grad_norm": 2.14579701423645, "learning_rate": 4.634316869788326e-06, "loss": 0.88, "step": 9805 }, { "epoch": 0.11955687177799715, "grad_norm": 1.838638186454773, "learning_rate": 4.63399615137909e-06, "loss": 0.9149, "step": 9810 }, { "epoch": 0.11961780800214496, "grad_norm": 1.9533520936965942, "learning_rate": 4.633675432969853e-06, "loss": 0.9377, "step": 9815 }, { "epoch": 0.11967874422629277, "grad_norm": 1.8792078495025635, "learning_rate": 4.633354714560616e-06, "loss": 0.9049, "step": 9820 }, { "epoch": 0.11973968045044057, "grad_norm": 1.9765270948410034, "learning_rate": 4.63303399615138e-06, "loss": 0.8687, "step": 9825 }, { "epoch": 0.11980061667458837, "grad_norm": 2.132838249206543, "learning_rate": 4.632713277742143e-06, "loss": 0.9452, "step": 9830 }, { "epoch": 0.11986155289873618, "grad_norm": 2.0580639839172363, "learning_rate": 4.6323925593329065e-06, "loss": 0.9387, "step": 9835 }, { "epoch": 0.11992248912288399, "grad_norm": 1.9351083040237427, "learning_rate": 4.6320718409236695e-06, "loss": 0.9388, "step": 9840 }, { "epoch": 0.1199834253470318, "grad_norm": 2.024641752243042, "learning_rate": 4.6317511225144325e-06, "loss": 0.9458, "step": 9845 }, { "epoch": 0.1200443615711796, "grad_norm": 1.946682333946228, "learning_rate": 4.631430404105196e-06, "loss": 0.8379, "step": 9850 }, { "epoch": 0.12010529779532741, "grad_norm": 1.8197338581085205, "learning_rate": 4.6311096856959594e-06, "loss": 0.9208, "step": 9855 }, { "epoch": 0.12016623401947522, "grad_norm": 2.1077616214752197, "learning_rate": 4.630788967286723e-06, "loss": 0.9259, "step": 9860 }, { "epoch": 0.12022717024362302, "grad_norm": 1.9417253732681274, "learning_rate": 4.630468248877486e-06, "loss": 0.9158, "step": 9865 }, { "epoch": 0.12028810646777083, "grad_norm": 1.8785810470581055, "learning_rate": 4.630147530468249e-06, "loss": 0.9096, "step": 9870 }, { "epoch": 0.12034904269191864, "grad_norm": 2.0801634788513184, "learning_rate": 4.629826812059012e-06, "loss": 0.8698, "step": 9875 }, { "epoch": 0.12040997891606645, "grad_norm": 1.9108902215957642, "learning_rate": 4.629506093649776e-06, "loss": 0.9684, "step": 9880 }, { "epoch": 0.12047091514021425, "grad_norm": 1.9472031593322754, "learning_rate": 4.629185375240539e-06, "loss": 0.9454, "step": 9885 }, { "epoch": 0.12053185136436206, "grad_norm": 2.018423557281494, "learning_rate": 4.628864656831302e-06, "loss": 0.8886, "step": 9890 }, { "epoch": 0.12059278758850986, "grad_norm": 1.7897502183914185, "learning_rate": 4.628543938422066e-06, "loss": 0.8394, "step": 9895 }, { "epoch": 0.12065372381265767, "grad_norm": 1.9519151449203491, "learning_rate": 4.628223220012829e-06, "loss": 0.8988, "step": 9900 }, { "epoch": 0.12071466003680548, "grad_norm": 1.8762571811676025, "learning_rate": 4.627902501603592e-06, "loss": 0.8948, "step": 9905 }, { "epoch": 0.12077559626095329, "grad_norm": 1.8681563138961792, "learning_rate": 4.627581783194356e-06, "loss": 0.887, "step": 9910 }, { "epoch": 0.1208365324851011, "grad_norm": 2.221306800842285, "learning_rate": 4.627261064785119e-06, "loss": 1.026, "step": 9915 }, { "epoch": 0.1208974687092489, "grad_norm": 2.4885623455047607, "learning_rate": 4.626940346375882e-06, "loss": 0.9717, "step": 9920 }, { "epoch": 0.12095840493339671, "grad_norm": 2.0780346393585205, "learning_rate": 4.626619627966646e-06, "loss": 0.9389, "step": 9925 }, { "epoch": 0.12101934115754451, "grad_norm": 2.1820836067199707, "learning_rate": 4.626298909557409e-06, "loss": 0.9646, "step": 9930 }, { "epoch": 0.12108027738169232, "grad_norm": 1.9599545001983643, "learning_rate": 4.625978191148172e-06, "loss": 0.9214, "step": 9935 }, { "epoch": 0.12114121360584013, "grad_norm": 2.1854448318481445, "learning_rate": 4.625657472738936e-06, "loss": 0.8875, "step": 9940 }, { "epoch": 0.12120214982998793, "grad_norm": 2.669888496398926, "learning_rate": 4.625336754329699e-06, "loss": 0.947, "step": 9945 }, { "epoch": 0.12126308605413574, "grad_norm": 2.2690351009368896, "learning_rate": 4.625016035920462e-06, "loss": 0.9198, "step": 9950 }, { "epoch": 0.12132402227828355, "grad_norm": 2.12504506111145, "learning_rate": 4.624695317511225e-06, "loss": 0.9039, "step": 9955 }, { "epoch": 0.12138495850243136, "grad_norm": 2.0417420864105225, "learning_rate": 4.624374599101989e-06, "loss": 0.8924, "step": 9960 }, { "epoch": 0.12144589472657916, "grad_norm": 2.1724772453308105, "learning_rate": 4.624053880692752e-06, "loss": 0.9331, "step": 9965 }, { "epoch": 0.12150683095072697, "grad_norm": 2.155653953552246, "learning_rate": 4.623733162283516e-06, "loss": 0.942, "step": 9970 }, { "epoch": 0.12156776717487477, "grad_norm": 2.2511255741119385, "learning_rate": 4.623412443874279e-06, "loss": 0.9981, "step": 9975 }, { "epoch": 0.12162870339902258, "grad_norm": 1.9806103706359863, "learning_rate": 4.623091725465042e-06, "loss": 0.9346, "step": 9980 }, { "epoch": 0.12168963962317039, "grad_norm": 1.8423787355422974, "learning_rate": 4.6227710070558055e-06, "loss": 0.9011, "step": 9985 }, { "epoch": 0.1217505758473182, "grad_norm": 2.0658812522888184, "learning_rate": 4.6224502886465685e-06, "loss": 0.9349, "step": 9990 }, { "epoch": 0.12181151207146601, "grad_norm": 2.024285316467285, "learning_rate": 4.622129570237332e-06, "loss": 0.927, "step": 9995 }, { "epoch": 0.1218724482956138, "grad_norm": 2.327456474304199, "learning_rate": 4.621808851828095e-06, "loss": 0.9381, "step": 10000 }, { "epoch": 0.12193338451976161, "grad_norm": 2.3254663944244385, "learning_rate": 4.621488133418859e-06, "loss": 0.9233, "step": 10005 }, { "epoch": 0.12199432074390942, "grad_norm": 1.9125157594680786, "learning_rate": 4.621167415009622e-06, "loss": 0.9367, "step": 10010 }, { "epoch": 0.12205525696805723, "grad_norm": 1.9442079067230225, "learning_rate": 4.620846696600385e-06, "loss": 0.9205, "step": 10015 }, { "epoch": 0.12211619319220504, "grad_norm": 1.7279936075210571, "learning_rate": 4.620525978191149e-06, "loss": 0.8788, "step": 10020 }, { "epoch": 0.12217712941635285, "grad_norm": 1.879952073097229, "learning_rate": 4.620205259781912e-06, "loss": 0.9978, "step": 10025 }, { "epoch": 0.12223806564050065, "grad_norm": 1.8383362293243408, "learning_rate": 4.619884541372675e-06, "loss": 0.9533, "step": 10030 }, { "epoch": 0.12229900186464845, "grad_norm": 1.9013539552688599, "learning_rate": 4.619563822963439e-06, "loss": 0.9228, "step": 10035 }, { "epoch": 0.12235993808879626, "grad_norm": 2.000100612640381, "learning_rate": 4.619243104554202e-06, "loss": 0.938, "step": 10040 }, { "epoch": 0.12242087431294407, "grad_norm": 1.8218541145324707, "learning_rate": 4.618922386144965e-06, "loss": 0.9022, "step": 10045 }, { "epoch": 0.12248181053709188, "grad_norm": 2.1225602626800537, "learning_rate": 4.618601667735728e-06, "loss": 0.8602, "step": 10050 }, { "epoch": 0.12254274676123969, "grad_norm": 1.9435354471206665, "learning_rate": 4.618280949326492e-06, "loss": 0.9368, "step": 10055 }, { "epoch": 0.1226036829853875, "grad_norm": 1.6282801628112793, "learning_rate": 4.617960230917255e-06, "loss": 0.9484, "step": 10060 }, { "epoch": 0.1226646192095353, "grad_norm": 2.115814208984375, "learning_rate": 4.617639512508018e-06, "loss": 0.8321, "step": 10065 }, { "epoch": 0.1227255554336831, "grad_norm": 2.180788040161133, "learning_rate": 4.617318794098782e-06, "loss": 0.8371, "step": 10070 }, { "epoch": 0.12278649165783091, "grad_norm": 2.0938007831573486, "learning_rate": 4.616998075689545e-06, "loss": 0.94, "step": 10075 }, { "epoch": 0.12284742788197872, "grad_norm": 1.914613962173462, "learning_rate": 4.616677357280308e-06, "loss": 0.9203, "step": 10080 }, { "epoch": 0.12290836410612653, "grad_norm": 1.9570887088775635, "learning_rate": 4.616356638871072e-06, "loss": 0.9265, "step": 10085 }, { "epoch": 0.12296930033027434, "grad_norm": 2.069660186767578, "learning_rate": 4.616035920461835e-06, "loss": 0.9415, "step": 10090 }, { "epoch": 0.12303023655442215, "grad_norm": 2.170239210128784, "learning_rate": 4.615715202052598e-06, "loss": 0.8687, "step": 10095 }, { "epoch": 0.12309117277856994, "grad_norm": 1.7561410665512085, "learning_rate": 4.615394483643362e-06, "loss": 0.9186, "step": 10100 }, { "epoch": 0.12315210900271775, "grad_norm": 2.264768123626709, "learning_rate": 4.615073765234125e-06, "loss": 0.8894, "step": 10105 }, { "epoch": 0.12321304522686556, "grad_norm": 1.8940670490264893, "learning_rate": 4.614753046824888e-06, "loss": 0.8667, "step": 10110 }, { "epoch": 0.12327398145101337, "grad_norm": 1.9549248218536377, "learning_rate": 4.6144323284156515e-06, "loss": 0.9334, "step": 10115 }, { "epoch": 0.12333491767516118, "grad_norm": 1.9638900756835938, "learning_rate": 4.6141116100064146e-06, "loss": 0.8346, "step": 10120 }, { "epoch": 0.12339585389930899, "grad_norm": 1.9087327718734741, "learning_rate": 4.613790891597178e-06, "loss": 0.9193, "step": 10125 }, { "epoch": 0.12345679012345678, "grad_norm": 2.4666104316711426, "learning_rate": 4.6134701731879414e-06, "loss": 0.9663, "step": 10130 }, { "epoch": 0.12351772634760459, "grad_norm": 1.9233049154281616, "learning_rate": 4.6131494547787045e-06, "loss": 0.9179, "step": 10135 }, { "epoch": 0.1235786625717524, "grad_norm": 2.0434579849243164, "learning_rate": 4.612828736369468e-06, "loss": 0.9672, "step": 10140 }, { "epoch": 0.12363959879590021, "grad_norm": 2.4659008979797363, "learning_rate": 4.612508017960231e-06, "loss": 0.9044, "step": 10145 }, { "epoch": 0.12370053502004802, "grad_norm": 2.1123313903808594, "learning_rate": 4.612187299550994e-06, "loss": 0.9171, "step": 10150 }, { "epoch": 0.12376147124419583, "grad_norm": 1.7845780849456787, "learning_rate": 4.611866581141758e-06, "loss": 0.9165, "step": 10155 }, { "epoch": 0.12382240746834364, "grad_norm": 2.6715967655181885, "learning_rate": 4.611545862732521e-06, "loss": 0.9337, "step": 10160 }, { "epoch": 0.12388334369249143, "grad_norm": 1.995252251625061, "learning_rate": 4.611225144323285e-06, "loss": 0.9382, "step": 10165 }, { "epoch": 0.12394427991663924, "grad_norm": 2.486029863357544, "learning_rate": 4.610904425914048e-06, "loss": 0.9564, "step": 10170 }, { "epoch": 0.12400521614078705, "grad_norm": 2.021084785461426, "learning_rate": 4.610583707504811e-06, "loss": 0.9405, "step": 10175 }, { "epoch": 0.12406615236493486, "grad_norm": 2.2970259189605713, "learning_rate": 4.610262989095575e-06, "loss": 0.8963, "step": 10180 }, { "epoch": 0.12412708858908267, "grad_norm": 2.119945526123047, "learning_rate": 4.609942270686338e-06, "loss": 0.9382, "step": 10185 }, { "epoch": 0.12418802481323048, "grad_norm": 2.0477957725524902, "learning_rate": 4.609621552277101e-06, "loss": 0.8518, "step": 10190 }, { "epoch": 0.12424896103737829, "grad_norm": 1.9909738302230835, "learning_rate": 4.609300833867865e-06, "loss": 0.87, "step": 10195 }, { "epoch": 0.12430989726152608, "grad_norm": 2.0225987434387207, "learning_rate": 4.608980115458628e-06, "loss": 0.9574, "step": 10200 }, { "epoch": 0.12437083348567389, "grad_norm": 1.8968130350112915, "learning_rate": 4.608659397049391e-06, "loss": 0.9244, "step": 10205 }, { "epoch": 0.1244317697098217, "grad_norm": 2.0425028800964355, "learning_rate": 4.608338678640154e-06, "loss": 0.9795, "step": 10210 }, { "epoch": 0.12449270593396951, "grad_norm": 2.2547667026519775, "learning_rate": 4.608017960230918e-06, "loss": 0.9717, "step": 10215 }, { "epoch": 0.12455364215811732, "grad_norm": 2.0253520011901855, "learning_rate": 4.607697241821681e-06, "loss": 0.9103, "step": 10220 }, { "epoch": 0.12461457838226513, "grad_norm": 2.1792826652526855, "learning_rate": 4.607376523412444e-06, "loss": 0.8889, "step": 10225 }, { "epoch": 0.12467551460641293, "grad_norm": 2.355794668197632, "learning_rate": 4.607055805003208e-06, "loss": 0.978, "step": 10230 }, { "epoch": 0.12473645083056073, "grad_norm": 2.0736377239227295, "learning_rate": 4.606735086593971e-06, "loss": 0.8967, "step": 10235 }, { "epoch": 0.12479738705470854, "grad_norm": 1.7680598497390747, "learning_rate": 4.606414368184734e-06, "loss": 0.9074, "step": 10240 }, { "epoch": 0.12485832327885635, "grad_norm": 1.6056829690933228, "learning_rate": 4.606093649775498e-06, "loss": 0.8622, "step": 10245 }, { "epoch": 0.12491925950300416, "grad_norm": 1.9578392505645752, "learning_rate": 4.605772931366261e-06, "loss": 0.8498, "step": 10250 }, { "epoch": 0.12498019572715197, "grad_norm": 2.4208712577819824, "learning_rate": 4.605452212957024e-06, "loss": 0.9695, "step": 10255 }, { "epoch": 0.12504113195129976, "grad_norm": 2.130455493927002, "learning_rate": 4.6051314945477875e-06, "loss": 0.9521, "step": 10260 }, { "epoch": 0.12510206817544758, "grad_norm": 1.8274550437927246, "learning_rate": 4.6048107761385505e-06, "loss": 0.8893, "step": 10265 }, { "epoch": 0.12516300439959538, "grad_norm": 2.2873380184173584, "learning_rate": 4.6044900577293135e-06, "loss": 0.8991, "step": 10270 }, { "epoch": 0.1252239406237432, "grad_norm": 1.9731091260910034, "learning_rate": 4.604169339320077e-06, "loss": 0.919, "step": 10275 }, { "epoch": 0.125284876847891, "grad_norm": 1.9291584491729736, "learning_rate": 4.60384862091084e-06, "loss": 0.9193, "step": 10280 }, { "epoch": 0.1253458130720388, "grad_norm": 2.244885206222534, "learning_rate": 4.603527902501604e-06, "loss": 0.8782, "step": 10285 }, { "epoch": 0.12540674929618661, "grad_norm": 2.2305490970611572, "learning_rate": 4.603207184092367e-06, "loss": 0.873, "step": 10290 }, { "epoch": 0.1254676855203344, "grad_norm": 1.6664221286773682, "learning_rate": 4.60288646568313e-06, "loss": 0.9668, "step": 10295 }, { "epoch": 0.12552862174448223, "grad_norm": 2.3243932723999023, "learning_rate": 4.602565747273894e-06, "loss": 0.9288, "step": 10300 }, { "epoch": 0.12558955796863003, "grad_norm": 1.8305624723434448, "learning_rate": 4.602245028864657e-06, "loss": 0.8562, "step": 10305 }, { "epoch": 0.12565049419277785, "grad_norm": 1.7861642837524414, "learning_rate": 4.601924310455421e-06, "loss": 0.9007, "step": 10310 }, { "epoch": 0.12571143041692565, "grad_norm": 1.8817044496536255, "learning_rate": 4.601603592046184e-06, "loss": 0.9432, "step": 10315 }, { "epoch": 0.12577236664107344, "grad_norm": 2.2049400806427, "learning_rate": 4.601282873636947e-06, "loss": 0.9262, "step": 10320 }, { "epoch": 0.12583330286522126, "grad_norm": 1.8464765548706055, "learning_rate": 4.600962155227711e-06, "loss": 0.9325, "step": 10325 }, { "epoch": 0.12589423908936906, "grad_norm": 2.3079793453216553, "learning_rate": 4.600641436818474e-06, "loss": 0.8816, "step": 10330 }, { "epoch": 0.12595517531351688, "grad_norm": 3.0419445037841797, "learning_rate": 4.600320718409237e-06, "loss": 0.9145, "step": 10335 }, { "epoch": 0.12601611153766468, "grad_norm": 2.0099523067474365, "learning_rate": 4.600000000000001e-06, "loss": 0.8769, "step": 10340 }, { "epoch": 0.1260770477618125, "grad_norm": 1.7218934297561646, "learning_rate": 4.599679281590764e-06, "loss": 0.9645, "step": 10345 }, { "epoch": 0.1261379839859603, "grad_norm": 2.1701953411102295, "learning_rate": 4.599358563181527e-06, "loss": 0.9629, "step": 10350 }, { "epoch": 0.1261989202101081, "grad_norm": 1.9519742727279663, "learning_rate": 4.599037844772291e-06, "loss": 0.9056, "step": 10355 }, { "epoch": 0.1262598564342559, "grad_norm": 1.9204118251800537, "learning_rate": 4.598717126363054e-06, "loss": 0.8852, "step": 10360 }, { "epoch": 0.1263207926584037, "grad_norm": 2.054067850112915, "learning_rate": 4.598396407953817e-06, "loss": 0.9192, "step": 10365 }, { "epoch": 0.12638172888255153, "grad_norm": 2.5311055183410645, "learning_rate": 4.598075689544581e-06, "loss": 0.8778, "step": 10370 }, { "epoch": 0.12644266510669933, "grad_norm": 1.974632978439331, "learning_rate": 4.597754971135344e-06, "loss": 0.9043, "step": 10375 }, { "epoch": 0.12650360133084715, "grad_norm": 2.7401273250579834, "learning_rate": 4.597434252726107e-06, "loss": 0.8854, "step": 10380 }, { "epoch": 0.12656453755499494, "grad_norm": 2.071155309677124, "learning_rate": 4.59711353431687e-06, "loss": 0.8919, "step": 10385 }, { "epoch": 0.12662547377914274, "grad_norm": 2.1274168491363525, "learning_rate": 4.5967928159076336e-06, "loss": 0.9085, "step": 10390 }, { "epoch": 0.12668641000329056, "grad_norm": 2.2395031452178955, "learning_rate": 4.5964720974983966e-06, "loss": 0.972, "step": 10395 }, { "epoch": 0.12674734622743836, "grad_norm": 2.122756004333496, "learning_rate": 4.59615137908916e-06, "loss": 0.9413, "step": 10400 }, { "epoch": 0.12680828245158618, "grad_norm": 1.9402661323547363, "learning_rate": 4.5958306606799235e-06, "loss": 0.9372, "step": 10405 }, { "epoch": 0.12686921867573397, "grad_norm": 1.872487187385559, "learning_rate": 4.5955099422706865e-06, "loss": 0.9671, "step": 10410 }, { "epoch": 0.1269301548998818, "grad_norm": 1.7594419717788696, "learning_rate": 4.5951892238614495e-06, "loss": 0.9134, "step": 10415 }, { "epoch": 0.1269910911240296, "grad_norm": 2.034548044204712, "learning_rate": 4.594868505452213e-06, "loss": 0.834, "step": 10420 }, { "epoch": 0.1270520273481774, "grad_norm": 2.180129289627075, "learning_rate": 4.594547787042976e-06, "loss": 0.913, "step": 10425 }, { "epoch": 0.1271129635723252, "grad_norm": 1.8255213499069214, "learning_rate": 4.594227068633739e-06, "loss": 0.8967, "step": 10430 }, { "epoch": 0.127173899796473, "grad_norm": 1.950398564338684, "learning_rate": 4.593906350224503e-06, "loss": 0.9536, "step": 10435 }, { "epoch": 0.12723483602062083, "grad_norm": 1.8297744989395142, "learning_rate": 4.593585631815266e-06, "loss": 0.9594, "step": 10440 }, { "epoch": 0.12729577224476862, "grad_norm": 2.01684308052063, "learning_rate": 4.59326491340603e-06, "loss": 0.8728, "step": 10445 }, { "epoch": 0.12735670846891642, "grad_norm": 2.64780330657959, "learning_rate": 4.592944194996793e-06, "loss": 0.9298, "step": 10450 }, { "epoch": 0.12741764469306424, "grad_norm": 1.7993321418762207, "learning_rate": 4.592623476587556e-06, "loss": 0.8865, "step": 10455 }, { "epoch": 0.12747858091721204, "grad_norm": 2.011965274810791, "learning_rate": 4.59230275817832e-06, "loss": 0.854, "step": 10460 }, { "epoch": 0.12753951714135986, "grad_norm": 1.8813261985778809, "learning_rate": 4.591982039769083e-06, "loss": 0.9045, "step": 10465 }, { "epoch": 0.12760045336550765, "grad_norm": 2.066725254058838, "learning_rate": 4.591661321359847e-06, "loss": 0.9193, "step": 10470 }, { "epoch": 0.12766138958965548, "grad_norm": 1.9519696235656738, "learning_rate": 4.59134060295061e-06, "loss": 0.8921, "step": 10475 }, { "epoch": 0.12772232581380327, "grad_norm": 1.9511018991470337, "learning_rate": 4.591019884541373e-06, "loss": 0.9606, "step": 10480 }, { "epoch": 0.12778326203795107, "grad_norm": 1.874739646911621, "learning_rate": 4.590699166132137e-06, "loss": 0.9047, "step": 10485 }, { "epoch": 0.1278441982620989, "grad_norm": 1.9000062942504883, "learning_rate": 4.5903784477229e-06, "loss": 0.9127, "step": 10490 }, { "epoch": 0.12790513448624669, "grad_norm": 2.32045841217041, "learning_rate": 4.590057729313663e-06, "loss": 0.9747, "step": 10495 }, { "epoch": 0.1279660707103945, "grad_norm": 2.1778857707977295, "learning_rate": 4.589737010904427e-06, "loss": 0.8396, "step": 10500 }, { "epoch": 0.1280270069345423, "grad_norm": 2.2265841960906982, "learning_rate": 4.58941629249519e-06, "loss": 0.9671, "step": 10505 }, { "epoch": 0.12808794315869013, "grad_norm": 3.1064958572387695, "learning_rate": 4.589095574085953e-06, "loss": 0.9323, "step": 10510 }, { "epoch": 0.12814887938283792, "grad_norm": 2.093313217163086, "learning_rate": 4.588774855676717e-06, "loss": 0.9181, "step": 10515 }, { "epoch": 0.12820981560698572, "grad_norm": 1.9310370683670044, "learning_rate": 4.58845413726748e-06, "loss": 0.9548, "step": 10520 }, { "epoch": 0.12827075183113354, "grad_norm": 1.9491368532180786, "learning_rate": 4.588133418858243e-06, "loss": 0.9067, "step": 10525 }, { "epoch": 0.12833168805528133, "grad_norm": 1.7339751720428467, "learning_rate": 4.5878127004490065e-06, "loss": 0.8663, "step": 10530 }, { "epoch": 0.12839262427942916, "grad_norm": 2.0920629501342773, "learning_rate": 4.5874919820397695e-06, "loss": 0.9237, "step": 10535 }, { "epoch": 0.12845356050357695, "grad_norm": 1.791576623916626, "learning_rate": 4.5871712636305325e-06, "loss": 0.9316, "step": 10540 }, { "epoch": 0.12851449672772478, "grad_norm": 1.9518262147903442, "learning_rate": 4.5868505452212955e-06, "loss": 0.9002, "step": 10545 }, { "epoch": 0.12857543295187257, "grad_norm": 1.946154236793518, "learning_rate": 4.586529826812059e-06, "loss": 0.8698, "step": 10550 }, { "epoch": 0.12863636917602037, "grad_norm": 1.6853981018066406, "learning_rate": 4.5862091084028224e-06, "loss": 0.8744, "step": 10555 }, { "epoch": 0.1286973054001682, "grad_norm": 2.0724427700042725, "learning_rate": 4.5858883899935854e-06, "loss": 1.0537, "step": 10560 }, { "epoch": 0.12875824162431598, "grad_norm": 1.8981726169586182, "learning_rate": 4.585567671584349e-06, "loss": 0.9322, "step": 10565 }, { "epoch": 0.1288191778484638, "grad_norm": 2.0523340702056885, "learning_rate": 4.585246953175112e-06, "loss": 0.9492, "step": 10570 }, { "epoch": 0.1288801140726116, "grad_norm": 2.329714059829712, "learning_rate": 4.584926234765875e-06, "loss": 0.8932, "step": 10575 }, { "epoch": 0.12894105029675942, "grad_norm": 2.1648776531219482, "learning_rate": 4.584605516356639e-06, "loss": 0.9979, "step": 10580 }, { "epoch": 0.12900198652090722, "grad_norm": 1.940568208694458, "learning_rate": 4.584284797947402e-06, "loss": 0.9137, "step": 10585 }, { "epoch": 0.12906292274505501, "grad_norm": 2.239189863204956, "learning_rate": 4.583964079538166e-06, "loss": 0.8396, "step": 10590 }, { "epoch": 0.12912385896920284, "grad_norm": 1.9963628053665161, "learning_rate": 4.583643361128929e-06, "loss": 0.9241, "step": 10595 }, { "epoch": 0.12918479519335063, "grad_norm": 2.16945481300354, "learning_rate": 4.583322642719692e-06, "loss": 0.8884, "step": 10600 }, { "epoch": 0.12924573141749846, "grad_norm": 2.1891677379608154, "learning_rate": 4.583001924310456e-06, "loss": 0.8503, "step": 10605 }, { "epoch": 0.12930666764164625, "grad_norm": 2.14886212348938, "learning_rate": 4.582681205901219e-06, "loss": 0.8499, "step": 10610 }, { "epoch": 0.12936760386579407, "grad_norm": 2.2265639305114746, "learning_rate": 4.582360487491983e-06, "loss": 0.9378, "step": 10615 }, { "epoch": 0.12942854008994187, "grad_norm": 2.121704339981079, "learning_rate": 4.582039769082746e-06, "loss": 0.9568, "step": 10620 }, { "epoch": 0.12948947631408966, "grad_norm": 1.9172128438949585, "learning_rate": 4.581719050673509e-06, "loss": 0.8942, "step": 10625 }, { "epoch": 0.12955041253823749, "grad_norm": 2.3049378395080566, "learning_rate": 4.581398332264273e-06, "loss": 0.9286, "step": 10630 }, { "epoch": 0.12961134876238528, "grad_norm": 2.168236494064331, "learning_rate": 4.581077613855036e-06, "loss": 0.8915, "step": 10635 }, { "epoch": 0.1296722849865331, "grad_norm": 1.8924778699874878, "learning_rate": 4.580756895445799e-06, "loss": 0.8857, "step": 10640 }, { "epoch": 0.1297332212106809, "grad_norm": 2.0683181285858154, "learning_rate": 4.580436177036563e-06, "loss": 0.847, "step": 10645 }, { "epoch": 0.12979415743482872, "grad_norm": 2.3391716480255127, "learning_rate": 4.580115458627326e-06, "loss": 0.915, "step": 10650 }, { "epoch": 0.12985509365897652, "grad_norm": 1.898260235786438, "learning_rate": 4.579794740218089e-06, "loss": 0.9624, "step": 10655 }, { "epoch": 0.1299160298831243, "grad_norm": 2.137397527694702, "learning_rate": 4.5794740218088526e-06, "loss": 0.9396, "step": 10660 }, { "epoch": 0.12997696610727213, "grad_norm": 2.614586353302002, "learning_rate": 4.5791533033996156e-06, "loss": 0.9598, "step": 10665 }, { "epoch": 0.13003790233141993, "grad_norm": 1.919897198677063, "learning_rate": 4.578832584990379e-06, "loss": 0.9002, "step": 10670 }, { "epoch": 0.13009883855556775, "grad_norm": 2.018552303314209, "learning_rate": 4.5785118665811425e-06, "loss": 0.9057, "step": 10675 }, { "epoch": 0.13015977477971555, "grad_norm": 1.7261278629302979, "learning_rate": 4.5781911481719055e-06, "loss": 0.8534, "step": 10680 }, { "epoch": 0.13022071100386334, "grad_norm": 1.7458564043045044, "learning_rate": 4.5778704297626685e-06, "loss": 0.8828, "step": 10685 }, { "epoch": 0.13028164722801117, "grad_norm": 2.0611562728881836, "learning_rate": 4.577549711353432e-06, "loss": 0.9507, "step": 10690 }, { "epoch": 0.13034258345215896, "grad_norm": 2.0426747798919678, "learning_rate": 4.577228992944195e-06, "loss": 0.8476, "step": 10695 }, { "epoch": 0.13040351967630678, "grad_norm": 2.0550153255462646, "learning_rate": 4.576908274534958e-06, "loss": 0.8397, "step": 10700 }, { "epoch": 0.13046445590045458, "grad_norm": 1.89596426486969, "learning_rate": 4.576587556125722e-06, "loss": 0.8699, "step": 10705 }, { "epoch": 0.1305253921246024, "grad_norm": 2.466771125793457, "learning_rate": 4.576266837716485e-06, "loss": 0.9481, "step": 10710 }, { "epoch": 0.1305863283487502, "grad_norm": 2.3046133518218994, "learning_rate": 4.575946119307248e-06, "loss": 0.886, "step": 10715 }, { "epoch": 0.130647264572898, "grad_norm": 2.233095645904541, "learning_rate": 4.575625400898011e-06, "loss": 0.919, "step": 10720 }, { "epoch": 0.13070820079704581, "grad_norm": 1.7735344171524048, "learning_rate": 4.575304682488775e-06, "loss": 0.8193, "step": 10725 }, { "epoch": 0.1307691370211936, "grad_norm": 2.2549490928649902, "learning_rate": 4.574983964079538e-06, "loss": 0.8587, "step": 10730 }, { "epoch": 0.13083007324534143, "grad_norm": 2.1423044204711914, "learning_rate": 4.574663245670302e-06, "loss": 0.9325, "step": 10735 }, { "epoch": 0.13089100946948923, "grad_norm": 2.607790231704712, "learning_rate": 4.574342527261065e-06, "loss": 0.9186, "step": 10740 }, { "epoch": 0.13095194569363705, "grad_norm": 2.053023099899292, "learning_rate": 4.574021808851828e-06, "loss": 0.8833, "step": 10745 }, { "epoch": 0.13101288191778485, "grad_norm": 2.682203531265259, "learning_rate": 4.573701090442592e-06, "loss": 0.9914, "step": 10750 }, { "epoch": 0.13107381814193264, "grad_norm": 2.218069314956665, "learning_rate": 4.573380372033355e-06, "loss": 0.9374, "step": 10755 }, { "epoch": 0.13113475436608046, "grad_norm": 1.9028596878051758, "learning_rate": 4.573059653624119e-06, "loss": 0.8988, "step": 10760 }, { "epoch": 0.13119569059022826, "grad_norm": 3.588834047317505, "learning_rate": 4.572738935214882e-06, "loss": 0.8805, "step": 10765 }, { "epoch": 0.13125662681437608, "grad_norm": 1.9491311311721802, "learning_rate": 4.572418216805645e-06, "loss": 0.929, "step": 10770 }, { "epoch": 0.13131756303852388, "grad_norm": 2.5265614986419678, "learning_rate": 4.572097498396409e-06, "loss": 0.9379, "step": 10775 }, { "epoch": 0.1313784992626717, "grad_norm": 2.0057973861694336, "learning_rate": 4.571776779987172e-06, "loss": 0.8815, "step": 10780 }, { "epoch": 0.1314394354868195, "grad_norm": 2.0490670204162598, "learning_rate": 4.571456061577936e-06, "loss": 0.835, "step": 10785 }, { "epoch": 0.1315003717109673, "grad_norm": 3.213988780975342, "learning_rate": 4.571135343168699e-06, "loss": 0.9701, "step": 10790 }, { "epoch": 0.1315613079351151, "grad_norm": 1.9232497215270996, "learning_rate": 4.570814624759462e-06, "loss": 0.8291, "step": 10795 }, { "epoch": 0.1316222441592629, "grad_norm": 1.8228182792663574, "learning_rate": 4.570493906350225e-06, "loss": 0.864, "step": 10800 }, { "epoch": 0.13168318038341073, "grad_norm": 2.863671064376831, "learning_rate": 4.5701731879409885e-06, "loss": 0.927, "step": 10805 }, { "epoch": 0.13174411660755853, "grad_norm": 1.7324364185333252, "learning_rate": 4.5698524695317515e-06, "loss": 0.951, "step": 10810 }, { "epoch": 0.13180505283170635, "grad_norm": 2.079144239425659, "learning_rate": 4.5695317511225145e-06, "loss": 0.8919, "step": 10815 }, { "epoch": 0.13186598905585414, "grad_norm": 1.8501924276351929, "learning_rate": 4.569211032713278e-06, "loss": 0.897, "step": 10820 }, { "epoch": 0.13192692528000194, "grad_norm": 2.221916913986206, "learning_rate": 4.5688903143040414e-06, "loss": 0.8751, "step": 10825 }, { "epoch": 0.13198786150414976, "grad_norm": 1.7397550344467163, "learning_rate": 4.5685695958948044e-06, "loss": 0.8443, "step": 10830 }, { "epoch": 0.13204879772829756, "grad_norm": 2.0104026794433594, "learning_rate": 4.568248877485568e-06, "loss": 0.8139, "step": 10835 }, { "epoch": 0.13210973395244538, "grad_norm": 2.1831088066101074, "learning_rate": 4.567928159076331e-06, "loss": 0.9688, "step": 10840 }, { "epoch": 0.13217067017659317, "grad_norm": 1.8835762739181519, "learning_rate": 4.567607440667094e-06, "loss": 0.9087, "step": 10845 }, { "epoch": 0.132231606400741, "grad_norm": 1.8861734867095947, "learning_rate": 4.567286722257858e-06, "loss": 0.8809, "step": 10850 }, { "epoch": 0.1322925426248888, "grad_norm": 1.950074315071106, "learning_rate": 4.566966003848621e-06, "loss": 0.8885, "step": 10855 }, { "epoch": 0.1323534788490366, "grad_norm": 2.1889736652374268, "learning_rate": 4.566645285439384e-06, "loss": 1.0126, "step": 10860 }, { "epoch": 0.1324144150731844, "grad_norm": 1.9177234172821045, "learning_rate": 4.566324567030148e-06, "loss": 0.884, "step": 10865 }, { "epoch": 0.1324753512973322, "grad_norm": 2.1493399143218994, "learning_rate": 4.566003848620911e-06, "loss": 0.8512, "step": 10870 }, { "epoch": 0.13253628752148003, "grad_norm": 2.001209020614624, "learning_rate": 4.565683130211674e-06, "loss": 0.878, "step": 10875 }, { "epoch": 0.13259722374562782, "grad_norm": 2.0011613368988037, "learning_rate": 4.565362411802438e-06, "loss": 0.9244, "step": 10880 }, { "epoch": 0.13265815996977565, "grad_norm": 2.3448448181152344, "learning_rate": 4.565041693393201e-06, "loss": 0.9587, "step": 10885 }, { "epoch": 0.13271909619392344, "grad_norm": 2.3411006927490234, "learning_rate": 4.564720974983964e-06, "loss": 0.9254, "step": 10890 }, { "epoch": 0.13278003241807124, "grad_norm": 2.4189751148223877, "learning_rate": 4.564400256574728e-06, "loss": 0.9295, "step": 10895 }, { "epoch": 0.13284096864221906, "grad_norm": 1.6451547145843506, "learning_rate": 4.564079538165491e-06, "loss": 0.8575, "step": 10900 }, { "epoch": 0.13290190486636685, "grad_norm": 1.8899805545806885, "learning_rate": 4.563758819756254e-06, "loss": 0.8366, "step": 10905 }, { "epoch": 0.13296284109051468, "grad_norm": 1.9466712474822998, "learning_rate": 4.563438101347018e-06, "loss": 0.8883, "step": 10910 }, { "epoch": 0.13302377731466247, "grad_norm": 1.8492134809494019, "learning_rate": 4.563117382937781e-06, "loss": 0.9556, "step": 10915 }, { "epoch": 0.13308471353881027, "grad_norm": 1.8761314153671265, "learning_rate": 4.562796664528545e-06, "loss": 0.9798, "step": 10920 }, { "epoch": 0.1331456497629581, "grad_norm": 2.0330421924591064, "learning_rate": 4.562475946119308e-06, "loss": 0.9397, "step": 10925 }, { "epoch": 0.13320658598710589, "grad_norm": 1.7094658613204956, "learning_rate": 4.562155227710071e-06, "loss": 0.8944, "step": 10930 }, { "epoch": 0.1332675222112537, "grad_norm": 2.106879234313965, "learning_rate": 4.5618345093008346e-06, "loss": 0.9399, "step": 10935 }, { "epoch": 0.1333284584354015, "grad_norm": 2.1923537254333496, "learning_rate": 4.561513790891598e-06, "loss": 0.9391, "step": 10940 }, { "epoch": 0.13338939465954933, "grad_norm": 1.8747978210449219, "learning_rate": 4.5611930724823615e-06, "loss": 0.8525, "step": 10945 }, { "epoch": 0.13345033088369712, "grad_norm": 1.8359477519989014, "learning_rate": 4.5608723540731245e-06, "loss": 0.9079, "step": 10950 }, { "epoch": 0.13351126710784492, "grad_norm": 2.009946823120117, "learning_rate": 4.5605516356638875e-06, "loss": 0.927, "step": 10955 }, { "epoch": 0.13357220333199274, "grad_norm": 2.0917813777923584, "learning_rate": 4.560230917254651e-06, "loss": 0.9808, "step": 10960 }, { "epoch": 0.13363313955614053, "grad_norm": 1.9357832670211792, "learning_rate": 4.559910198845414e-06, "loss": 0.9171, "step": 10965 }, { "epoch": 0.13369407578028836, "grad_norm": 1.8508548736572266, "learning_rate": 4.559589480436177e-06, "loss": 0.998, "step": 10970 }, { "epoch": 0.13375501200443615, "grad_norm": 1.8944259881973267, "learning_rate": 4.55926876202694e-06, "loss": 0.9304, "step": 10975 }, { "epoch": 0.13381594822858398, "grad_norm": 1.9730976819992065, "learning_rate": 4.558948043617704e-06, "loss": 0.9438, "step": 10980 }, { "epoch": 0.13387688445273177, "grad_norm": 2.2130374908447266, "learning_rate": 4.558627325208467e-06, "loss": 0.851, "step": 10985 }, { "epoch": 0.13393782067687957, "grad_norm": 2.1941604614257812, "learning_rate": 4.55830660679923e-06, "loss": 0.9072, "step": 10990 }, { "epoch": 0.1339987569010274, "grad_norm": 2.030574321746826, "learning_rate": 4.557985888389994e-06, "loss": 0.9321, "step": 10995 }, { "epoch": 0.13405969312517518, "grad_norm": 2.3072891235351562, "learning_rate": 4.557665169980757e-06, "loss": 0.9008, "step": 11000 }, { "epoch": 0.134120629349323, "grad_norm": 2.279339551925659, "learning_rate": 4.55734445157152e-06, "loss": 0.932, "step": 11005 }, { "epoch": 0.1341815655734708, "grad_norm": 2.108912467956543, "learning_rate": 4.557023733162284e-06, "loss": 0.877, "step": 11010 }, { "epoch": 0.13424250179761862, "grad_norm": 1.7538508176803589, "learning_rate": 4.556703014753047e-06, "loss": 0.8518, "step": 11015 }, { "epoch": 0.13430343802176642, "grad_norm": 1.7974058389663696, "learning_rate": 4.55638229634381e-06, "loss": 0.9054, "step": 11020 }, { "epoch": 0.13436437424591421, "grad_norm": 2.2005958557128906, "learning_rate": 4.556061577934574e-06, "loss": 0.9393, "step": 11025 }, { "epoch": 0.13442531047006204, "grad_norm": 1.9797109365463257, "learning_rate": 4.555740859525337e-06, "loss": 0.9235, "step": 11030 }, { "epoch": 0.13448624669420983, "grad_norm": 1.9141637086868286, "learning_rate": 4.5554201411161e-06, "loss": 0.962, "step": 11035 }, { "epoch": 0.13454718291835766, "grad_norm": 1.8824163675308228, "learning_rate": 4.555099422706864e-06, "loss": 0.8711, "step": 11040 }, { "epoch": 0.13460811914250545, "grad_norm": 2.214263916015625, "learning_rate": 4.554778704297627e-06, "loss": 0.8491, "step": 11045 }, { "epoch": 0.13466905536665327, "grad_norm": 2.0896754264831543, "learning_rate": 4.55445798588839e-06, "loss": 0.9419, "step": 11050 }, { "epoch": 0.13472999159080107, "grad_norm": 2.371718168258667, "learning_rate": 4.554137267479154e-06, "loss": 0.9421, "step": 11055 }, { "epoch": 0.13479092781494886, "grad_norm": 1.8869744539260864, "learning_rate": 4.553816549069917e-06, "loss": 0.9027, "step": 11060 }, { "epoch": 0.13485186403909669, "grad_norm": 2.4134504795074463, "learning_rate": 4.553495830660681e-06, "loss": 0.9074, "step": 11065 }, { "epoch": 0.13491280026324448, "grad_norm": 1.8048206567764282, "learning_rate": 4.553175112251444e-06, "loss": 0.8295, "step": 11070 }, { "epoch": 0.1349737364873923, "grad_norm": 1.7558019161224365, "learning_rate": 4.552854393842207e-06, "loss": 0.8873, "step": 11075 }, { "epoch": 0.1350346727115401, "grad_norm": 1.9609074592590332, "learning_rate": 4.5525336754329705e-06, "loss": 0.8536, "step": 11080 }, { "epoch": 0.13509560893568792, "grad_norm": 2.6046571731567383, "learning_rate": 4.5522129570237335e-06, "loss": 0.9836, "step": 11085 }, { "epoch": 0.13515654515983572, "grad_norm": 2.152052640914917, "learning_rate": 4.551892238614497e-06, "loss": 0.8733, "step": 11090 }, { "epoch": 0.1352174813839835, "grad_norm": 1.9522351026535034, "learning_rate": 4.5515715202052604e-06, "loss": 0.9282, "step": 11095 }, { "epoch": 0.13527841760813133, "grad_norm": 1.9443877935409546, "learning_rate": 4.5512508017960234e-06, "loss": 0.8758, "step": 11100 }, { "epoch": 0.13533935383227913, "grad_norm": 1.7317241430282593, "learning_rate": 4.550930083386787e-06, "loss": 0.9427, "step": 11105 }, { "epoch": 0.13540029005642695, "grad_norm": 2.2352137565612793, "learning_rate": 4.55060936497755e-06, "loss": 0.8669, "step": 11110 }, { "epoch": 0.13546122628057475, "grad_norm": 1.9185529947280884, "learning_rate": 4.550288646568313e-06, "loss": 0.9308, "step": 11115 }, { "epoch": 0.13552216250472257, "grad_norm": 2.046663999557495, "learning_rate": 4.549967928159077e-06, "loss": 0.8493, "step": 11120 }, { "epoch": 0.13558309872887037, "grad_norm": 2.3805549144744873, "learning_rate": 4.54964720974984e-06, "loss": 0.8907, "step": 11125 }, { "epoch": 0.13564403495301816, "grad_norm": 2.0689170360565186, "learning_rate": 4.549326491340603e-06, "loss": 0.9104, "step": 11130 }, { "epoch": 0.13570497117716598, "grad_norm": 2.3918333053588867, "learning_rate": 4.549005772931366e-06, "loss": 0.9883, "step": 11135 }, { "epoch": 0.13576590740131378, "grad_norm": 2.0022525787353516, "learning_rate": 4.54868505452213e-06, "loss": 0.9009, "step": 11140 }, { "epoch": 0.1358268436254616, "grad_norm": 1.812596321105957, "learning_rate": 4.548364336112893e-06, "loss": 0.9252, "step": 11145 }, { "epoch": 0.1358877798496094, "grad_norm": 1.8419326543807983, "learning_rate": 4.548043617703656e-06, "loss": 0.8635, "step": 11150 }, { "epoch": 0.1359487160737572, "grad_norm": 2.093832492828369, "learning_rate": 4.54772289929442e-06, "loss": 0.9783, "step": 11155 }, { "epoch": 0.13600965229790501, "grad_norm": 2.02325439453125, "learning_rate": 4.547402180885183e-06, "loss": 0.9131, "step": 11160 }, { "epoch": 0.1360705885220528, "grad_norm": 2.1583259105682373, "learning_rate": 4.547081462475946e-06, "loss": 0.9027, "step": 11165 }, { "epoch": 0.13613152474620063, "grad_norm": 2.4764506816864014, "learning_rate": 4.54676074406671e-06, "loss": 0.9005, "step": 11170 }, { "epoch": 0.13619246097034843, "grad_norm": 2.0478880405426025, "learning_rate": 4.546440025657473e-06, "loss": 0.9266, "step": 11175 }, { "epoch": 0.13625339719449625, "grad_norm": 1.8227574825286865, "learning_rate": 4.546119307248236e-06, "loss": 0.9918, "step": 11180 }, { "epoch": 0.13631433341864405, "grad_norm": 2.0078182220458984, "learning_rate": 4.545798588839e-06, "loss": 0.9616, "step": 11185 }, { "epoch": 0.13637526964279184, "grad_norm": 2.2795536518096924, "learning_rate": 4.545477870429763e-06, "loss": 0.8903, "step": 11190 }, { "epoch": 0.13643620586693966, "grad_norm": 1.8324999809265137, "learning_rate": 4.545157152020526e-06, "loss": 0.9558, "step": 11195 }, { "epoch": 0.13649714209108746, "grad_norm": 2.1401326656341553, "learning_rate": 4.54483643361129e-06, "loss": 0.9103, "step": 11200 }, { "epoch": 0.13655807831523528, "grad_norm": 1.8426287174224854, "learning_rate": 4.544515715202053e-06, "loss": 0.8346, "step": 11205 }, { "epoch": 0.13661901453938308, "grad_norm": 2.3382205963134766, "learning_rate": 4.544194996792817e-06, "loss": 0.8231, "step": 11210 }, { "epoch": 0.1366799507635309, "grad_norm": 2.004833698272705, "learning_rate": 4.54387427838358e-06, "loss": 0.8771, "step": 11215 }, { "epoch": 0.1367408869876787, "grad_norm": 1.9318724870681763, "learning_rate": 4.543553559974343e-06, "loss": 0.969, "step": 11220 }, { "epoch": 0.1368018232118265, "grad_norm": 1.9204479455947876, "learning_rate": 4.5432328415651065e-06, "loss": 0.8984, "step": 11225 }, { "epoch": 0.1368627594359743, "grad_norm": 2.129556179046631, "learning_rate": 4.5429121231558695e-06, "loss": 0.8967, "step": 11230 }, { "epoch": 0.1369236956601221, "grad_norm": 1.8780196905136108, "learning_rate": 4.542591404746633e-06, "loss": 0.9017, "step": 11235 }, { "epoch": 0.13698463188426993, "grad_norm": 1.9625874757766724, "learning_rate": 4.542270686337396e-06, "loss": 0.8783, "step": 11240 }, { "epoch": 0.13704556810841773, "grad_norm": 1.979195475578308, "learning_rate": 4.541949967928159e-06, "loss": 0.9128, "step": 11245 }, { "epoch": 0.13710650433256555, "grad_norm": 1.8581907749176025, "learning_rate": 4.541629249518923e-06, "loss": 0.9592, "step": 11250 }, { "epoch": 0.13716744055671334, "grad_norm": 1.8694441318511963, "learning_rate": 4.541308531109686e-06, "loss": 0.9559, "step": 11255 }, { "epoch": 0.13722837678086114, "grad_norm": 2.155315399169922, "learning_rate": 4.540987812700449e-06, "loss": 0.9459, "step": 11260 }, { "epoch": 0.13728931300500896, "grad_norm": 1.8528226613998413, "learning_rate": 4.540667094291213e-06, "loss": 0.9144, "step": 11265 }, { "epoch": 0.13735024922915676, "grad_norm": 1.951140284538269, "learning_rate": 4.540346375881976e-06, "loss": 0.9132, "step": 11270 }, { "epoch": 0.13741118545330458, "grad_norm": 1.9869025945663452, "learning_rate": 4.540025657472739e-06, "loss": 0.9422, "step": 11275 }, { "epoch": 0.13747212167745237, "grad_norm": 2.029862642288208, "learning_rate": 4.539704939063503e-06, "loss": 0.9062, "step": 11280 }, { "epoch": 0.1375330579016002, "grad_norm": 1.977674126625061, "learning_rate": 4.539384220654266e-06, "loss": 0.8902, "step": 11285 }, { "epoch": 0.137593994125748, "grad_norm": 2.10031795501709, "learning_rate": 4.539063502245029e-06, "loss": 0.9309, "step": 11290 }, { "epoch": 0.1376549303498958, "grad_norm": 2.110154867172241, "learning_rate": 4.538742783835793e-06, "loss": 0.8625, "step": 11295 }, { "epoch": 0.1377158665740436, "grad_norm": 1.749841570854187, "learning_rate": 4.538422065426556e-06, "loss": 0.9346, "step": 11300 }, { "epoch": 0.1377768027981914, "grad_norm": 1.6685072183609009, "learning_rate": 4.538101347017319e-06, "loss": 0.8602, "step": 11305 }, { "epoch": 0.13783773902233923, "grad_norm": 1.791539192199707, "learning_rate": 4.537780628608082e-06, "loss": 0.9097, "step": 11310 }, { "epoch": 0.13789867524648702, "grad_norm": 2.1093146800994873, "learning_rate": 4.537459910198846e-06, "loss": 0.9782, "step": 11315 }, { "epoch": 0.13795961147063485, "grad_norm": 2.0195364952087402, "learning_rate": 4.537139191789609e-06, "loss": 0.9263, "step": 11320 }, { "epoch": 0.13802054769478264, "grad_norm": 2.705111265182495, "learning_rate": 4.536818473380372e-06, "loss": 0.9455, "step": 11325 }, { "epoch": 0.13808148391893044, "grad_norm": 1.929911494255066, "learning_rate": 4.536497754971136e-06, "loss": 0.8666, "step": 11330 }, { "epoch": 0.13814242014307826, "grad_norm": 1.877357006072998, "learning_rate": 4.536177036561899e-06, "loss": 0.8614, "step": 11335 }, { "epoch": 0.13820335636722605, "grad_norm": 2.009719133377075, "learning_rate": 4.535856318152662e-06, "loss": 0.9177, "step": 11340 }, { "epoch": 0.13826429259137388, "grad_norm": 1.9201987981796265, "learning_rate": 4.535535599743426e-06, "loss": 0.9338, "step": 11345 }, { "epoch": 0.13832522881552167, "grad_norm": 2.0663185119628906, "learning_rate": 4.535214881334189e-06, "loss": 0.9344, "step": 11350 }, { "epoch": 0.1383861650396695, "grad_norm": 2.2572121620178223, "learning_rate": 4.534894162924952e-06, "loss": 0.8892, "step": 11355 }, { "epoch": 0.1384471012638173, "grad_norm": 1.861609697341919, "learning_rate": 4.5345734445157156e-06, "loss": 0.9617, "step": 11360 }, { "epoch": 0.13850803748796509, "grad_norm": 2.115039348602295, "learning_rate": 4.534252726106479e-06, "loss": 0.9406, "step": 11365 }, { "epoch": 0.1385689737121129, "grad_norm": 2.00862979888916, "learning_rate": 4.5339320076972424e-06, "loss": 0.8923, "step": 11370 }, { "epoch": 0.1386299099362607, "grad_norm": 1.9058096408843994, "learning_rate": 4.5336112892880055e-06, "loss": 0.8801, "step": 11375 }, { "epoch": 0.13869084616040853, "grad_norm": 2.031684637069702, "learning_rate": 4.5332905708787685e-06, "loss": 0.9253, "step": 11380 }, { "epoch": 0.13875178238455632, "grad_norm": 1.8587175607681274, "learning_rate": 4.532969852469532e-06, "loss": 0.8389, "step": 11385 }, { "epoch": 0.13881271860870412, "grad_norm": 1.9127758741378784, "learning_rate": 4.532649134060295e-06, "loss": 0.9721, "step": 11390 }, { "epoch": 0.13887365483285194, "grad_norm": 1.9207807779312134, "learning_rate": 4.532328415651059e-06, "loss": 0.9698, "step": 11395 }, { "epoch": 0.13893459105699973, "grad_norm": 1.949100375175476, "learning_rate": 4.532007697241822e-06, "loss": 0.8525, "step": 11400 }, { "epoch": 0.13899552728114756, "grad_norm": 1.80766761302948, "learning_rate": 4.531686978832585e-06, "loss": 0.9032, "step": 11405 }, { "epoch": 0.13905646350529535, "grad_norm": 2.2009952068328857, "learning_rate": 4.531366260423349e-06, "loss": 0.9498, "step": 11410 }, { "epoch": 0.13911739972944318, "grad_norm": 2.0552353858947754, "learning_rate": 4.531045542014112e-06, "loss": 0.9131, "step": 11415 }, { "epoch": 0.13917833595359097, "grad_norm": 2.4691522121429443, "learning_rate": 4.530724823604875e-06, "loss": 0.833, "step": 11420 }, { "epoch": 0.13923927217773877, "grad_norm": 2.2521004676818848, "learning_rate": 4.530404105195639e-06, "loss": 0.8794, "step": 11425 }, { "epoch": 0.1393002084018866, "grad_norm": 2.0862770080566406, "learning_rate": 4.530083386786402e-06, "loss": 0.7998, "step": 11430 }, { "epoch": 0.13936114462603438, "grad_norm": 2.273591995239258, "learning_rate": 4.529762668377165e-06, "loss": 0.8897, "step": 11435 }, { "epoch": 0.1394220808501822, "grad_norm": 2.354876756668091, "learning_rate": 4.529441949967929e-06, "loss": 0.9204, "step": 11440 }, { "epoch": 0.13948301707433, "grad_norm": 1.9482496976852417, "learning_rate": 4.529121231558692e-06, "loss": 0.8908, "step": 11445 }, { "epoch": 0.13954395329847782, "grad_norm": 2.30145263671875, "learning_rate": 4.528800513149455e-06, "loss": 0.8264, "step": 11450 }, { "epoch": 0.13960488952262562, "grad_norm": 1.770737648010254, "learning_rate": 4.528479794740219e-06, "loss": 0.9516, "step": 11455 }, { "epoch": 0.13966582574677341, "grad_norm": 2.365908622741699, "learning_rate": 4.528159076330982e-06, "loss": 0.9007, "step": 11460 }, { "epoch": 0.13972676197092124, "grad_norm": 1.92036771774292, "learning_rate": 4.527838357921745e-06, "loss": 0.8555, "step": 11465 }, { "epoch": 0.13978769819506903, "grad_norm": 1.8764324188232422, "learning_rate": 4.527517639512508e-06, "loss": 0.8811, "step": 11470 }, { "epoch": 0.13984863441921686, "grad_norm": 2.150156259536743, "learning_rate": 4.527196921103272e-06, "loss": 0.9401, "step": 11475 }, { "epoch": 0.13990957064336465, "grad_norm": 1.865944266319275, "learning_rate": 4.526876202694035e-06, "loss": 0.8485, "step": 11480 }, { "epoch": 0.13997050686751247, "grad_norm": 2.1021687984466553, "learning_rate": 4.526555484284798e-06, "loss": 0.9415, "step": 11485 }, { "epoch": 0.14003144309166027, "grad_norm": 2.2144083976745605, "learning_rate": 4.526234765875562e-06, "loss": 0.9063, "step": 11490 }, { "epoch": 0.14009237931580806, "grad_norm": 1.7802038192749023, "learning_rate": 4.525914047466325e-06, "loss": 0.9257, "step": 11495 }, { "epoch": 0.14015331553995589, "grad_norm": 2.1035804748535156, "learning_rate": 4.525593329057088e-06, "loss": 0.8792, "step": 11500 }, { "epoch": 0.14021425176410368, "grad_norm": 2.4242846965789795, "learning_rate": 4.5252726106478515e-06, "loss": 0.9097, "step": 11505 }, { "epoch": 0.1402751879882515, "grad_norm": 1.8807523250579834, "learning_rate": 4.5249518922386145e-06, "loss": 0.9361, "step": 11510 }, { "epoch": 0.1403361242123993, "grad_norm": 2.5075926780700684, "learning_rate": 4.524631173829378e-06, "loss": 0.9256, "step": 11515 }, { "epoch": 0.14039706043654712, "grad_norm": 1.8687163591384888, "learning_rate": 4.524310455420141e-06, "loss": 0.9469, "step": 11520 }, { "epoch": 0.14045799666069492, "grad_norm": 1.7945760488510132, "learning_rate": 4.5239897370109044e-06, "loss": 0.8591, "step": 11525 }, { "epoch": 0.1405189328848427, "grad_norm": 2.0601119995117188, "learning_rate": 4.523669018601668e-06, "loss": 0.9199, "step": 11530 }, { "epoch": 0.14057986910899054, "grad_norm": 2.074321985244751, "learning_rate": 4.523348300192431e-06, "loss": 0.8561, "step": 11535 }, { "epoch": 0.14064080533313833, "grad_norm": 2.017869472503662, "learning_rate": 4.523027581783195e-06, "loss": 0.8559, "step": 11540 }, { "epoch": 0.14070174155728615, "grad_norm": 1.7630877494812012, "learning_rate": 4.522706863373958e-06, "loss": 0.9207, "step": 11545 }, { "epoch": 0.14076267778143395, "grad_norm": 1.7689592838287354, "learning_rate": 4.522386144964721e-06, "loss": 0.916, "step": 11550 }, { "epoch": 0.14082361400558177, "grad_norm": 1.8737359046936035, "learning_rate": 4.522065426555485e-06, "loss": 0.8788, "step": 11555 }, { "epoch": 0.14088455022972957, "grad_norm": 2.372319459915161, "learning_rate": 4.521744708146248e-06, "loss": 1.0091, "step": 11560 }, { "epoch": 0.14094548645387736, "grad_norm": 2.1424436569213867, "learning_rate": 4.521423989737011e-06, "loss": 0.8631, "step": 11565 }, { "epoch": 0.14100642267802518, "grad_norm": 2.067686080932617, "learning_rate": 4.521103271327775e-06, "loss": 0.9217, "step": 11570 }, { "epoch": 0.14106735890217298, "grad_norm": 2.3531129360198975, "learning_rate": 4.520782552918538e-06, "loss": 0.9021, "step": 11575 }, { "epoch": 0.1411282951263208, "grad_norm": 2.5119738578796387, "learning_rate": 4.520461834509301e-06, "loss": 0.9524, "step": 11580 }, { "epoch": 0.1411892313504686, "grad_norm": 2.0973477363586426, "learning_rate": 4.520141116100065e-06, "loss": 0.8956, "step": 11585 }, { "epoch": 0.14125016757461642, "grad_norm": 1.9433923959732056, "learning_rate": 4.519820397690828e-06, "loss": 0.921, "step": 11590 }, { "epoch": 0.14131110379876421, "grad_norm": 1.9475380182266235, "learning_rate": 4.519499679281591e-06, "loss": 0.8442, "step": 11595 }, { "epoch": 0.141372040022912, "grad_norm": 1.9487719535827637, "learning_rate": 4.519178960872355e-06, "loss": 0.8368, "step": 11600 }, { "epoch": 0.14143297624705983, "grad_norm": 1.5444759130477905, "learning_rate": 4.518858242463118e-06, "loss": 0.9099, "step": 11605 }, { "epoch": 0.14149391247120763, "grad_norm": 1.9092003107070923, "learning_rate": 4.518537524053881e-06, "loss": 0.9255, "step": 11610 }, { "epoch": 0.14155484869535545, "grad_norm": 1.999608039855957, "learning_rate": 4.518216805644645e-06, "loss": 0.9028, "step": 11615 }, { "epoch": 0.14161578491950325, "grad_norm": 2.083103895187378, "learning_rate": 4.517896087235408e-06, "loss": 0.9408, "step": 11620 }, { "epoch": 0.14167672114365104, "grad_norm": 2.1067869663238525, "learning_rate": 4.517575368826171e-06, "loss": 0.8905, "step": 11625 }, { "epoch": 0.14173765736779886, "grad_norm": 1.9136285781860352, "learning_rate": 4.5172546504169346e-06, "loss": 0.9056, "step": 11630 }, { "epoch": 0.14179859359194666, "grad_norm": 2.6113476753234863, "learning_rate": 4.516933932007698e-06, "loss": 0.8601, "step": 11635 }, { "epoch": 0.14185952981609448, "grad_norm": 1.9516854286193848, "learning_rate": 4.516613213598461e-06, "loss": 0.9088, "step": 11640 }, { "epoch": 0.14192046604024228, "grad_norm": 1.6722429990768433, "learning_rate": 4.516292495189224e-06, "loss": 0.875, "step": 11645 }, { "epoch": 0.1419814022643901, "grad_norm": 2.1334619522094727, "learning_rate": 4.5159717767799875e-06, "loss": 0.9179, "step": 11650 }, { "epoch": 0.1420423384885379, "grad_norm": 1.699507474899292, "learning_rate": 4.5156510583707505e-06, "loss": 0.8617, "step": 11655 }, { "epoch": 0.1421032747126857, "grad_norm": 2.280416250228882, "learning_rate": 4.5153303399615135e-06, "loss": 0.9104, "step": 11660 }, { "epoch": 0.1421642109368335, "grad_norm": 2.02980899810791, "learning_rate": 4.515009621552277e-06, "loss": 0.9636, "step": 11665 }, { "epoch": 0.1422251471609813, "grad_norm": 1.8635623455047607, "learning_rate": 4.51468890314304e-06, "loss": 0.939, "step": 11670 }, { "epoch": 0.14228608338512913, "grad_norm": 2.1187198162078857, "learning_rate": 4.514368184733804e-06, "loss": 0.9096, "step": 11675 }, { "epoch": 0.14234701960927693, "grad_norm": 2.0773913860321045, "learning_rate": 4.514047466324567e-06, "loss": 0.8786, "step": 11680 }, { "epoch": 0.14240795583342475, "grad_norm": 1.4463703632354736, "learning_rate": 4.513726747915331e-06, "loss": 0.8075, "step": 11685 }, { "epoch": 0.14246889205757254, "grad_norm": 2.457878589630127, "learning_rate": 4.513406029506094e-06, "loss": 0.8704, "step": 11690 }, { "epoch": 0.14252982828172034, "grad_norm": 1.8132755756378174, "learning_rate": 4.513085311096857e-06, "loss": 0.8778, "step": 11695 }, { "epoch": 0.14259076450586816, "grad_norm": 2.14494252204895, "learning_rate": 4.512764592687621e-06, "loss": 0.8567, "step": 11700 }, { "epoch": 0.14265170073001596, "grad_norm": 1.9044408798217773, "learning_rate": 4.512443874278384e-06, "loss": 0.9038, "step": 11705 }, { "epoch": 0.14271263695416378, "grad_norm": 1.6698691844940186, "learning_rate": 4.512123155869148e-06, "loss": 0.8829, "step": 11710 }, { "epoch": 0.14277357317831157, "grad_norm": 2.295078754425049, "learning_rate": 4.511802437459911e-06, "loss": 0.9297, "step": 11715 }, { "epoch": 0.1428345094024594, "grad_norm": 1.9621307849884033, "learning_rate": 4.511481719050674e-06, "loss": 0.9554, "step": 11720 }, { "epoch": 0.1428954456266072, "grad_norm": 1.8429111242294312, "learning_rate": 4.511161000641437e-06, "loss": 0.9069, "step": 11725 }, { "epoch": 0.142956381850755, "grad_norm": 1.9086997509002686, "learning_rate": 4.510840282232201e-06, "loss": 0.8018, "step": 11730 }, { "epoch": 0.1430173180749028, "grad_norm": 1.7666716575622559, "learning_rate": 4.510519563822964e-06, "loss": 0.9055, "step": 11735 }, { "epoch": 0.1430782542990506, "grad_norm": 2.121164083480835, "learning_rate": 4.510198845413727e-06, "loss": 0.8449, "step": 11740 }, { "epoch": 0.14313919052319843, "grad_norm": 1.8259425163269043, "learning_rate": 4.509878127004491e-06, "loss": 0.9331, "step": 11745 }, { "epoch": 0.14320012674734622, "grad_norm": 1.7045856714248657, "learning_rate": 4.509557408595254e-06, "loss": 0.871, "step": 11750 }, { "epoch": 0.14326106297149405, "grad_norm": 2.209071636199951, "learning_rate": 4.509236690186017e-06, "loss": 0.9645, "step": 11755 }, { "epoch": 0.14332199919564184, "grad_norm": 2.3358800411224365, "learning_rate": 4.508915971776781e-06, "loss": 0.8924, "step": 11760 }, { "epoch": 0.14338293541978964, "grad_norm": 2.0228986740112305, "learning_rate": 4.508595253367544e-06, "loss": 0.8731, "step": 11765 }, { "epoch": 0.14344387164393746, "grad_norm": 2.247129201889038, "learning_rate": 4.508274534958307e-06, "loss": 0.9486, "step": 11770 }, { "epoch": 0.14350480786808525, "grad_norm": 1.910204291343689, "learning_rate": 4.5079538165490705e-06, "loss": 0.9365, "step": 11775 }, { "epoch": 0.14356574409223308, "grad_norm": 2.0476763248443604, "learning_rate": 4.5076330981398335e-06, "loss": 0.9436, "step": 11780 }, { "epoch": 0.14362668031638087, "grad_norm": 2.0961902141571045, "learning_rate": 4.5073123797305966e-06, "loss": 0.9147, "step": 11785 }, { "epoch": 0.1436876165405287, "grad_norm": 1.895566701889038, "learning_rate": 4.50699166132136e-06, "loss": 0.9252, "step": 11790 }, { "epoch": 0.1437485527646765, "grad_norm": 1.9628269672393799, "learning_rate": 4.5066709429121234e-06, "loss": 0.9124, "step": 11795 }, { "epoch": 0.14380948898882429, "grad_norm": 2.1055099964141846, "learning_rate": 4.5063502245028865e-06, "loss": 0.9243, "step": 11800 }, { "epoch": 0.1438704252129721, "grad_norm": 2.543718099594116, "learning_rate": 4.50602950609365e-06, "loss": 0.864, "step": 11805 }, { "epoch": 0.1439313614371199, "grad_norm": 2.1656241416931152, "learning_rate": 4.505708787684413e-06, "loss": 0.9353, "step": 11810 }, { "epoch": 0.14399229766126773, "grad_norm": 1.8278141021728516, "learning_rate": 4.505388069275176e-06, "loss": 0.9015, "step": 11815 }, { "epoch": 0.14405323388541552, "grad_norm": 1.9372795820236206, "learning_rate": 4.50506735086594e-06, "loss": 0.8353, "step": 11820 }, { "epoch": 0.14411417010956334, "grad_norm": 2.2339959144592285, "learning_rate": 4.504746632456703e-06, "loss": 0.9115, "step": 11825 }, { "epoch": 0.14417510633371114, "grad_norm": 1.8086235523223877, "learning_rate": 4.504425914047466e-06, "loss": 0.8882, "step": 11830 }, { "epoch": 0.14423604255785893, "grad_norm": 1.955595850944519, "learning_rate": 4.50410519563823e-06, "loss": 0.8917, "step": 11835 }, { "epoch": 0.14429697878200676, "grad_norm": 1.8192328214645386, "learning_rate": 4.503784477228993e-06, "loss": 0.9049, "step": 11840 }, { "epoch": 0.14435791500615455, "grad_norm": 2.7032394409179688, "learning_rate": 4.503463758819757e-06, "loss": 0.9455, "step": 11845 }, { "epoch": 0.14441885123030238, "grad_norm": 1.9731764793395996, "learning_rate": 4.50314304041052e-06, "loss": 0.905, "step": 11850 }, { "epoch": 0.14447978745445017, "grad_norm": 2.0590600967407227, "learning_rate": 4.502822322001283e-06, "loss": 0.8664, "step": 11855 }, { "epoch": 0.144540723678598, "grad_norm": 2.40425705909729, "learning_rate": 4.502501603592047e-06, "loss": 0.9576, "step": 11860 }, { "epoch": 0.1446016599027458, "grad_norm": 1.9348522424697876, "learning_rate": 4.50218088518281e-06, "loss": 0.8321, "step": 11865 }, { "epoch": 0.14466259612689358, "grad_norm": 1.7665339708328247, "learning_rate": 4.501860166773574e-06, "loss": 0.8756, "step": 11870 }, { "epoch": 0.1447235323510414, "grad_norm": 1.892961025238037, "learning_rate": 4.501539448364337e-06, "loss": 0.9372, "step": 11875 }, { "epoch": 0.1447844685751892, "grad_norm": 1.9381022453308105, "learning_rate": 4.5012187299551e-06, "loss": 0.907, "step": 11880 }, { "epoch": 0.14484540479933702, "grad_norm": 1.9132485389709473, "learning_rate": 4.500898011545864e-06, "loss": 0.9309, "step": 11885 }, { "epoch": 0.14490634102348482, "grad_norm": 2.1150832176208496, "learning_rate": 4.500577293136627e-06, "loss": 0.9515, "step": 11890 }, { "epoch": 0.14496727724763261, "grad_norm": 1.8032828569412231, "learning_rate": 4.50025657472739e-06, "loss": 0.9349, "step": 11895 }, { "epoch": 0.14502821347178044, "grad_norm": 2.2796192169189453, "learning_rate": 4.499935856318153e-06, "loss": 0.825, "step": 11900 }, { "epoch": 0.14508914969592823, "grad_norm": 1.9539145231246948, "learning_rate": 4.499615137908917e-06, "loss": 0.9313, "step": 11905 }, { "epoch": 0.14515008592007606, "grad_norm": 2.2405319213867188, "learning_rate": 4.49929441949968e-06, "loss": 0.8781, "step": 11910 }, { "epoch": 0.14521102214422385, "grad_norm": 1.993906021118164, "learning_rate": 4.498973701090443e-06, "loss": 0.9041, "step": 11915 }, { "epoch": 0.14527195836837167, "grad_norm": 1.904508352279663, "learning_rate": 4.4986529826812065e-06, "loss": 0.906, "step": 11920 }, { "epoch": 0.14533289459251947, "grad_norm": 2.3389217853546143, "learning_rate": 4.4983322642719695e-06, "loss": 0.8402, "step": 11925 }, { "epoch": 0.14539383081666726, "grad_norm": 2.095165491104126, "learning_rate": 4.4980115458627325e-06, "loss": 0.9523, "step": 11930 }, { "epoch": 0.14545476704081509, "grad_norm": 2.074445962905884, "learning_rate": 4.497690827453496e-06, "loss": 0.8731, "step": 11935 }, { "epoch": 0.14551570326496288, "grad_norm": 1.9184889793395996, "learning_rate": 4.497370109044259e-06, "loss": 0.8975, "step": 11940 }, { "epoch": 0.1455766394891107, "grad_norm": 2.034179925918579, "learning_rate": 4.497049390635022e-06, "loss": 0.8958, "step": 11945 }, { "epoch": 0.1456375757132585, "grad_norm": 1.9127519130706787, "learning_rate": 4.496728672225786e-06, "loss": 0.8822, "step": 11950 }, { "epoch": 0.14569851193740632, "grad_norm": 2.08644437789917, "learning_rate": 4.496407953816549e-06, "loss": 0.8641, "step": 11955 }, { "epoch": 0.14575944816155412, "grad_norm": 2.0427863597869873, "learning_rate": 4.496087235407312e-06, "loss": 0.8693, "step": 11960 }, { "epoch": 0.1458203843857019, "grad_norm": 1.859229564666748, "learning_rate": 4.495766516998076e-06, "loss": 0.9111, "step": 11965 }, { "epoch": 0.14588132060984974, "grad_norm": 2.3213930130004883, "learning_rate": 4.495445798588839e-06, "loss": 0.8834, "step": 11970 }, { "epoch": 0.14594225683399753, "grad_norm": 1.950987696647644, "learning_rate": 4.495125080179602e-06, "loss": 0.9088, "step": 11975 }, { "epoch": 0.14600319305814535, "grad_norm": 1.9253138303756714, "learning_rate": 4.494804361770366e-06, "loss": 0.9313, "step": 11980 }, { "epoch": 0.14606412928229315, "grad_norm": 2.132711410522461, "learning_rate": 4.494483643361129e-06, "loss": 0.876, "step": 11985 }, { "epoch": 0.14612506550644097, "grad_norm": 2.822172164916992, "learning_rate": 4.494162924951893e-06, "loss": 0.8728, "step": 11990 }, { "epoch": 0.14618600173058877, "grad_norm": 2.241292953491211, "learning_rate": 4.493842206542656e-06, "loss": 0.8606, "step": 11995 }, { "epoch": 0.14624693795473656, "grad_norm": 2.0676498413085938, "learning_rate": 4.493521488133419e-06, "loss": 0.949, "step": 12000 }, { "epoch": 0.14630787417888438, "grad_norm": 2.093207597732544, "learning_rate": 4.493200769724183e-06, "loss": 0.9339, "step": 12005 }, { "epoch": 0.14636881040303218, "grad_norm": 1.9670062065124512, "learning_rate": 4.492880051314946e-06, "loss": 0.9782, "step": 12010 }, { "epoch": 0.14642974662718, "grad_norm": 1.8809889554977417, "learning_rate": 4.49255933290571e-06, "loss": 0.9005, "step": 12015 }, { "epoch": 0.1464906828513278, "grad_norm": 1.926801323890686, "learning_rate": 4.492238614496473e-06, "loss": 0.9234, "step": 12020 }, { "epoch": 0.14655161907547562, "grad_norm": 1.7331843376159668, "learning_rate": 4.491917896087236e-06, "loss": 0.8853, "step": 12025 }, { "epoch": 0.14661255529962341, "grad_norm": 1.824433445930481, "learning_rate": 4.491597177678e-06, "loss": 0.8905, "step": 12030 }, { "epoch": 0.1466734915237712, "grad_norm": 2.177546739578247, "learning_rate": 4.491276459268763e-06, "loss": 0.9436, "step": 12035 }, { "epoch": 0.14673442774791903, "grad_norm": 2.4328887462615967, "learning_rate": 4.490955740859526e-06, "loss": 0.8795, "step": 12040 }, { "epoch": 0.14679536397206683, "grad_norm": 2.624610185623169, "learning_rate": 4.4906350224502895e-06, "loss": 0.8936, "step": 12045 }, { "epoch": 0.14685630019621465, "grad_norm": 1.9685935974121094, "learning_rate": 4.4903143040410525e-06, "loss": 0.8609, "step": 12050 }, { "epoch": 0.14691723642036245, "grad_norm": 2.2523412704467773, "learning_rate": 4.4899935856318156e-06, "loss": 0.856, "step": 12055 }, { "epoch": 0.14697817264451027, "grad_norm": 1.9988845586776733, "learning_rate": 4.4896728672225786e-06, "loss": 0.9113, "step": 12060 }, { "epoch": 0.14703910886865806, "grad_norm": 1.7796013355255127, "learning_rate": 4.4893521488133424e-06, "loss": 0.9937, "step": 12065 }, { "epoch": 0.14710004509280586, "grad_norm": 2.2033631801605225, "learning_rate": 4.4890314304041055e-06, "loss": 0.8641, "step": 12070 }, { "epoch": 0.14716098131695368, "grad_norm": 1.8969290256500244, "learning_rate": 4.4887107119948685e-06, "loss": 0.9001, "step": 12075 }, { "epoch": 0.14722191754110148, "grad_norm": 1.9517772197723389, "learning_rate": 4.488389993585632e-06, "loss": 0.9637, "step": 12080 }, { "epoch": 0.1472828537652493, "grad_norm": 1.9077048301696777, "learning_rate": 4.488069275176395e-06, "loss": 0.8911, "step": 12085 }, { "epoch": 0.1473437899893971, "grad_norm": 1.7636137008666992, "learning_rate": 4.487748556767158e-06, "loss": 0.861, "step": 12090 }, { "epoch": 0.14740472621354492, "grad_norm": 2.11824369430542, "learning_rate": 4.487427838357922e-06, "loss": 0.9242, "step": 12095 }, { "epoch": 0.1474656624376927, "grad_norm": 2.4708869457244873, "learning_rate": 4.487107119948685e-06, "loss": 0.9179, "step": 12100 }, { "epoch": 0.1475265986618405, "grad_norm": 1.7988901138305664, "learning_rate": 4.486786401539448e-06, "loss": 0.8931, "step": 12105 }, { "epoch": 0.14758753488598833, "grad_norm": 1.8658514022827148, "learning_rate": 4.486465683130212e-06, "loss": 0.872, "step": 12110 }, { "epoch": 0.14764847111013613, "grad_norm": 2.1975889205932617, "learning_rate": 4.486144964720975e-06, "loss": 0.8705, "step": 12115 }, { "epoch": 0.14770940733428395, "grad_norm": 1.7581180334091187, "learning_rate": 4.485824246311738e-06, "loss": 0.9333, "step": 12120 }, { "epoch": 0.14777034355843174, "grad_norm": 2.164109945297241, "learning_rate": 4.485503527902502e-06, "loss": 0.8557, "step": 12125 }, { "epoch": 0.14783127978257954, "grad_norm": 2.1122097969055176, "learning_rate": 4.485182809493265e-06, "loss": 0.8636, "step": 12130 }, { "epoch": 0.14789221600672736, "grad_norm": 1.998821496963501, "learning_rate": 4.484862091084028e-06, "loss": 0.857, "step": 12135 }, { "epoch": 0.14795315223087516, "grad_norm": 1.9516605138778687, "learning_rate": 4.484541372674792e-06, "loss": 0.8193, "step": 12140 }, { "epoch": 0.14801408845502298, "grad_norm": 1.8217238187789917, "learning_rate": 4.484220654265555e-06, "loss": 0.8745, "step": 12145 }, { "epoch": 0.14807502467917077, "grad_norm": 2.5395572185516357, "learning_rate": 4.483899935856319e-06, "loss": 0.942, "step": 12150 }, { "epoch": 0.1481359609033186, "grad_norm": 1.8519396781921387, "learning_rate": 4.483579217447082e-06, "loss": 0.8854, "step": 12155 }, { "epoch": 0.1481968971274664, "grad_norm": 2.1148436069488525, "learning_rate": 4.483258499037846e-06, "loss": 0.9428, "step": 12160 }, { "epoch": 0.1482578333516142, "grad_norm": 1.835180401802063, "learning_rate": 4.482937780628609e-06, "loss": 0.9293, "step": 12165 }, { "epoch": 0.148318769575762, "grad_norm": 1.9125113487243652, "learning_rate": 4.482617062219372e-06, "loss": 0.923, "step": 12170 }, { "epoch": 0.1483797057999098, "grad_norm": 2.2678511142730713, "learning_rate": 4.482296343810136e-06, "loss": 0.948, "step": 12175 }, { "epoch": 0.14844064202405763, "grad_norm": 1.8840705156326294, "learning_rate": 4.481975625400899e-06, "loss": 0.9261, "step": 12180 }, { "epoch": 0.14850157824820542, "grad_norm": 2.1683847904205322, "learning_rate": 4.481654906991662e-06, "loss": 0.951, "step": 12185 }, { "epoch": 0.14856251447235325, "grad_norm": 2.124168634414673, "learning_rate": 4.4813341885824255e-06, "loss": 0.913, "step": 12190 }, { "epoch": 0.14862345069650104, "grad_norm": 1.755099892616272, "learning_rate": 4.4810134701731885e-06, "loss": 0.9195, "step": 12195 }, { "epoch": 0.14868438692064884, "grad_norm": 2.21586537361145, "learning_rate": 4.4806927517639515e-06, "loss": 0.8636, "step": 12200 }, { "epoch": 0.14874532314479666, "grad_norm": 1.903825283050537, "learning_rate": 4.480372033354715e-06, "loss": 0.8545, "step": 12205 }, { "epoch": 0.14880625936894445, "grad_norm": 2.4784183502197266, "learning_rate": 4.480051314945478e-06, "loss": 0.9452, "step": 12210 }, { "epoch": 0.14886719559309228, "grad_norm": 2.049992799758911, "learning_rate": 4.479730596536241e-06, "loss": 0.9414, "step": 12215 }, { "epoch": 0.14892813181724007, "grad_norm": 2.5502047538757324, "learning_rate": 4.479409878127005e-06, "loss": 0.9243, "step": 12220 }, { "epoch": 0.1489890680413879, "grad_norm": 1.8140841722488403, "learning_rate": 4.479089159717768e-06, "loss": 0.8757, "step": 12225 }, { "epoch": 0.1490500042655357, "grad_norm": 1.9423539638519287, "learning_rate": 4.478768441308531e-06, "loss": 0.9442, "step": 12230 }, { "epoch": 0.14911094048968349, "grad_norm": 2.0679521560668945, "learning_rate": 4.478447722899294e-06, "loss": 0.922, "step": 12235 }, { "epoch": 0.1491718767138313, "grad_norm": 1.9480857849121094, "learning_rate": 4.478127004490058e-06, "loss": 0.8951, "step": 12240 }, { "epoch": 0.1492328129379791, "grad_norm": 1.8856793642044067, "learning_rate": 4.477806286080821e-06, "loss": 0.8952, "step": 12245 }, { "epoch": 0.14929374916212693, "grad_norm": 2.0131099224090576, "learning_rate": 4.477485567671584e-06, "loss": 0.8336, "step": 12250 }, { "epoch": 0.14935468538627472, "grad_norm": 2.1303188800811768, "learning_rate": 4.477164849262348e-06, "loss": 0.9305, "step": 12255 }, { "epoch": 0.14941562161042254, "grad_norm": 2.3075547218322754, "learning_rate": 4.476844130853111e-06, "loss": 0.8823, "step": 12260 }, { "epoch": 0.14947655783457034, "grad_norm": 1.6807568073272705, "learning_rate": 4.476523412443874e-06, "loss": 0.9218, "step": 12265 }, { "epoch": 0.14953749405871813, "grad_norm": 1.8839329481124878, "learning_rate": 4.476202694034638e-06, "loss": 0.8871, "step": 12270 }, { "epoch": 0.14959843028286596, "grad_norm": 2.2036681175231934, "learning_rate": 4.475881975625401e-06, "loss": 0.9243, "step": 12275 }, { "epoch": 0.14965936650701375, "grad_norm": 2.062946081161499, "learning_rate": 4.475561257216164e-06, "loss": 0.8356, "step": 12280 }, { "epoch": 0.14972030273116158, "grad_norm": 2.0038864612579346, "learning_rate": 4.475240538806928e-06, "loss": 0.9254, "step": 12285 }, { "epoch": 0.14978123895530937, "grad_norm": 1.977797269821167, "learning_rate": 4.474919820397691e-06, "loss": 0.9019, "step": 12290 }, { "epoch": 0.1498421751794572, "grad_norm": 2.1203086376190186, "learning_rate": 4.474599101988455e-06, "loss": 0.9044, "step": 12295 }, { "epoch": 0.149903111403605, "grad_norm": 1.8430455923080444, "learning_rate": 4.474278383579218e-06, "loss": 0.9197, "step": 12300 }, { "epoch": 0.14996404762775278, "grad_norm": 2.2841720581054688, "learning_rate": 4.473957665169981e-06, "loss": 0.9103, "step": 12305 }, { "epoch": 0.1500249838519006, "grad_norm": 1.9034295082092285, "learning_rate": 4.473636946760745e-06, "loss": 0.8744, "step": 12310 }, { "epoch": 0.1500859200760484, "grad_norm": 1.9612149000167847, "learning_rate": 4.473316228351508e-06, "loss": 0.9831, "step": 12315 }, { "epoch": 0.15014685630019622, "grad_norm": 2.0261614322662354, "learning_rate": 4.4729955099422715e-06, "loss": 0.9439, "step": 12320 }, { "epoch": 0.15020779252434402, "grad_norm": 1.8947933912277222, "learning_rate": 4.4726747915330346e-06, "loss": 0.867, "step": 12325 }, { "epoch": 0.15026872874849184, "grad_norm": 2.1239919662475586, "learning_rate": 4.4723540731237976e-06, "loss": 0.9165, "step": 12330 }, { "epoch": 0.15032966497263964, "grad_norm": 1.8672969341278076, "learning_rate": 4.4720333547145614e-06, "loss": 0.9139, "step": 12335 }, { "epoch": 0.15039060119678743, "grad_norm": 1.92372465133667, "learning_rate": 4.4717126363053245e-06, "loss": 0.8857, "step": 12340 }, { "epoch": 0.15045153742093526, "grad_norm": 2.2320444583892822, "learning_rate": 4.4713919178960875e-06, "loss": 0.9415, "step": 12345 }, { "epoch": 0.15051247364508305, "grad_norm": 1.8224767446517944, "learning_rate": 4.471071199486851e-06, "loss": 0.8324, "step": 12350 }, { "epoch": 0.15057340986923087, "grad_norm": 1.7960879802703857, "learning_rate": 4.470750481077614e-06, "loss": 0.8921, "step": 12355 }, { "epoch": 0.15063434609337867, "grad_norm": 2.098647356033325, "learning_rate": 4.470429762668377e-06, "loss": 0.9029, "step": 12360 }, { "epoch": 0.15069528231752646, "grad_norm": 1.7943260669708252, "learning_rate": 4.470109044259141e-06, "loss": 0.8645, "step": 12365 }, { "epoch": 0.1507562185416743, "grad_norm": 1.8360487222671509, "learning_rate": 4.469788325849904e-06, "loss": 1.0893, "step": 12370 }, { "epoch": 0.15081715476582208, "grad_norm": 1.698880910873413, "learning_rate": 4.469467607440667e-06, "loss": 0.9238, "step": 12375 }, { "epoch": 0.1508780909899699, "grad_norm": 1.6932123899459839, "learning_rate": 4.469146889031431e-06, "loss": 0.8891, "step": 12380 }, { "epoch": 0.1509390272141177, "grad_norm": 1.9177392721176147, "learning_rate": 4.468826170622194e-06, "loss": 0.8081, "step": 12385 }, { "epoch": 0.15099996343826552, "grad_norm": 1.8877965211868286, "learning_rate": 4.468505452212957e-06, "loss": 0.8938, "step": 12390 }, { "epoch": 0.15106089966241332, "grad_norm": 1.8527635335922241, "learning_rate": 4.468184733803721e-06, "loss": 0.8952, "step": 12395 }, { "epoch": 0.1511218358865611, "grad_norm": 2.3406903743743896, "learning_rate": 4.467864015394484e-06, "loss": 0.9378, "step": 12400 }, { "epoch": 0.15118277211070894, "grad_norm": 2.2686710357666016, "learning_rate": 4.467543296985247e-06, "loss": 0.9965, "step": 12405 }, { "epoch": 0.15124370833485673, "grad_norm": 1.8690910339355469, "learning_rate": 4.46722257857601e-06, "loss": 0.8939, "step": 12410 }, { "epoch": 0.15130464455900455, "grad_norm": 1.797900915145874, "learning_rate": 4.466901860166774e-06, "loss": 0.927, "step": 12415 }, { "epoch": 0.15136558078315235, "grad_norm": 2.2003114223480225, "learning_rate": 4.466581141757537e-06, "loss": 0.9563, "step": 12420 }, { "epoch": 0.15142651700730017, "grad_norm": 1.864068865776062, "learning_rate": 4.4662604233483e-06, "loss": 0.8772, "step": 12425 }, { "epoch": 0.15148745323144797, "grad_norm": 2.166049003601074, "learning_rate": 4.465939704939064e-06, "loss": 0.9044, "step": 12430 }, { "epoch": 0.15154838945559576, "grad_norm": 1.7192660570144653, "learning_rate": 4.465618986529827e-06, "loss": 0.8567, "step": 12435 }, { "epoch": 0.15160932567974358, "grad_norm": 2.1794962882995605, "learning_rate": 4.465298268120591e-06, "loss": 0.9033, "step": 12440 }, { "epoch": 0.15167026190389138, "grad_norm": 2.280944347381592, "learning_rate": 4.464977549711354e-06, "loss": 0.9424, "step": 12445 }, { "epoch": 0.1517311981280392, "grad_norm": 2.263733148574829, "learning_rate": 4.464656831302117e-06, "loss": 0.8897, "step": 12450 }, { "epoch": 0.151792134352187, "grad_norm": 1.840530514717102, "learning_rate": 4.464336112892881e-06, "loss": 0.8802, "step": 12455 }, { "epoch": 0.15185307057633482, "grad_norm": 2.074402332305908, "learning_rate": 4.464015394483644e-06, "loss": 0.8693, "step": 12460 }, { "epoch": 0.15191400680048261, "grad_norm": 1.7796342372894287, "learning_rate": 4.4636946760744075e-06, "loss": 0.867, "step": 12465 }, { "epoch": 0.1519749430246304, "grad_norm": 1.6340785026550293, "learning_rate": 4.4633739576651705e-06, "loss": 0.914, "step": 12470 }, { "epoch": 0.15203587924877823, "grad_norm": 1.7967549562454224, "learning_rate": 4.4630532392559335e-06, "loss": 0.88, "step": 12475 }, { "epoch": 0.15209681547292603, "grad_norm": 2.0785624980926514, "learning_rate": 4.462732520846697e-06, "loss": 0.9637, "step": 12480 }, { "epoch": 0.15215775169707385, "grad_norm": 1.9786360263824463, "learning_rate": 4.46241180243746e-06, "loss": 0.8711, "step": 12485 }, { "epoch": 0.15221868792122165, "grad_norm": 2.189225196838379, "learning_rate": 4.4620910840282234e-06, "loss": 0.815, "step": 12490 }, { "epoch": 0.15227962414536947, "grad_norm": 1.668688178062439, "learning_rate": 4.461770365618987e-06, "loss": 0.8705, "step": 12495 }, { "epoch": 0.15234056036951726, "grad_norm": 2.0532288551330566, "learning_rate": 4.46144964720975e-06, "loss": 0.8681, "step": 12500 }, { "epoch": 0.15240149659366506, "grad_norm": 2.4692108631134033, "learning_rate": 4.461128928800513e-06, "loss": 0.9678, "step": 12505 }, { "epoch": 0.15246243281781288, "grad_norm": 2.445373296737671, "learning_rate": 4.460808210391277e-06, "loss": 0.9121, "step": 12510 }, { "epoch": 0.15252336904196068, "grad_norm": 1.992371678352356, "learning_rate": 4.46048749198204e-06, "loss": 0.9345, "step": 12515 }, { "epoch": 0.1525843052661085, "grad_norm": 1.9752579927444458, "learning_rate": 4.460166773572803e-06, "loss": 0.8654, "step": 12520 }, { "epoch": 0.1526452414902563, "grad_norm": 1.9002666473388672, "learning_rate": 4.459846055163567e-06, "loss": 0.9007, "step": 12525 }, { "epoch": 0.15270617771440412, "grad_norm": 2.1370201110839844, "learning_rate": 4.45952533675433e-06, "loss": 0.9474, "step": 12530 }, { "epoch": 0.1527671139385519, "grad_norm": 1.8414547443389893, "learning_rate": 4.459204618345093e-06, "loss": 0.8418, "step": 12535 }, { "epoch": 0.1528280501626997, "grad_norm": 2.0055723190307617, "learning_rate": 4.458883899935857e-06, "loss": 0.8469, "step": 12540 }, { "epoch": 0.15288898638684753, "grad_norm": 1.8780043125152588, "learning_rate": 4.45856318152662e-06, "loss": 0.8481, "step": 12545 }, { "epoch": 0.15294992261099533, "grad_norm": 2.4998326301574707, "learning_rate": 4.458242463117383e-06, "loss": 0.8677, "step": 12550 }, { "epoch": 0.15301085883514315, "grad_norm": 2.0648579597473145, "learning_rate": 4.457921744708147e-06, "loss": 0.9216, "step": 12555 }, { "epoch": 0.15307179505929094, "grad_norm": 2.2935354709625244, "learning_rate": 4.45760102629891e-06, "loss": 1.0092, "step": 12560 }, { "epoch": 0.15313273128343877, "grad_norm": 2.225349187850952, "learning_rate": 4.457280307889673e-06, "loss": 0.9595, "step": 12565 }, { "epoch": 0.15319366750758656, "grad_norm": 2.047556161880493, "learning_rate": 4.456959589480436e-06, "loss": 0.8429, "step": 12570 }, { "epoch": 0.15325460373173436, "grad_norm": 1.9246140718460083, "learning_rate": 4.4566388710712e-06, "loss": 0.923, "step": 12575 }, { "epoch": 0.15331553995588218, "grad_norm": 2.2532103061676025, "learning_rate": 4.456318152661963e-06, "loss": 0.8793, "step": 12580 }, { "epoch": 0.15337647618002997, "grad_norm": 1.7239205837249756, "learning_rate": 4.455997434252726e-06, "loss": 0.8916, "step": 12585 }, { "epoch": 0.1534374124041778, "grad_norm": 2.1074345111846924, "learning_rate": 4.45567671584349e-06, "loss": 0.9035, "step": 12590 }, { "epoch": 0.1534983486283256, "grad_norm": 1.819434642791748, "learning_rate": 4.455355997434253e-06, "loss": 0.9208, "step": 12595 }, { "epoch": 0.1535592848524734, "grad_norm": 2.143853187561035, "learning_rate": 4.4550352790250166e-06, "loss": 0.8801, "step": 12600 }, { "epoch": 0.1536202210766212, "grad_norm": 1.7835667133331299, "learning_rate": 4.45471456061578e-06, "loss": 0.8937, "step": 12605 }, { "epoch": 0.153681157300769, "grad_norm": 1.9822551012039185, "learning_rate": 4.4543938422065435e-06, "loss": 0.8804, "step": 12610 }, { "epoch": 0.15374209352491683, "grad_norm": 1.9078471660614014, "learning_rate": 4.4540731237973065e-06, "loss": 0.9172, "step": 12615 }, { "epoch": 0.15380302974906462, "grad_norm": 2.352811813354492, "learning_rate": 4.4537524053880695e-06, "loss": 0.8511, "step": 12620 }, { "epoch": 0.15386396597321245, "grad_norm": 1.9942513704299927, "learning_rate": 4.453431686978833e-06, "loss": 0.9844, "step": 12625 }, { "epoch": 0.15392490219736024, "grad_norm": 2.0095489025115967, "learning_rate": 4.453110968569596e-06, "loss": 0.8664, "step": 12630 }, { "epoch": 0.15398583842150804, "grad_norm": 1.8644583225250244, "learning_rate": 4.45279025016036e-06, "loss": 0.8548, "step": 12635 }, { "epoch": 0.15404677464565586, "grad_norm": 2.0917680263519287, "learning_rate": 4.452469531751123e-06, "loss": 0.8818, "step": 12640 }, { "epoch": 0.15410771086980365, "grad_norm": 1.8497648239135742, "learning_rate": 4.452148813341886e-06, "loss": 0.9411, "step": 12645 }, { "epoch": 0.15416864709395148, "grad_norm": 2.0563251972198486, "learning_rate": 4.451828094932649e-06, "loss": 0.8914, "step": 12650 }, { "epoch": 0.15422958331809927, "grad_norm": 2.192237615585327, "learning_rate": 4.451507376523413e-06, "loss": 0.948, "step": 12655 }, { "epoch": 0.1542905195422471, "grad_norm": 1.9133671522140503, "learning_rate": 4.451186658114176e-06, "loss": 0.8513, "step": 12660 }, { "epoch": 0.1543514557663949, "grad_norm": 2.078148365020752, "learning_rate": 4.450865939704939e-06, "loss": 0.8965, "step": 12665 }, { "epoch": 0.15441239199054269, "grad_norm": 2.050342321395874, "learning_rate": 4.450545221295703e-06, "loss": 0.9141, "step": 12670 }, { "epoch": 0.1544733282146905, "grad_norm": 2.0532705783843994, "learning_rate": 4.450224502886466e-06, "loss": 0.8727, "step": 12675 }, { "epoch": 0.1545342644388383, "grad_norm": 2.1018447875976562, "learning_rate": 4.449903784477229e-06, "loss": 0.9505, "step": 12680 }, { "epoch": 0.15459520066298613, "grad_norm": 2.1605453491210938, "learning_rate": 4.449583066067993e-06, "loss": 0.8822, "step": 12685 }, { "epoch": 0.15465613688713392, "grad_norm": 2.1515052318573, "learning_rate": 4.449262347658756e-06, "loss": 0.912, "step": 12690 }, { "epoch": 0.15471707311128174, "grad_norm": 2.0800795555114746, "learning_rate": 4.448941629249519e-06, "loss": 0.8936, "step": 12695 }, { "epoch": 0.15477800933542954, "grad_norm": 1.7520190477371216, "learning_rate": 4.448620910840283e-06, "loss": 0.8959, "step": 12700 }, { "epoch": 0.15483894555957733, "grad_norm": 2.1336114406585693, "learning_rate": 4.448300192431046e-06, "loss": 0.8761, "step": 12705 }, { "epoch": 0.15489988178372516, "grad_norm": 2.032870054244995, "learning_rate": 4.447979474021809e-06, "loss": 0.894, "step": 12710 }, { "epoch": 0.15496081800787295, "grad_norm": 2.1412689685821533, "learning_rate": 4.447658755612573e-06, "loss": 0.9388, "step": 12715 }, { "epoch": 0.15502175423202078, "grad_norm": 1.9372174739837646, "learning_rate": 4.447338037203336e-06, "loss": 0.9358, "step": 12720 }, { "epoch": 0.15508269045616857, "grad_norm": 1.8284878730773926, "learning_rate": 4.447017318794099e-06, "loss": 0.8858, "step": 12725 }, { "epoch": 0.1551436266803164, "grad_norm": 2.069835901260376, "learning_rate": 4.446696600384863e-06, "loss": 0.9219, "step": 12730 }, { "epoch": 0.1552045629044642, "grad_norm": 2.3405470848083496, "learning_rate": 4.446375881975626e-06, "loss": 0.9389, "step": 12735 }, { "epoch": 0.15526549912861198, "grad_norm": 2.591705799102783, "learning_rate": 4.446055163566389e-06, "loss": 0.8691, "step": 12740 }, { "epoch": 0.1553264353527598, "grad_norm": 2.619215726852417, "learning_rate": 4.4457344451571525e-06, "loss": 0.8943, "step": 12745 }, { "epoch": 0.1553873715769076, "grad_norm": 2.1050052642822266, "learning_rate": 4.4454137267479155e-06, "loss": 0.8981, "step": 12750 }, { "epoch": 0.15544830780105542, "grad_norm": 1.7870203256607056, "learning_rate": 4.4450930083386786e-06, "loss": 0.8339, "step": 12755 }, { "epoch": 0.15550924402520322, "grad_norm": 2.548497438430786, "learning_rate": 4.4447722899294424e-06, "loss": 0.8869, "step": 12760 }, { "epoch": 0.15557018024935104, "grad_norm": 17.42800521850586, "learning_rate": 4.4444515715202054e-06, "loss": 0.884, "step": 12765 }, { "epoch": 0.15563111647349884, "grad_norm": 1.8367465734481812, "learning_rate": 4.444130853110969e-06, "loss": 1.2119, "step": 12770 }, { "epoch": 0.15569205269764663, "grad_norm": 1.8733614683151245, "learning_rate": 4.443810134701732e-06, "loss": 0.9025, "step": 12775 }, { "epoch": 0.15575298892179446, "grad_norm": 1.8854979276657104, "learning_rate": 4.443489416292495e-06, "loss": 0.9151, "step": 12780 }, { "epoch": 0.15581392514594225, "grad_norm": 1.9065916538238525, "learning_rate": 4.443168697883259e-06, "loss": 0.9318, "step": 12785 }, { "epoch": 0.15587486137009007, "grad_norm": 1.8961384296417236, "learning_rate": 4.442847979474022e-06, "loss": 0.9507, "step": 12790 }, { "epoch": 0.15593579759423787, "grad_norm": 2.2069034576416016, "learning_rate": 4.442527261064786e-06, "loss": 0.9097, "step": 12795 }, { "epoch": 0.1559967338183857, "grad_norm": 1.9916762113571167, "learning_rate": 4.442206542655549e-06, "loss": 0.9076, "step": 12800 }, { "epoch": 0.1560576700425335, "grad_norm": 1.8317018747329712, "learning_rate": 4.441885824246312e-06, "loss": 0.8195, "step": 12805 }, { "epoch": 0.15611860626668128, "grad_norm": 2.238276481628418, "learning_rate": 4.441565105837076e-06, "loss": 0.9351, "step": 12810 }, { "epoch": 0.1561795424908291, "grad_norm": 1.774844765663147, "learning_rate": 4.441244387427839e-06, "loss": 0.8656, "step": 12815 }, { "epoch": 0.1562404787149769, "grad_norm": 1.69944167137146, "learning_rate": 4.440923669018602e-06, "loss": 0.8962, "step": 12820 }, { "epoch": 0.15630141493912472, "grad_norm": 1.977575421333313, "learning_rate": 4.440602950609365e-06, "loss": 0.9313, "step": 12825 }, { "epoch": 0.15636235116327252, "grad_norm": 2.0713465213775635, "learning_rate": 4.440282232200129e-06, "loss": 0.851, "step": 12830 }, { "epoch": 0.1564232873874203, "grad_norm": 1.7546520233154297, "learning_rate": 4.439961513790892e-06, "loss": 0.9599, "step": 12835 }, { "epoch": 0.15648422361156814, "grad_norm": 2.210979461669922, "learning_rate": 4.439640795381655e-06, "loss": 0.8908, "step": 12840 }, { "epoch": 0.15654515983571593, "grad_norm": 1.8766686916351318, "learning_rate": 4.439320076972419e-06, "loss": 0.9489, "step": 12845 }, { "epoch": 0.15660609605986375, "grad_norm": 2.4776041507720947, "learning_rate": 4.438999358563182e-06, "loss": 0.9237, "step": 12850 }, { "epoch": 0.15666703228401155, "grad_norm": 1.7989857196807861, "learning_rate": 4.438678640153945e-06, "loss": 0.8909, "step": 12855 }, { "epoch": 0.15672796850815937, "grad_norm": 1.721665859222412, "learning_rate": 4.438357921744709e-06, "loss": 0.8407, "step": 12860 }, { "epoch": 0.15678890473230717, "grad_norm": 1.9053053855895996, "learning_rate": 4.438037203335472e-06, "loss": 0.8668, "step": 12865 }, { "epoch": 0.15684984095645496, "grad_norm": 1.9662646055221558, "learning_rate": 4.437716484926235e-06, "loss": 0.8787, "step": 12870 }, { "epoch": 0.15691077718060278, "grad_norm": 2.1617348194122314, "learning_rate": 4.437395766516999e-06, "loss": 0.852, "step": 12875 }, { "epoch": 0.15697171340475058, "grad_norm": 1.8630234003067017, "learning_rate": 4.437075048107762e-06, "loss": 0.9622, "step": 12880 }, { "epoch": 0.1570326496288984, "grad_norm": 2.033724784851074, "learning_rate": 4.436754329698525e-06, "loss": 0.8846, "step": 12885 }, { "epoch": 0.1570935858530462, "grad_norm": 1.6579087972640991, "learning_rate": 4.4364336112892885e-06, "loss": 0.8759, "step": 12890 }, { "epoch": 0.15715452207719402, "grad_norm": 1.9693284034729004, "learning_rate": 4.4361128928800515e-06, "loss": 0.8246, "step": 12895 }, { "epoch": 0.15721545830134181, "grad_norm": 1.9947935342788696, "learning_rate": 4.4357921744708145e-06, "loss": 0.9469, "step": 12900 }, { "epoch": 0.1572763945254896, "grad_norm": 2.1056370735168457, "learning_rate": 4.435471456061578e-06, "loss": 0.9084, "step": 12905 }, { "epoch": 0.15733733074963743, "grad_norm": 2.1478049755096436, "learning_rate": 4.435150737652341e-06, "loss": 0.911, "step": 12910 }, { "epoch": 0.15739826697378523, "grad_norm": 2.080249786376953, "learning_rate": 4.434830019243105e-06, "loss": 0.9665, "step": 12915 }, { "epoch": 0.15745920319793305, "grad_norm": 2.0922229290008545, "learning_rate": 4.434509300833868e-06, "loss": 0.911, "step": 12920 }, { "epoch": 0.15752013942208085, "grad_norm": 1.732424259185791, "learning_rate": 4.434188582424631e-06, "loss": 0.8554, "step": 12925 }, { "epoch": 0.15758107564622867, "grad_norm": 2.255662679672241, "learning_rate": 4.433867864015395e-06, "loss": 0.9221, "step": 12930 }, { "epoch": 0.15764201187037646, "grad_norm": 2.025911331176758, "learning_rate": 4.433547145606158e-06, "loss": 0.8739, "step": 12935 }, { "epoch": 0.15770294809452426, "grad_norm": 2.165957450866699, "learning_rate": 4.433226427196922e-06, "loss": 0.8996, "step": 12940 }, { "epoch": 0.15776388431867208, "grad_norm": 2.2648422718048096, "learning_rate": 4.432905708787685e-06, "loss": 0.9656, "step": 12945 }, { "epoch": 0.15782482054281988, "grad_norm": 1.9542617797851562, "learning_rate": 4.432584990378448e-06, "loss": 0.9193, "step": 12950 }, { "epoch": 0.1578857567669677, "grad_norm": 1.8043376207351685, "learning_rate": 4.432264271969212e-06, "loss": 0.8874, "step": 12955 }, { "epoch": 0.1579466929911155, "grad_norm": 1.9568967819213867, "learning_rate": 4.431943553559975e-06, "loss": 0.8586, "step": 12960 }, { "epoch": 0.15800762921526332, "grad_norm": 2.078256130218506, "learning_rate": 4.431622835150738e-06, "loss": 0.9017, "step": 12965 }, { "epoch": 0.1580685654394111, "grad_norm": 1.9923936128616333, "learning_rate": 4.431302116741502e-06, "loss": 0.9674, "step": 12970 }, { "epoch": 0.1581295016635589, "grad_norm": 1.7484092712402344, "learning_rate": 4.430981398332265e-06, "loss": 0.8796, "step": 12975 }, { "epoch": 0.15819043788770673, "grad_norm": 2.143812656402588, "learning_rate": 4.430660679923028e-06, "loss": 0.9065, "step": 12980 }, { "epoch": 0.15825137411185453, "grad_norm": 1.917534351348877, "learning_rate": 4.430339961513791e-06, "loss": 0.9683, "step": 12985 }, { "epoch": 0.15831231033600235, "grad_norm": 2.264685869216919, "learning_rate": 4.430019243104555e-06, "loss": 0.8868, "step": 12990 }, { "epoch": 0.15837324656015014, "grad_norm": 1.9954169988632202, "learning_rate": 4.429698524695318e-06, "loss": 0.8738, "step": 12995 }, { "epoch": 0.15843418278429797, "grad_norm": 2.1484129428863525, "learning_rate": 4.429377806286081e-06, "loss": 0.9004, "step": 13000 }, { "epoch": 0.15849511900844576, "grad_norm": 1.9261860847473145, "learning_rate": 4.429057087876845e-06, "loss": 1.0114, "step": 13005 }, { "epoch": 0.15855605523259356, "grad_norm": 2.3347461223602295, "learning_rate": 4.428736369467608e-06, "loss": 0.8817, "step": 13010 }, { "epoch": 0.15861699145674138, "grad_norm": 2.1912736892700195, "learning_rate": 4.428415651058371e-06, "loss": 0.893, "step": 13015 }, { "epoch": 0.15867792768088917, "grad_norm": 2.247762441635132, "learning_rate": 4.4280949326491345e-06, "loss": 0.8588, "step": 13020 }, { "epoch": 0.158738863905037, "grad_norm": 1.9416296482086182, "learning_rate": 4.4277742142398976e-06, "loss": 0.8701, "step": 13025 }, { "epoch": 0.1587998001291848, "grad_norm": 1.9804184436798096, "learning_rate": 4.427453495830661e-06, "loss": 0.9942, "step": 13030 }, { "epoch": 0.15886073635333262, "grad_norm": 2.094036102294922, "learning_rate": 4.4271327774214244e-06, "loss": 0.8622, "step": 13035 }, { "epoch": 0.1589216725774804, "grad_norm": 2.0347213745117188, "learning_rate": 4.4268120590121875e-06, "loss": 0.9259, "step": 13040 }, { "epoch": 0.1589826088016282, "grad_norm": 1.8752591609954834, "learning_rate": 4.4264913406029505e-06, "loss": 0.9703, "step": 13045 }, { "epoch": 0.15904354502577603, "grad_norm": 1.6616761684417725, "learning_rate": 4.426170622193714e-06, "loss": 0.8312, "step": 13050 }, { "epoch": 0.15910448124992382, "grad_norm": 2.0166234970092773, "learning_rate": 4.425849903784477e-06, "loss": 0.8483, "step": 13055 }, { "epoch": 0.15916541747407165, "grad_norm": 2.1078968048095703, "learning_rate": 4.42552918537524e-06, "loss": 0.9579, "step": 13060 }, { "epoch": 0.15922635369821944, "grad_norm": 1.7135988473892212, "learning_rate": 4.425208466966004e-06, "loss": 0.8466, "step": 13065 }, { "epoch": 0.15928728992236724, "grad_norm": 2.147521495819092, "learning_rate": 4.424887748556767e-06, "loss": 0.9783, "step": 13070 }, { "epoch": 0.15934822614651506, "grad_norm": 2.2676780223846436, "learning_rate": 4.424567030147531e-06, "loss": 0.8671, "step": 13075 }, { "epoch": 0.15940916237066285, "grad_norm": 1.8422831296920776, "learning_rate": 4.424246311738294e-06, "loss": 0.899, "step": 13080 }, { "epoch": 0.15947009859481068, "grad_norm": 2.0547592639923096, "learning_rate": 4.423925593329058e-06, "loss": 0.8491, "step": 13085 }, { "epoch": 0.15953103481895847, "grad_norm": 2.1510448455810547, "learning_rate": 4.423604874919821e-06, "loss": 1.0021, "step": 13090 }, { "epoch": 0.1595919710431063, "grad_norm": 2.109999418258667, "learning_rate": 4.423284156510584e-06, "loss": 0.8658, "step": 13095 }, { "epoch": 0.1596529072672541, "grad_norm": 1.9451391696929932, "learning_rate": 4.422963438101348e-06, "loss": 0.8777, "step": 13100 }, { "epoch": 0.15971384349140189, "grad_norm": 2.061858892440796, "learning_rate": 4.422642719692111e-06, "loss": 0.89, "step": 13105 }, { "epoch": 0.1597747797155497, "grad_norm": 1.884069561958313, "learning_rate": 4.422322001282874e-06, "loss": 0.9054, "step": 13110 }, { "epoch": 0.1598357159396975, "grad_norm": 2.209019660949707, "learning_rate": 4.422001282873638e-06, "loss": 0.8633, "step": 13115 }, { "epoch": 0.15989665216384533, "grad_norm": 2.0582780838012695, "learning_rate": 4.421680564464401e-06, "loss": 0.9167, "step": 13120 }, { "epoch": 0.15995758838799312, "grad_norm": 1.783394455909729, "learning_rate": 4.421359846055164e-06, "loss": 0.8713, "step": 13125 }, { "epoch": 0.16001852461214094, "grad_norm": 1.8445191383361816, "learning_rate": 4.421039127645928e-06, "loss": 0.859, "step": 13130 }, { "epoch": 0.16007946083628874, "grad_norm": 1.7330831289291382, "learning_rate": 4.420718409236691e-06, "loss": 0.8859, "step": 13135 }, { "epoch": 0.16014039706043653, "grad_norm": 1.954209327697754, "learning_rate": 4.420397690827454e-06, "loss": 0.9037, "step": 13140 }, { "epoch": 0.16020133328458436, "grad_norm": 2.3205645084381104, "learning_rate": 4.420076972418218e-06, "loss": 0.9383, "step": 13145 }, { "epoch": 0.16026226950873215, "grad_norm": 2.1596527099609375, "learning_rate": 4.419756254008981e-06, "loss": 0.8158, "step": 13150 }, { "epoch": 0.16032320573287998, "grad_norm": 1.736662745475769, "learning_rate": 4.419435535599744e-06, "loss": 0.883, "step": 13155 }, { "epoch": 0.16038414195702777, "grad_norm": 2.5542750358581543, "learning_rate": 4.419114817190507e-06, "loss": 0.9345, "step": 13160 }, { "epoch": 0.1604450781811756, "grad_norm": 1.903972864151001, "learning_rate": 4.4187940987812705e-06, "loss": 0.9154, "step": 13165 }, { "epoch": 0.1605060144053234, "grad_norm": 2.002145528793335, "learning_rate": 4.4184733803720335e-06, "loss": 0.8479, "step": 13170 }, { "epoch": 0.16056695062947118, "grad_norm": 2.031074047088623, "learning_rate": 4.4181526619627965e-06, "loss": 0.8622, "step": 13175 }, { "epoch": 0.160627886853619, "grad_norm": 2.3471250534057617, "learning_rate": 4.41783194355356e-06, "loss": 0.9168, "step": 13180 }, { "epoch": 0.1606888230777668, "grad_norm": 1.8161700963974, "learning_rate": 4.417511225144323e-06, "loss": 0.8899, "step": 13185 }, { "epoch": 0.16074975930191462, "grad_norm": 2.0596208572387695, "learning_rate": 4.4171905067350864e-06, "loss": 0.8955, "step": 13190 }, { "epoch": 0.16081069552606242, "grad_norm": 2.7606515884399414, "learning_rate": 4.41686978832585e-06, "loss": 0.8955, "step": 13195 }, { "epoch": 0.16087163175021024, "grad_norm": 2.0492396354675293, "learning_rate": 4.416549069916613e-06, "loss": 0.8894, "step": 13200 }, { "epoch": 0.16093256797435804, "grad_norm": 2.089040756225586, "learning_rate": 4.416228351507376e-06, "loss": 0.8851, "step": 13205 }, { "epoch": 0.16099350419850583, "grad_norm": 2.1529335975646973, "learning_rate": 4.41590763309814e-06, "loss": 0.9016, "step": 13210 }, { "epoch": 0.16105444042265366, "grad_norm": 2.301391839981079, "learning_rate": 4.415586914688903e-06, "loss": 0.8693, "step": 13215 }, { "epoch": 0.16111537664680145, "grad_norm": 1.7738196849822998, "learning_rate": 4.415266196279667e-06, "loss": 0.8393, "step": 13220 }, { "epoch": 0.16117631287094927, "grad_norm": 1.9700336456298828, "learning_rate": 4.41494547787043e-06, "loss": 0.8845, "step": 13225 }, { "epoch": 0.16123724909509707, "grad_norm": 1.74201238155365, "learning_rate": 4.414624759461193e-06, "loss": 0.9, "step": 13230 }, { "epoch": 0.1612981853192449, "grad_norm": 1.9421510696411133, "learning_rate": 4.414304041051957e-06, "loss": 0.862, "step": 13235 }, { "epoch": 0.1613591215433927, "grad_norm": 2.229253053665161, "learning_rate": 4.41398332264272e-06, "loss": 0.8695, "step": 13240 }, { "epoch": 0.16142005776754048, "grad_norm": 2.1040964126586914, "learning_rate": 4.413662604233484e-06, "loss": 0.931, "step": 13245 }, { "epoch": 0.1614809939916883, "grad_norm": 2.095411539077759, "learning_rate": 4.413341885824247e-06, "loss": 0.9025, "step": 13250 }, { "epoch": 0.1615419302158361, "grad_norm": 2.1192398071289062, "learning_rate": 4.41302116741501e-06, "loss": 0.9132, "step": 13255 }, { "epoch": 0.16160286643998392, "grad_norm": 1.8556632995605469, "learning_rate": 4.412700449005774e-06, "loss": 0.9379, "step": 13260 }, { "epoch": 0.16166380266413172, "grad_norm": 2.0947999954223633, "learning_rate": 4.412379730596537e-06, "loss": 0.9665, "step": 13265 }, { "epoch": 0.16172473888827954, "grad_norm": 1.9548736810684204, "learning_rate": 4.4120590121873e-06, "loss": 0.8145, "step": 13270 }, { "epoch": 0.16178567511242734, "grad_norm": 2.0010499954223633, "learning_rate": 4.411738293778064e-06, "loss": 0.9651, "step": 13275 }, { "epoch": 0.16184661133657513, "grad_norm": 1.9349932670593262, "learning_rate": 4.411417575368827e-06, "loss": 0.9039, "step": 13280 }, { "epoch": 0.16190754756072295, "grad_norm": 2.103896379470825, "learning_rate": 4.41109685695959e-06, "loss": 0.9149, "step": 13285 }, { "epoch": 0.16196848378487075, "grad_norm": 2.420509099960327, "learning_rate": 4.4107761385503535e-06, "loss": 0.89, "step": 13290 }, { "epoch": 0.16202942000901857, "grad_norm": 2.5333049297332764, "learning_rate": 4.4104554201411166e-06, "loss": 0.898, "step": 13295 }, { "epoch": 0.16209035623316637, "grad_norm": 1.8641386032104492, "learning_rate": 4.41013470173188e-06, "loss": 0.8827, "step": 13300 }, { "epoch": 0.16215129245731416, "grad_norm": 1.9296140670776367, "learning_rate": 4.4098139833226434e-06, "loss": 0.8789, "step": 13305 }, { "epoch": 0.16221222868146198, "grad_norm": 2.14011812210083, "learning_rate": 4.4094932649134065e-06, "loss": 0.9348, "step": 13310 }, { "epoch": 0.16227316490560978, "grad_norm": 2.436422824859619, "learning_rate": 4.4091725465041695e-06, "loss": 0.8836, "step": 13315 }, { "epoch": 0.1623341011297576, "grad_norm": 1.996449589729309, "learning_rate": 4.408851828094933e-06, "loss": 0.9171, "step": 13320 }, { "epoch": 0.1623950373539054, "grad_norm": 1.944953203201294, "learning_rate": 4.408531109685696e-06, "loss": 0.8421, "step": 13325 }, { "epoch": 0.16245597357805322, "grad_norm": 2.091418981552124, "learning_rate": 4.408210391276459e-06, "loss": 0.7983, "step": 13330 }, { "epoch": 0.16251690980220102, "grad_norm": 2.2455573081970215, "learning_rate": 4.407889672867222e-06, "loss": 0.9447, "step": 13335 }, { "epoch": 0.1625778460263488, "grad_norm": 1.7926613092422485, "learning_rate": 4.407568954457986e-06, "loss": 0.8336, "step": 13340 }, { "epoch": 0.16263878225049663, "grad_norm": 1.655584692955017, "learning_rate": 4.407248236048749e-06, "loss": 0.8408, "step": 13345 }, { "epoch": 0.16269971847464443, "grad_norm": 2.0805001258850098, "learning_rate": 4.406927517639512e-06, "loss": 0.9883, "step": 13350 }, { "epoch": 0.16276065469879225, "grad_norm": 1.9444935321807861, "learning_rate": 4.406606799230276e-06, "loss": 0.9449, "step": 13355 }, { "epoch": 0.16282159092294005, "grad_norm": 1.8653570413589478, "learning_rate": 4.406286080821039e-06, "loss": 0.8955, "step": 13360 }, { "epoch": 0.16288252714708787, "grad_norm": 2.191577672958374, "learning_rate": 4.405965362411803e-06, "loss": 0.8984, "step": 13365 }, { "epoch": 0.16294346337123566, "grad_norm": 2.371185064315796, "learning_rate": 4.405644644002566e-06, "loss": 0.8186, "step": 13370 }, { "epoch": 0.16300439959538346, "grad_norm": 1.7808951139450073, "learning_rate": 4.405323925593329e-06, "loss": 0.9133, "step": 13375 }, { "epoch": 0.16306533581953128, "grad_norm": 2.341111898422241, "learning_rate": 4.405003207184093e-06, "loss": 0.9257, "step": 13380 }, { "epoch": 0.16312627204367908, "grad_norm": 1.8416248559951782, "learning_rate": 4.404682488774856e-06, "loss": 0.9754, "step": 13385 }, { "epoch": 0.1631872082678269, "grad_norm": 1.7657628059387207, "learning_rate": 4.40436177036562e-06, "loss": 0.8222, "step": 13390 }, { "epoch": 0.1632481444919747, "grad_norm": 2.1138393878936768, "learning_rate": 4.404041051956383e-06, "loss": 0.9004, "step": 13395 }, { "epoch": 0.16330908071612252, "grad_norm": 2.1080234050750732, "learning_rate": 4.403720333547146e-06, "loss": 0.9009, "step": 13400 }, { "epoch": 0.1633700169402703, "grad_norm": 1.9770647287368774, "learning_rate": 4.40339961513791e-06, "loss": 0.8936, "step": 13405 }, { "epoch": 0.1634309531644181, "grad_norm": 2.5691592693328857, "learning_rate": 4.403078896728673e-06, "loss": 0.9431, "step": 13410 }, { "epoch": 0.16349188938856593, "grad_norm": 2.3442013263702393, "learning_rate": 4.402758178319436e-06, "loss": 0.8304, "step": 13415 }, { "epoch": 0.16355282561271373, "grad_norm": 2.2321202754974365, "learning_rate": 4.4024374599102e-06, "loss": 0.9299, "step": 13420 }, { "epoch": 0.16361376183686155, "grad_norm": 1.7662434577941895, "learning_rate": 4.402116741500963e-06, "loss": 0.8047, "step": 13425 }, { "epoch": 0.16367469806100934, "grad_norm": 1.6517210006713867, "learning_rate": 4.401796023091726e-06, "loss": 0.8723, "step": 13430 }, { "epoch": 0.16373563428515717, "grad_norm": 2.118156671524048, "learning_rate": 4.4014753046824895e-06, "loss": 0.8784, "step": 13435 }, { "epoch": 0.16379657050930496, "grad_norm": 1.9635772705078125, "learning_rate": 4.4011545862732525e-06, "loss": 0.8908, "step": 13440 }, { "epoch": 0.16385750673345276, "grad_norm": 1.9123703241348267, "learning_rate": 4.4008338678640155e-06, "loss": 0.9237, "step": 13445 }, { "epoch": 0.16391844295760058, "grad_norm": 2.004683017730713, "learning_rate": 4.400513149454779e-06, "loss": 0.9717, "step": 13450 }, { "epoch": 0.16397937918174837, "grad_norm": 1.9849992990493774, "learning_rate": 4.400192431045542e-06, "loss": 0.8661, "step": 13455 }, { "epoch": 0.1640403154058962, "grad_norm": 2.07360577583313, "learning_rate": 4.3998717126363054e-06, "loss": 0.8745, "step": 13460 }, { "epoch": 0.164101251630044, "grad_norm": 1.9859064817428589, "learning_rate": 4.399550994227069e-06, "loss": 0.95, "step": 13465 }, { "epoch": 0.16416218785419182, "grad_norm": 2.21441912651062, "learning_rate": 4.399230275817832e-06, "loss": 0.9107, "step": 13470 }, { "epoch": 0.1642231240783396, "grad_norm": 2.0462546348571777, "learning_rate": 4.398909557408595e-06, "loss": 0.8931, "step": 13475 }, { "epoch": 0.1642840603024874, "grad_norm": 2.0831401348114014, "learning_rate": 4.398588838999359e-06, "loss": 0.8995, "step": 13480 }, { "epoch": 0.16434499652663523, "grad_norm": 1.8450011014938354, "learning_rate": 4.398268120590122e-06, "loss": 0.8988, "step": 13485 }, { "epoch": 0.16440593275078302, "grad_norm": 1.586234211921692, "learning_rate": 4.397947402180885e-06, "loss": 0.8522, "step": 13490 }, { "epoch": 0.16446686897493085, "grad_norm": 2.1630988121032715, "learning_rate": 4.397626683771648e-06, "loss": 0.8719, "step": 13495 }, { "epoch": 0.16452780519907864, "grad_norm": 1.9716273546218872, "learning_rate": 4.397305965362412e-06, "loss": 0.8594, "step": 13500 }, { "epoch": 0.16458874142322646, "grad_norm": 2.1410465240478516, "learning_rate": 4.396985246953175e-06, "loss": 0.903, "step": 13505 }, { "epoch": 0.16464967764737426, "grad_norm": 2.135646343231201, "learning_rate": 4.396664528543938e-06, "loss": 0.9401, "step": 13510 }, { "epoch": 0.16471061387152205, "grad_norm": 1.7182613611221313, "learning_rate": 4.396343810134702e-06, "loss": 0.9423, "step": 13515 }, { "epoch": 0.16477155009566988, "grad_norm": 2.2266383171081543, "learning_rate": 4.396023091725465e-06, "loss": 0.8826, "step": 13520 }, { "epoch": 0.16483248631981767, "grad_norm": 2.1794192790985107, "learning_rate": 4.395702373316229e-06, "loss": 0.9638, "step": 13525 }, { "epoch": 0.1648934225439655, "grad_norm": 2.2152767181396484, "learning_rate": 4.395381654906992e-06, "loss": 0.8681, "step": 13530 }, { "epoch": 0.1649543587681133, "grad_norm": 1.944278597831726, "learning_rate": 4.395060936497755e-06, "loss": 0.8991, "step": 13535 }, { "epoch": 0.16501529499226109, "grad_norm": 1.6154437065124512, "learning_rate": 4.394740218088519e-06, "loss": 0.8668, "step": 13540 }, { "epoch": 0.1650762312164089, "grad_norm": 2.0596108436584473, "learning_rate": 4.394419499679282e-06, "loss": 0.8839, "step": 13545 }, { "epoch": 0.1651371674405567, "grad_norm": 2.317997455596924, "learning_rate": 4.394098781270046e-06, "loss": 0.9003, "step": 13550 }, { "epoch": 0.16519810366470453, "grad_norm": 2.075212001800537, "learning_rate": 4.393778062860809e-06, "loss": 0.8929, "step": 13555 }, { "epoch": 0.16525903988885232, "grad_norm": 2.492255210876465, "learning_rate": 4.3934573444515725e-06, "loss": 0.9466, "step": 13560 }, { "epoch": 0.16531997611300014, "grad_norm": 1.9006532430648804, "learning_rate": 4.3931366260423356e-06, "loss": 0.9758, "step": 13565 }, { "epoch": 0.16538091233714794, "grad_norm": 2.151520013809204, "learning_rate": 4.3928159076330986e-06, "loss": 0.9102, "step": 13570 }, { "epoch": 0.16544184856129573, "grad_norm": 1.9188926219940186, "learning_rate": 4.392495189223862e-06, "loss": 0.8444, "step": 13575 }, { "epoch": 0.16550278478544356, "grad_norm": 2.241488218307495, "learning_rate": 4.3921744708146255e-06, "loss": 0.8314, "step": 13580 }, { "epoch": 0.16556372100959135, "grad_norm": 2.3300554752349854, "learning_rate": 4.3918537524053885e-06, "loss": 0.9202, "step": 13585 }, { "epoch": 0.16562465723373918, "grad_norm": 2.296156883239746, "learning_rate": 4.3915330339961515e-06, "loss": 0.9107, "step": 13590 }, { "epoch": 0.16568559345788697, "grad_norm": 2.1335206031799316, "learning_rate": 4.391212315586915e-06, "loss": 0.918, "step": 13595 }, { "epoch": 0.1657465296820348, "grad_norm": 1.9242273569107056, "learning_rate": 4.390891597177678e-06, "loss": 0.9272, "step": 13600 }, { "epoch": 0.1658074659061826, "grad_norm": 1.9243481159210205, "learning_rate": 4.390570878768441e-06, "loss": 0.8743, "step": 13605 }, { "epoch": 0.16586840213033038, "grad_norm": 2.150247812271118, "learning_rate": 4.390250160359205e-06, "loss": 0.9244, "step": 13610 }, { "epoch": 0.1659293383544782, "grad_norm": 1.9559944868087769, "learning_rate": 4.389929441949968e-06, "loss": 0.8416, "step": 13615 }, { "epoch": 0.165990274578626, "grad_norm": 2.459887981414795, "learning_rate": 4.389608723540731e-06, "loss": 0.8647, "step": 13620 }, { "epoch": 0.16605121080277382, "grad_norm": 1.829911470413208, "learning_rate": 4.389288005131495e-06, "loss": 0.9584, "step": 13625 }, { "epoch": 0.16611214702692162, "grad_norm": 1.8604217767715454, "learning_rate": 4.388967286722258e-06, "loss": 0.9328, "step": 13630 }, { "epoch": 0.16617308325106944, "grad_norm": 2.097517728805542, "learning_rate": 4.388646568313021e-06, "loss": 0.858, "step": 13635 }, { "epoch": 0.16623401947521724, "grad_norm": 1.8444186449050903, "learning_rate": 4.388325849903785e-06, "loss": 0.8976, "step": 13640 }, { "epoch": 0.16629495569936503, "grad_norm": 2.3919613361358643, "learning_rate": 4.388005131494548e-06, "loss": 0.9016, "step": 13645 }, { "epoch": 0.16635589192351286, "grad_norm": 2.129678249359131, "learning_rate": 4.387684413085311e-06, "loss": 0.9152, "step": 13650 }, { "epoch": 0.16641682814766065, "grad_norm": 2.1587612628936768, "learning_rate": 4.387363694676075e-06, "loss": 0.8931, "step": 13655 }, { "epoch": 0.16647776437180847, "grad_norm": 1.8286038637161255, "learning_rate": 4.387042976266838e-06, "loss": 0.9076, "step": 13660 }, { "epoch": 0.16653870059595627, "grad_norm": 2.09545636177063, "learning_rate": 4.386722257857601e-06, "loss": 0.8643, "step": 13665 }, { "epoch": 0.1665996368201041, "grad_norm": 2.1707708835601807, "learning_rate": 4.386401539448365e-06, "loss": 0.8669, "step": 13670 }, { "epoch": 0.1666605730442519, "grad_norm": 1.867599606513977, "learning_rate": 4.386080821039128e-06, "loss": 0.9079, "step": 13675 }, { "epoch": 0.16672150926839968, "grad_norm": 1.7779308557510376, "learning_rate": 4.385760102629891e-06, "loss": 0.8903, "step": 13680 }, { "epoch": 0.1667824454925475, "grad_norm": 2.614581346511841, "learning_rate": 4.385439384220655e-06, "loss": 0.8505, "step": 13685 }, { "epoch": 0.1668433817166953, "grad_norm": 2.0241246223449707, "learning_rate": 4.385118665811418e-06, "loss": 0.9481, "step": 13690 }, { "epoch": 0.16690431794084312, "grad_norm": 2.6068971157073975, "learning_rate": 4.384797947402182e-06, "loss": 0.8655, "step": 13695 }, { "epoch": 0.16696525416499092, "grad_norm": 2.2999589443206787, "learning_rate": 4.384477228992945e-06, "loss": 0.8822, "step": 13700 }, { "epoch": 0.16702619038913874, "grad_norm": 2.1231775283813477, "learning_rate": 4.384156510583708e-06, "loss": 0.92, "step": 13705 }, { "epoch": 0.16708712661328654, "grad_norm": 1.8686959743499756, "learning_rate": 4.3838357921744715e-06, "loss": 0.907, "step": 13710 }, { "epoch": 0.16714806283743433, "grad_norm": 1.899526834487915, "learning_rate": 4.3835150737652345e-06, "loss": 0.9257, "step": 13715 }, { "epoch": 0.16720899906158215, "grad_norm": 2.582059383392334, "learning_rate": 4.383194355355998e-06, "loss": 0.9611, "step": 13720 }, { "epoch": 0.16726993528572995, "grad_norm": 2.119943141937256, "learning_rate": 4.382873636946761e-06, "loss": 0.9028, "step": 13725 }, { "epoch": 0.16733087150987777, "grad_norm": 2.3207364082336426, "learning_rate": 4.3825529185375244e-06, "loss": 0.8059, "step": 13730 }, { "epoch": 0.16739180773402557, "grad_norm": 1.821109652519226, "learning_rate": 4.382232200128288e-06, "loss": 0.9372, "step": 13735 }, { "epoch": 0.1674527439581734, "grad_norm": 1.9868974685668945, "learning_rate": 4.381911481719051e-06, "loss": 0.9158, "step": 13740 }, { "epoch": 0.16751368018232118, "grad_norm": 1.7082158327102661, "learning_rate": 4.381590763309814e-06, "loss": 0.8279, "step": 13745 }, { "epoch": 0.16757461640646898, "grad_norm": 2.019897222518921, "learning_rate": 4.381270044900577e-06, "loss": 0.93, "step": 13750 }, { "epoch": 0.1676355526306168, "grad_norm": 2.029534339904785, "learning_rate": 4.380949326491341e-06, "loss": 0.9299, "step": 13755 }, { "epoch": 0.1676964888547646, "grad_norm": 1.8369399309158325, "learning_rate": 4.380628608082104e-06, "loss": 0.9372, "step": 13760 }, { "epoch": 0.16775742507891242, "grad_norm": 1.804488182067871, "learning_rate": 4.380307889672867e-06, "loss": 0.8718, "step": 13765 }, { "epoch": 0.16781836130306022, "grad_norm": 1.785528540611267, "learning_rate": 4.379987171263631e-06, "loss": 0.9121, "step": 13770 }, { "epoch": 0.16787929752720804, "grad_norm": 2.015841007232666, "learning_rate": 4.379666452854394e-06, "loss": 0.9026, "step": 13775 }, { "epoch": 0.16794023375135583, "grad_norm": 2.0953123569488525, "learning_rate": 4.379345734445157e-06, "loss": 0.858, "step": 13780 }, { "epoch": 0.16800116997550363, "grad_norm": 1.8609741926193237, "learning_rate": 4.379025016035921e-06, "loss": 0.8957, "step": 13785 }, { "epoch": 0.16806210619965145, "grad_norm": 2.3980937004089355, "learning_rate": 4.378704297626684e-06, "loss": 0.9475, "step": 13790 }, { "epoch": 0.16812304242379925, "grad_norm": 1.8847099542617798, "learning_rate": 4.378383579217447e-06, "loss": 0.9663, "step": 13795 }, { "epoch": 0.16818397864794707, "grad_norm": 1.788615107536316, "learning_rate": 4.378062860808211e-06, "loss": 0.8294, "step": 13800 }, { "epoch": 0.16824491487209486, "grad_norm": 2.2088661193847656, "learning_rate": 4.377742142398974e-06, "loss": 0.8864, "step": 13805 }, { "epoch": 0.16830585109624266, "grad_norm": 2.013914108276367, "learning_rate": 4.377421423989737e-06, "loss": 0.9014, "step": 13810 }, { "epoch": 0.16836678732039048, "grad_norm": 2.0997605323791504, "learning_rate": 4.377100705580501e-06, "loss": 0.8403, "step": 13815 }, { "epoch": 0.16842772354453828, "grad_norm": 1.961967945098877, "learning_rate": 4.376779987171264e-06, "loss": 0.8757, "step": 13820 }, { "epoch": 0.1684886597686861, "grad_norm": 2.0056276321411133, "learning_rate": 4.376459268762027e-06, "loss": 0.9124, "step": 13825 }, { "epoch": 0.1685495959928339, "grad_norm": 2.06179141998291, "learning_rate": 4.376138550352791e-06, "loss": 0.8633, "step": 13830 }, { "epoch": 0.16861053221698172, "grad_norm": 1.9073741436004639, "learning_rate": 4.375817831943554e-06, "loss": 0.9362, "step": 13835 }, { "epoch": 0.1686714684411295, "grad_norm": 2.388353109359741, "learning_rate": 4.3754971135343176e-06, "loss": 0.893, "step": 13840 }, { "epoch": 0.1687324046652773, "grad_norm": 2.149265766143799, "learning_rate": 4.375176395125081e-06, "loss": 0.8618, "step": 13845 }, { "epoch": 0.16879334088942513, "grad_norm": 2.056178569793701, "learning_rate": 4.374855676715844e-06, "loss": 0.8665, "step": 13850 }, { "epoch": 0.16885427711357293, "grad_norm": 2.0040993690490723, "learning_rate": 4.3745349583066075e-06, "loss": 0.8413, "step": 13855 }, { "epoch": 0.16891521333772075, "grad_norm": 2.0702457427978516, "learning_rate": 4.3742142398973705e-06, "loss": 0.8282, "step": 13860 }, { "epoch": 0.16897614956186854, "grad_norm": 1.8380221128463745, "learning_rate": 4.373893521488134e-06, "loss": 0.8283, "step": 13865 }, { "epoch": 0.16903708578601637, "grad_norm": 2.205008029937744, "learning_rate": 4.373572803078897e-06, "loss": 0.9541, "step": 13870 }, { "epoch": 0.16909802201016416, "grad_norm": 1.8633147478103638, "learning_rate": 4.37325208466966e-06, "loss": 0.8268, "step": 13875 }, { "epoch": 0.16915895823431196, "grad_norm": 1.7923827171325684, "learning_rate": 4.372931366260424e-06, "loss": 0.9287, "step": 13880 }, { "epoch": 0.16921989445845978, "grad_norm": 2.1410562992095947, "learning_rate": 4.372610647851187e-06, "loss": 0.8343, "step": 13885 }, { "epoch": 0.16928083068260757, "grad_norm": 1.8963322639465332, "learning_rate": 4.37228992944195e-06, "loss": 0.9085, "step": 13890 }, { "epoch": 0.1693417669067554, "grad_norm": 1.7030540704727173, "learning_rate": 4.371969211032714e-06, "loss": 0.9074, "step": 13895 }, { "epoch": 0.1694027031309032, "grad_norm": 2.0790517330169678, "learning_rate": 4.371648492623477e-06, "loss": 0.8321, "step": 13900 }, { "epoch": 0.16946363935505102, "grad_norm": 2.0577213764190674, "learning_rate": 4.37132777421424e-06, "loss": 1.0136, "step": 13905 }, { "epoch": 0.1695245755791988, "grad_norm": 1.8834266662597656, "learning_rate": 4.371007055805004e-06, "loss": 0.8331, "step": 13910 }, { "epoch": 0.1695855118033466, "grad_norm": 2.1099374294281006, "learning_rate": 4.370686337395767e-06, "loss": 0.9207, "step": 13915 }, { "epoch": 0.16964644802749443, "grad_norm": 1.9687856435775757, "learning_rate": 4.37036561898653e-06, "loss": 0.8759, "step": 13920 }, { "epoch": 0.16970738425164222, "grad_norm": 2.2234408855438232, "learning_rate": 4.370044900577293e-06, "loss": 0.9097, "step": 13925 }, { "epoch": 0.16976832047579005, "grad_norm": 2.2905566692352295, "learning_rate": 4.369724182168057e-06, "loss": 0.9463, "step": 13930 }, { "epoch": 0.16982925669993784, "grad_norm": 1.690542459487915, "learning_rate": 4.36940346375882e-06, "loss": 0.8611, "step": 13935 }, { "epoch": 0.16989019292408566, "grad_norm": 1.7473877668380737, "learning_rate": 4.369082745349583e-06, "loss": 0.8777, "step": 13940 }, { "epoch": 0.16995112914823346, "grad_norm": 2.0991742610931396, "learning_rate": 4.368762026940347e-06, "loss": 0.9407, "step": 13945 }, { "epoch": 0.17001206537238125, "grad_norm": 1.7531688213348389, "learning_rate": 4.36844130853111e-06, "loss": 0.9198, "step": 13950 }, { "epoch": 0.17007300159652908, "grad_norm": 2.3104488849639893, "learning_rate": 4.368120590121873e-06, "loss": 0.911, "step": 13955 }, { "epoch": 0.17013393782067687, "grad_norm": 2.167478084564209, "learning_rate": 4.367799871712637e-06, "loss": 0.9345, "step": 13960 }, { "epoch": 0.1701948740448247, "grad_norm": 1.965890645980835, "learning_rate": 4.3674791533034e-06, "loss": 0.9414, "step": 13965 }, { "epoch": 0.1702558102689725, "grad_norm": 1.9447051286697388, "learning_rate": 4.367158434894163e-06, "loss": 0.8284, "step": 13970 }, { "epoch": 0.1703167464931203, "grad_norm": 1.793428659439087, "learning_rate": 4.366837716484927e-06, "loss": 0.876, "step": 13975 }, { "epoch": 0.1703776827172681, "grad_norm": 1.9596999883651733, "learning_rate": 4.36651699807569e-06, "loss": 0.9037, "step": 13980 }, { "epoch": 0.1704386189414159, "grad_norm": 1.5417895317077637, "learning_rate": 4.366196279666453e-06, "loss": 0.8136, "step": 13985 }, { "epoch": 0.17049955516556373, "grad_norm": 2.0405783653259277, "learning_rate": 4.3658755612572165e-06, "loss": 0.8988, "step": 13990 }, { "epoch": 0.17056049138971152, "grad_norm": 1.8453326225280762, "learning_rate": 4.3655548428479796e-06, "loss": 0.8777, "step": 13995 }, { "epoch": 0.17062142761385934, "grad_norm": 1.831450343132019, "learning_rate": 4.3652341244387434e-06, "loss": 0.8724, "step": 14000 }, { "epoch": 0.17068236383800714, "grad_norm": 2.0504682064056396, "learning_rate": 4.3649134060295064e-06, "loss": 0.862, "step": 14005 }, { "epoch": 0.17074330006215496, "grad_norm": 2.433337450027466, "learning_rate": 4.3645926876202695e-06, "loss": 0.8842, "step": 14010 }, { "epoch": 0.17080423628630276, "grad_norm": 1.8200526237487793, "learning_rate": 4.364271969211033e-06, "loss": 0.89, "step": 14015 }, { "epoch": 0.17086517251045055, "grad_norm": 2.2432923316955566, "learning_rate": 4.363951250801796e-06, "loss": 0.9408, "step": 14020 }, { "epoch": 0.17092610873459838, "grad_norm": 1.9420160055160522, "learning_rate": 4.36363053239256e-06, "loss": 0.9601, "step": 14025 }, { "epoch": 0.17098704495874617, "grad_norm": 1.9344077110290527, "learning_rate": 4.363309813983323e-06, "loss": 0.8402, "step": 14030 }, { "epoch": 0.171047981182894, "grad_norm": 2.1071836948394775, "learning_rate": 4.362989095574086e-06, "loss": 0.881, "step": 14035 }, { "epoch": 0.1711089174070418, "grad_norm": 1.8754349946975708, "learning_rate": 4.36266837716485e-06, "loss": 0.8627, "step": 14040 }, { "epoch": 0.17116985363118958, "grad_norm": 1.8566933870315552, "learning_rate": 4.362347658755613e-06, "loss": 0.9219, "step": 14045 }, { "epoch": 0.1712307898553374, "grad_norm": 1.919055461883545, "learning_rate": 4.362026940346376e-06, "loss": 0.8297, "step": 14050 }, { "epoch": 0.1712917260794852, "grad_norm": 1.7946066856384277, "learning_rate": 4.36170622193714e-06, "loss": 0.8587, "step": 14055 }, { "epoch": 0.17135266230363302, "grad_norm": 1.915052890777588, "learning_rate": 4.361385503527903e-06, "loss": 0.8296, "step": 14060 }, { "epoch": 0.17141359852778082, "grad_norm": 2.502915859222412, "learning_rate": 4.361064785118666e-06, "loss": 0.8642, "step": 14065 }, { "epoch": 0.17147453475192864, "grad_norm": 1.7585452795028687, "learning_rate": 4.36074406670943e-06, "loss": 0.8853, "step": 14070 }, { "epoch": 0.17153547097607644, "grad_norm": 1.6558408737182617, "learning_rate": 4.360423348300193e-06, "loss": 0.9209, "step": 14075 }, { "epoch": 0.17159640720022423, "grad_norm": 2.050569534301758, "learning_rate": 4.360102629890956e-06, "loss": 0.8744, "step": 14080 }, { "epoch": 0.17165734342437206, "grad_norm": 2.3730359077453613, "learning_rate": 4.359781911481719e-06, "loss": 0.8995, "step": 14085 }, { "epoch": 0.17171827964851985, "grad_norm": 2.0354599952697754, "learning_rate": 4.359461193072483e-06, "loss": 0.8958, "step": 14090 }, { "epoch": 0.17177921587266767, "grad_norm": 1.7394499778747559, "learning_rate": 4.359140474663246e-06, "loss": 0.9091, "step": 14095 }, { "epoch": 0.17184015209681547, "grad_norm": 1.683526873588562, "learning_rate": 4.358819756254009e-06, "loss": 0.9303, "step": 14100 }, { "epoch": 0.1719010883209633, "grad_norm": 1.8515909910202026, "learning_rate": 4.358499037844773e-06, "loss": 0.7833, "step": 14105 }, { "epoch": 0.1719620245451111, "grad_norm": 2.442809820175171, "learning_rate": 4.358178319435536e-06, "loss": 0.8132, "step": 14110 }, { "epoch": 0.17202296076925888, "grad_norm": 2.010509729385376, "learning_rate": 4.357857601026299e-06, "loss": 0.9022, "step": 14115 }, { "epoch": 0.1720838969934067, "grad_norm": 1.8004120588302612, "learning_rate": 4.357536882617063e-06, "loss": 0.8179, "step": 14120 }, { "epoch": 0.1721448332175545, "grad_norm": 2.263928174972534, "learning_rate": 4.357216164207826e-06, "loss": 0.9529, "step": 14125 }, { "epoch": 0.17220576944170232, "grad_norm": 1.8510644435882568, "learning_rate": 4.356895445798589e-06, "loss": 0.8875, "step": 14130 }, { "epoch": 0.17226670566585012, "grad_norm": 1.9110969305038452, "learning_rate": 4.3565747273893525e-06, "loss": 0.9188, "step": 14135 }, { "epoch": 0.17232764188999794, "grad_norm": 1.8911986351013184, "learning_rate": 4.3562540089801155e-06, "loss": 0.8364, "step": 14140 }, { "epoch": 0.17238857811414574, "grad_norm": 2.3693106174468994, "learning_rate": 4.355933290570879e-06, "loss": 0.8855, "step": 14145 }, { "epoch": 0.17244951433829353, "grad_norm": 2.1403374671936035, "learning_rate": 4.355612572161642e-06, "loss": 0.8448, "step": 14150 }, { "epoch": 0.17251045056244135, "grad_norm": 2.011443853378296, "learning_rate": 4.355291853752405e-06, "loss": 0.9285, "step": 14155 }, { "epoch": 0.17257138678658915, "grad_norm": 1.8367629051208496, "learning_rate": 4.354971135343169e-06, "loss": 0.8671, "step": 14160 }, { "epoch": 0.17263232301073697, "grad_norm": 2.017282485961914, "learning_rate": 4.354650416933932e-06, "loss": 0.8333, "step": 14165 }, { "epoch": 0.17269325923488477, "grad_norm": 2.0861613750457764, "learning_rate": 4.354329698524696e-06, "loss": 0.837, "step": 14170 }, { "epoch": 0.1727541954590326, "grad_norm": 2.365079164505005, "learning_rate": 4.354008980115459e-06, "loss": 0.8522, "step": 14175 }, { "epoch": 0.17281513168318038, "grad_norm": 2.7288849353790283, "learning_rate": 4.353688261706222e-06, "loss": 0.9509, "step": 14180 }, { "epoch": 0.17287606790732818, "grad_norm": 2.1061816215515137, "learning_rate": 4.353367543296986e-06, "loss": 0.8434, "step": 14185 }, { "epoch": 0.172937004131476, "grad_norm": 1.9126254320144653, "learning_rate": 4.353046824887749e-06, "loss": 0.8988, "step": 14190 }, { "epoch": 0.1729979403556238, "grad_norm": 2.2560505867004395, "learning_rate": 4.352726106478512e-06, "loss": 0.8611, "step": 14195 }, { "epoch": 0.17305887657977162, "grad_norm": 1.958552360534668, "learning_rate": 4.352405388069276e-06, "loss": 0.9311, "step": 14200 }, { "epoch": 0.17311981280391942, "grad_norm": 2.0655441284179688, "learning_rate": 4.352084669660039e-06, "loss": 0.9335, "step": 14205 }, { "epoch": 0.17318074902806724, "grad_norm": 1.7794603109359741, "learning_rate": 4.351763951250802e-06, "loss": 0.8597, "step": 14210 }, { "epoch": 0.17324168525221503, "grad_norm": 2.955420970916748, "learning_rate": 4.351443232841566e-06, "loss": 0.9411, "step": 14215 }, { "epoch": 0.17330262147636283, "grad_norm": 1.9731462001800537, "learning_rate": 4.351122514432329e-06, "loss": 0.9234, "step": 14220 }, { "epoch": 0.17336355770051065, "grad_norm": 2.4114086627960205, "learning_rate": 4.350801796023092e-06, "loss": 0.913, "step": 14225 }, { "epoch": 0.17342449392465845, "grad_norm": 2.2737321853637695, "learning_rate": 4.350481077613856e-06, "loss": 0.8934, "step": 14230 }, { "epoch": 0.17348543014880627, "grad_norm": 2.0354461669921875, "learning_rate": 4.350160359204619e-06, "loss": 0.9336, "step": 14235 }, { "epoch": 0.17354636637295406, "grad_norm": 1.9676378965377808, "learning_rate": 4.349839640795382e-06, "loss": 0.9337, "step": 14240 }, { "epoch": 0.1736073025971019, "grad_norm": 1.998404622077942, "learning_rate": 4.349518922386146e-06, "loss": 0.8412, "step": 14245 }, { "epoch": 0.17366823882124968, "grad_norm": 1.7370575666427612, "learning_rate": 4.349198203976909e-06, "loss": 0.8533, "step": 14250 }, { "epoch": 0.17372917504539748, "grad_norm": 1.8772927522659302, "learning_rate": 4.348877485567672e-06, "loss": 0.9133, "step": 14255 }, { "epoch": 0.1737901112695453, "grad_norm": 2.2696785926818848, "learning_rate": 4.348556767158435e-06, "loss": 0.8653, "step": 14260 }, { "epoch": 0.1738510474936931, "grad_norm": 1.9606236219406128, "learning_rate": 4.3482360487491986e-06, "loss": 0.9152, "step": 14265 }, { "epoch": 0.17391198371784092, "grad_norm": 2.2864882946014404, "learning_rate": 4.347915330339962e-06, "loss": 0.9028, "step": 14270 }, { "epoch": 0.1739729199419887, "grad_norm": 2.1075172424316406, "learning_rate": 4.347594611930725e-06, "loss": 0.9272, "step": 14275 }, { "epoch": 0.1740338561661365, "grad_norm": 1.8517131805419922, "learning_rate": 4.3472738935214885e-06, "loss": 0.9657, "step": 14280 }, { "epoch": 0.17409479239028433, "grad_norm": 1.9164464473724365, "learning_rate": 4.3469531751122515e-06, "loss": 0.8788, "step": 14285 }, { "epoch": 0.17415572861443213, "grad_norm": 1.9762243032455444, "learning_rate": 4.346632456703015e-06, "loss": 0.9775, "step": 14290 }, { "epoch": 0.17421666483857995, "grad_norm": 1.8731495141983032, "learning_rate": 4.346311738293778e-06, "loss": 0.8308, "step": 14295 }, { "epoch": 0.17427760106272774, "grad_norm": 1.9297443628311157, "learning_rate": 4.345991019884541e-06, "loss": 0.8419, "step": 14300 }, { "epoch": 0.17433853728687557, "grad_norm": 2.079123020172119, "learning_rate": 4.345670301475305e-06, "loss": 1.003, "step": 14305 }, { "epoch": 0.17439947351102336, "grad_norm": 2.0157406330108643, "learning_rate": 4.345349583066068e-06, "loss": 0.9178, "step": 14310 }, { "epoch": 0.17446040973517116, "grad_norm": 1.952102541923523, "learning_rate": 4.345028864656832e-06, "loss": 0.8429, "step": 14315 }, { "epoch": 0.17452134595931898, "grad_norm": 2.205380439758301, "learning_rate": 4.344708146247595e-06, "loss": 0.8868, "step": 14320 }, { "epoch": 0.17458228218346677, "grad_norm": 2.4620718955993652, "learning_rate": 4.344387427838358e-06, "loss": 0.8718, "step": 14325 }, { "epoch": 0.1746432184076146, "grad_norm": 1.978450059890747, "learning_rate": 4.344066709429122e-06, "loss": 0.9123, "step": 14330 }, { "epoch": 0.1747041546317624, "grad_norm": 2.160489320755005, "learning_rate": 4.343745991019885e-06, "loss": 0.8386, "step": 14335 }, { "epoch": 0.17476509085591022, "grad_norm": 1.7539199590682983, "learning_rate": 4.343425272610648e-06, "loss": 0.8588, "step": 14340 }, { "epoch": 0.174826027080058, "grad_norm": 2.2105467319488525, "learning_rate": 4.343104554201412e-06, "loss": 0.9171, "step": 14345 }, { "epoch": 0.1748869633042058, "grad_norm": 2.0891835689544678, "learning_rate": 4.342783835792175e-06, "loss": 0.9336, "step": 14350 }, { "epoch": 0.17494789952835363, "grad_norm": 1.9178109169006348, "learning_rate": 4.342463117382938e-06, "loss": 0.9249, "step": 14355 }, { "epoch": 0.17500883575250142, "grad_norm": 1.975258708000183, "learning_rate": 4.342142398973702e-06, "loss": 0.8823, "step": 14360 }, { "epoch": 0.17506977197664925, "grad_norm": 2.0708956718444824, "learning_rate": 4.341821680564465e-06, "loss": 0.9019, "step": 14365 }, { "epoch": 0.17513070820079704, "grad_norm": 1.7663843631744385, "learning_rate": 4.341500962155228e-06, "loss": 0.8147, "step": 14370 }, { "epoch": 0.17519164442494486, "grad_norm": 2.1467528343200684, "learning_rate": 4.341180243745992e-06, "loss": 0.8865, "step": 14375 }, { "epoch": 0.17525258064909266, "grad_norm": 2.1210973262786865, "learning_rate": 4.340859525336755e-06, "loss": 0.9022, "step": 14380 }, { "epoch": 0.17531351687324045, "grad_norm": 1.9863368272781372, "learning_rate": 4.340538806927518e-06, "loss": 0.9301, "step": 14385 }, { "epoch": 0.17537445309738828, "grad_norm": 1.9741570949554443, "learning_rate": 4.340218088518282e-06, "loss": 0.8718, "step": 14390 }, { "epoch": 0.17543538932153607, "grad_norm": 2.0318284034729004, "learning_rate": 4.339897370109045e-06, "loss": 0.92, "step": 14395 }, { "epoch": 0.1754963255456839, "grad_norm": 1.8957422971725464, "learning_rate": 4.339576651699808e-06, "loss": 0.8944, "step": 14400 }, { "epoch": 0.1755572617698317, "grad_norm": 2.02280855178833, "learning_rate": 4.3392559332905715e-06, "loss": 0.8918, "step": 14405 }, { "epoch": 0.1756181979939795, "grad_norm": 2.165472984313965, "learning_rate": 4.3389352148813345e-06, "loss": 0.892, "step": 14410 }, { "epoch": 0.1756791342181273, "grad_norm": 1.6397271156311035, "learning_rate": 4.3386144964720975e-06, "loss": 0.9464, "step": 14415 }, { "epoch": 0.1757400704422751, "grad_norm": 1.8632709980010986, "learning_rate": 4.3382937780628606e-06, "loss": 0.9194, "step": 14420 }, { "epoch": 0.17580100666642293, "grad_norm": 1.8528393507003784, "learning_rate": 4.337973059653624e-06, "loss": 0.829, "step": 14425 }, { "epoch": 0.17586194289057072, "grad_norm": 1.7669662237167358, "learning_rate": 4.3376523412443874e-06, "loss": 0.8826, "step": 14430 }, { "epoch": 0.17592287911471854, "grad_norm": 1.932874321937561, "learning_rate": 4.3373316228351505e-06, "loss": 0.8367, "step": 14435 }, { "epoch": 0.17598381533886634, "grad_norm": 1.9690080881118774, "learning_rate": 4.337010904425914e-06, "loss": 0.8868, "step": 14440 }, { "epoch": 0.17604475156301416, "grad_norm": 2.6852571964263916, "learning_rate": 4.336690186016677e-06, "loss": 1.0036, "step": 14445 }, { "epoch": 0.17610568778716196, "grad_norm": 2.1251156330108643, "learning_rate": 4.336369467607441e-06, "loss": 0.813, "step": 14450 }, { "epoch": 0.17616662401130975, "grad_norm": 2.0698931217193604, "learning_rate": 4.336048749198204e-06, "loss": 0.8868, "step": 14455 }, { "epoch": 0.17622756023545758, "grad_norm": 2.3696975708007812, "learning_rate": 4.335728030788967e-06, "loss": 0.8961, "step": 14460 }, { "epoch": 0.17628849645960537, "grad_norm": 1.9221371412277222, "learning_rate": 4.335407312379731e-06, "loss": 0.9115, "step": 14465 }, { "epoch": 0.1763494326837532, "grad_norm": 1.7491161823272705, "learning_rate": 4.335086593970494e-06, "loss": 0.8826, "step": 14470 }, { "epoch": 0.176410368907901, "grad_norm": 2.103306293487549, "learning_rate": 4.334765875561258e-06, "loss": 0.9061, "step": 14475 }, { "epoch": 0.1764713051320488, "grad_norm": 2.4413397312164307, "learning_rate": 4.334445157152021e-06, "loss": 0.9281, "step": 14480 }, { "epoch": 0.1765322413561966, "grad_norm": 2.0019919872283936, "learning_rate": 4.334124438742785e-06, "loss": 0.8607, "step": 14485 }, { "epoch": 0.1765931775803444, "grad_norm": 1.9539098739624023, "learning_rate": 4.333803720333548e-06, "loss": 0.8843, "step": 14490 }, { "epoch": 0.17665411380449222, "grad_norm": 2.02756404876709, "learning_rate": 4.333483001924311e-06, "loss": 0.8719, "step": 14495 }, { "epoch": 0.17671505002864002, "grad_norm": 2.3664133548736572, "learning_rate": 4.333162283515075e-06, "loss": 0.8583, "step": 14500 }, { "epoch": 0.17677598625278784, "grad_norm": 2.516251564025879, "learning_rate": 4.332841565105838e-06, "loss": 0.8419, "step": 14505 }, { "epoch": 0.17683692247693564, "grad_norm": 1.9678095579147339, "learning_rate": 4.332520846696601e-06, "loss": 0.8852, "step": 14510 }, { "epoch": 0.17689785870108343, "grad_norm": 2.010878801345825, "learning_rate": 4.332200128287364e-06, "loss": 0.9016, "step": 14515 }, { "epoch": 0.17695879492523126, "grad_norm": 1.9591630697250366, "learning_rate": 4.331879409878128e-06, "loss": 0.8955, "step": 14520 }, { "epoch": 0.17701973114937905, "grad_norm": 2.053053379058838, "learning_rate": 4.331558691468891e-06, "loss": 0.8945, "step": 14525 }, { "epoch": 0.17708066737352687, "grad_norm": 1.944228172302246, "learning_rate": 4.331237973059654e-06, "loss": 0.9589, "step": 14530 }, { "epoch": 0.17714160359767467, "grad_norm": 1.8834010362625122, "learning_rate": 4.3309172546504176e-06, "loss": 0.8761, "step": 14535 }, { "epoch": 0.1772025398218225, "grad_norm": 1.868661880493164, "learning_rate": 4.330596536241181e-06, "loss": 0.9407, "step": 14540 }, { "epoch": 0.1772634760459703, "grad_norm": 2.271409034729004, "learning_rate": 4.330275817831944e-06, "loss": 0.9245, "step": 14545 }, { "epoch": 0.17732441227011808, "grad_norm": 1.779613971710205, "learning_rate": 4.3299550994227075e-06, "loss": 0.8931, "step": 14550 }, { "epoch": 0.1773853484942659, "grad_norm": 2.229430913925171, "learning_rate": 4.3296343810134705e-06, "loss": 0.9219, "step": 14555 }, { "epoch": 0.1774462847184137, "grad_norm": 1.9095301628112793, "learning_rate": 4.3293136626042335e-06, "loss": 0.9085, "step": 14560 }, { "epoch": 0.17750722094256152, "grad_norm": 1.8734947443008423, "learning_rate": 4.328992944194997e-06, "loss": 0.9372, "step": 14565 }, { "epoch": 0.17756815716670932, "grad_norm": 1.9450660943984985, "learning_rate": 4.32867222578576e-06, "loss": 0.9585, "step": 14570 }, { "epoch": 0.17762909339085714, "grad_norm": 1.9455146789550781, "learning_rate": 4.328351507376523e-06, "loss": 0.9257, "step": 14575 }, { "epoch": 0.17769002961500494, "grad_norm": 2.056993007659912, "learning_rate": 4.328030788967287e-06, "loss": 0.8955, "step": 14580 }, { "epoch": 0.17775096583915273, "grad_norm": 1.6649887561798096, "learning_rate": 4.32771007055805e-06, "loss": 0.8198, "step": 14585 }, { "epoch": 0.17781190206330055, "grad_norm": 1.787423849105835, "learning_rate": 4.327389352148813e-06, "loss": 0.8956, "step": 14590 }, { "epoch": 0.17787283828744835, "grad_norm": 2.0357754230499268, "learning_rate": 4.327068633739577e-06, "loss": 0.9307, "step": 14595 }, { "epoch": 0.17793377451159617, "grad_norm": 1.9176276922225952, "learning_rate": 4.32674791533034e-06, "loss": 0.9398, "step": 14600 }, { "epoch": 0.17799471073574397, "grad_norm": 1.7949891090393066, "learning_rate": 4.326427196921103e-06, "loss": 0.8783, "step": 14605 }, { "epoch": 0.1780556469598918, "grad_norm": 2.145665168762207, "learning_rate": 4.326106478511867e-06, "loss": 0.9528, "step": 14610 }, { "epoch": 0.17811658318403958, "grad_norm": 2.1117866039276123, "learning_rate": 4.32578576010263e-06, "loss": 0.8728, "step": 14615 }, { "epoch": 0.17817751940818738, "grad_norm": 1.9585868120193481, "learning_rate": 4.325465041693394e-06, "loss": 0.8505, "step": 14620 }, { "epoch": 0.1782384556323352, "grad_norm": 1.8774253129959106, "learning_rate": 4.325144323284157e-06, "loss": 0.8838, "step": 14625 }, { "epoch": 0.178299391856483, "grad_norm": 1.808071255683899, "learning_rate": 4.32482360487492e-06, "loss": 0.8844, "step": 14630 }, { "epoch": 0.17836032808063082, "grad_norm": 1.9681625366210938, "learning_rate": 4.324502886465684e-06, "loss": 0.8695, "step": 14635 }, { "epoch": 0.17842126430477862, "grad_norm": 2.031118869781494, "learning_rate": 4.324182168056447e-06, "loss": 0.8598, "step": 14640 }, { "epoch": 0.17848220052892644, "grad_norm": 2.479965925216675, "learning_rate": 4.323861449647211e-06, "loss": 0.8534, "step": 14645 }, { "epoch": 0.17854313675307423, "grad_norm": 1.979653000831604, "learning_rate": 4.323540731237974e-06, "loss": 0.9055, "step": 14650 }, { "epoch": 0.17860407297722203, "grad_norm": 1.9238613843917847, "learning_rate": 4.323220012828737e-06, "loss": 0.9277, "step": 14655 }, { "epoch": 0.17866500920136985, "grad_norm": 1.886468768119812, "learning_rate": 4.322899294419501e-06, "loss": 0.875, "step": 14660 }, { "epoch": 0.17872594542551765, "grad_norm": 2.339061975479126, "learning_rate": 4.322578576010264e-06, "loss": 0.8821, "step": 14665 }, { "epoch": 0.17878688164966547, "grad_norm": 2.2038562297821045, "learning_rate": 4.322257857601027e-06, "loss": 0.8493, "step": 14670 }, { "epoch": 0.17884781787381326, "grad_norm": 1.9703911542892456, "learning_rate": 4.32193713919179e-06, "loss": 0.8888, "step": 14675 }, { "epoch": 0.1789087540979611, "grad_norm": 2.142158269882202, "learning_rate": 4.3216164207825535e-06, "loss": 0.9105, "step": 14680 }, { "epoch": 0.17896969032210888, "grad_norm": 1.84000825881958, "learning_rate": 4.3212957023733165e-06, "loss": 0.8956, "step": 14685 }, { "epoch": 0.17903062654625668, "grad_norm": 1.8024951219558716, "learning_rate": 4.3209749839640796e-06, "loss": 0.9017, "step": 14690 }, { "epoch": 0.1790915627704045, "grad_norm": 2.1812286376953125, "learning_rate": 4.320654265554843e-06, "loss": 0.9021, "step": 14695 }, { "epoch": 0.1791524989945523, "grad_norm": 1.7620368003845215, "learning_rate": 4.3203335471456064e-06, "loss": 0.9622, "step": 14700 }, { "epoch": 0.17921343521870012, "grad_norm": 2.031473159790039, "learning_rate": 4.3200128287363695e-06, "loss": 0.9194, "step": 14705 }, { "epoch": 0.1792743714428479, "grad_norm": 1.6654106378555298, "learning_rate": 4.319692110327133e-06, "loss": 0.8506, "step": 14710 }, { "epoch": 0.17933530766699574, "grad_norm": 2.427762985229492, "learning_rate": 4.319371391917896e-06, "loss": 0.8961, "step": 14715 }, { "epoch": 0.17939624389114353, "grad_norm": 2.107050657272339, "learning_rate": 4.319050673508659e-06, "loss": 0.9716, "step": 14720 }, { "epoch": 0.17945718011529133, "grad_norm": 1.9646897315979004, "learning_rate": 4.318729955099423e-06, "loss": 0.9219, "step": 14725 }, { "epoch": 0.17951811633943915, "grad_norm": 2.186936855316162, "learning_rate": 4.318409236690186e-06, "loss": 0.9286, "step": 14730 }, { "epoch": 0.17957905256358694, "grad_norm": 1.9354956150054932, "learning_rate": 4.318088518280949e-06, "loss": 0.92, "step": 14735 }, { "epoch": 0.17963998878773477, "grad_norm": 2.356508731842041, "learning_rate": 4.317767799871713e-06, "loss": 0.8268, "step": 14740 }, { "epoch": 0.17970092501188256, "grad_norm": 1.7601722478866577, "learning_rate": 4.317447081462476e-06, "loss": 0.8728, "step": 14745 }, { "epoch": 0.17976186123603036, "grad_norm": 1.7786712646484375, "learning_rate": 4.317126363053239e-06, "loss": 0.9211, "step": 14750 }, { "epoch": 0.17982279746017818, "grad_norm": 1.7633188962936401, "learning_rate": 4.316805644644003e-06, "loss": 0.8385, "step": 14755 }, { "epoch": 0.17988373368432597, "grad_norm": 2.523242473602295, "learning_rate": 4.316484926234766e-06, "loss": 0.8978, "step": 14760 }, { "epoch": 0.1799446699084738, "grad_norm": 1.9406375885009766, "learning_rate": 4.31616420782553e-06, "loss": 0.8636, "step": 14765 }, { "epoch": 0.1800056061326216, "grad_norm": 2.07322096824646, "learning_rate": 4.315843489416293e-06, "loss": 0.8854, "step": 14770 }, { "epoch": 0.18006654235676942, "grad_norm": 2.225471019744873, "learning_rate": 4.315522771007056e-06, "loss": 0.861, "step": 14775 }, { "epoch": 0.1801274785809172, "grad_norm": 2.188244581222534, "learning_rate": 4.31520205259782e-06, "loss": 0.9498, "step": 14780 }, { "epoch": 0.180188414805065, "grad_norm": 2.122292995452881, "learning_rate": 4.314881334188583e-06, "loss": 0.8242, "step": 14785 }, { "epoch": 0.18024935102921283, "grad_norm": 2.234459400177002, "learning_rate": 4.314560615779347e-06, "loss": 0.8657, "step": 14790 }, { "epoch": 0.18031028725336062, "grad_norm": 1.9161391258239746, "learning_rate": 4.31423989737011e-06, "loss": 0.9677, "step": 14795 }, { "epoch": 0.18037122347750845, "grad_norm": 1.9347214698791504, "learning_rate": 4.313919178960873e-06, "loss": 0.938, "step": 14800 }, { "epoch": 0.18043215970165624, "grad_norm": 1.8702086210250854, "learning_rate": 4.3135984605516366e-06, "loss": 0.91, "step": 14805 }, { "epoch": 0.18049309592580406, "grad_norm": 1.867430329322815, "learning_rate": 4.3132777421424e-06, "loss": 0.8804, "step": 14810 }, { "epoch": 0.18055403214995186, "grad_norm": 1.7045809030532837, "learning_rate": 4.312957023733163e-06, "loss": 0.8443, "step": 14815 }, { "epoch": 0.18061496837409965, "grad_norm": 1.9513862133026123, "learning_rate": 4.3126363053239265e-06, "loss": 0.8798, "step": 14820 }, { "epoch": 0.18067590459824748, "grad_norm": 1.752058744430542, "learning_rate": 4.3123155869146895e-06, "loss": 0.9329, "step": 14825 }, { "epoch": 0.18073684082239527, "grad_norm": 2.197171211242676, "learning_rate": 4.3119948685054525e-06, "loss": 0.9903, "step": 14830 }, { "epoch": 0.1807977770465431, "grad_norm": 2.2143502235412598, "learning_rate": 4.311674150096216e-06, "loss": 0.9103, "step": 14835 }, { "epoch": 0.1808587132706909, "grad_norm": 1.8413978815078735, "learning_rate": 4.311353431686979e-06, "loss": 0.805, "step": 14840 }, { "epoch": 0.1809196494948387, "grad_norm": 2.1181278228759766, "learning_rate": 4.311032713277742e-06, "loss": 0.9205, "step": 14845 }, { "epoch": 0.1809805857189865, "grad_norm": 1.615478277206421, "learning_rate": 4.310711994868505e-06, "loss": 0.9069, "step": 14850 }, { "epoch": 0.1810415219431343, "grad_norm": 2.0645241737365723, "learning_rate": 4.310391276459269e-06, "loss": 0.9641, "step": 14855 }, { "epoch": 0.18110245816728213, "grad_norm": 1.740425944328308, "learning_rate": 4.310070558050032e-06, "loss": 0.8889, "step": 14860 }, { "epoch": 0.18116339439142992, "grad_norm": 2.057769536972046, "learning_rate": 4.309749839640795e-06, "loss": 0.8962, "step": 14865 }, { "epoch": 0.18122433061557774, "grad_norm": 1.7522175312042236, "learning_rate": 4.309429121231559e-06, "loss": 0.8411, "step": 14870 }, { "epoch": 0.18128526683972554, "grad_norm": 2.1883745193481445, "learning_rate": 4.309108402822322e-06, "loss": 0.8758, "step": 14875 }, { "epoch": 0.18134620306387336, "grad_norm": 1.741492509841919, "learning_rate": 4.308787684413085e-06, "loss": 0.8723, "step": 14880 }, { "epoch": 0.18140713928802116, "grad_norm": 1.895702600479126, "learning_rate": 4.308466966003849e-06, "loss": 0.9277, "step": 14885 }, { "epoch": 0.18146807551216895, "grad_norm": 1.9233248233795166, "learning_rate": 4.308146247594612e-06, "loss": 0.8678, "step": 14890 }, { "epoch": 0.18152901173631678, "grad_norm": 1.8231428861618042, "learning_rate": 4.307825529185375e-06, "loss": 0.8728, "step": 14895 }, { "epoch": 0.18158994796046457, "grad_norm": 1.7242472171783447, "learning_rate": 4.307504810776139e-06, "loss": 0.8849, "step": 14900 }, { "epoch": 0.1816508841846124, "grad_norm": 1.8206301927566528, "learning_rate": 4.307184092366902e-06, "loss": 0.8746, "step": 14905 }, { "epoch": 0.1817118204087602, "grad_norm": 1.9864470958709717, "learning_rate": 4.306863373957665e-06, "loss": 0.8189, "step": 14910 }, { "epoch": 0.181772756632908, "grad_norm": 2.108426332473755, "learning_rate": 4.306542655548429e-06, "loss": 0.9842, "step": 14915 }, { "epoch": 0.1818336928570558, "grad_norm": 2.219839096069336, "learning_rate": 4.306221937139192e-06, "loss": 0.915, "step": 14920 }, { "epoch": 0.1818946290812036, "grad_norm": 1.6540675163269043, "learning_rate": 4.305901218729956e-06, "loss": 0.7827, "step": 14925 }, { "epoch": 0.18195556530535142, "grad_norm": 2.449984073638916, "learning_rate": 4.305580500320719e-06, "loss": 0.8813, "step": 14930 }, { "epoch": 0.18201650152949922, "grad_norm": 1.8998464345932007, "learning_rate": 4.305259781911482e-06, "loss": 0.8781, "step": 14935 }, { "epoch": 0.18207743775364704, "grad_norm": 1.9023500680923462, "learning_rate": 4.304939063502246e-06, "loss": 0.9202, "step": 14940 }, { "epoch": 0.18213837397779484, "grad_norm": 1.9206222295761108, "learning_rate": 4.304618345093009e-06, "loss": 0.8934, "step": 14945 }, { "epoch": 0.18219931020194266, "grad_norm": 2.051295280456543, "learning_rate": 4.3042976266837725e-06, "loss": 0.8516, "step": 14950 }, { "epoch": 0.18226024642609046, "grad_norm": 2.137916326522827, "learning_rate": 4.3039769082745355e-06, "loss": 0.8182, "step": 14955 }, { "epoch": 0.18232118265023825, "grad_norm": 1.7612318992614746, "learning_rate": 4.3036561898652986e-06, "loss": 0.8719, "step": 14960 }, { "epoch": 0.18238211887438607, "grad_norm": 2.11143159866333, "learning_rate": 4.303335471456062e-06, "loss": 0.907, "step": 14965 }, { "epoch": 0.18244305509853387, "grad_norm": 2.0788824558258057, "learning_rate": 4.3030147530468254e-06, "loss": 0.9131, "step": 14970 }, { "epoch": 0.1825039913226817, "grad_norm": 1.891683578491211, "learning_rate": 4.3026940346375885e-06, "loss": 0.9353, "step": 14975 }, { "epoch": 0.1825649275468295, "grad_norm": 2.323380708694458, "learning_rate": 4.302373316228352e-06, "loss": 0.9403, "step": 14980 }, { "epoch": 0.18262586377097728, "grad_norm": 1.955621600151062, "learning_rate": 4.302052597819115e-06, "loss": 0.9068, "step": 14985 }, { "epoch": 0.1826867999951251, "grad_norm": 1.7773339748382568, "learning_rate": 4.301731879409878e-06, "loss": 0.9466, "step": 14990 }, { "epoch": 0.1827477362192729, "grad_norm": 2.295210361480713, "learning_rate": 4.301411161000642e-06, "loss": 0.8405, "step": 14995 }, { "epoch": 0.18280867244342072, "grad_norm": 2.0314652919769287, "learning_rate": 4.301090442591405e-06, "loss": 0.894, "step": 15000 }, { "epoch": 0.18286960866756852, "grad_norm": 1.7317439317703247, "learning_rate": 4.300769724182168e-06, "loss": 0.9037, "step": 15005 }, { "epoch": 0.18293054489171634, "grad_norm": 2.0017929077148438, "learning_rate": 4.300449005772931e-06, "loss": 0.928, "step": 15010 }, { "epoch": 0.18299148111586414, "grad_norm": 2.0356454849243164, "learning_rate": 4.300128287363695e-06, "loss": 1.0187, "step": 15015 }, { "epoch": 0.18305241734001193, "grad_norm": 1.69033944606781, "learning_rate": 4.299807568954458e-06, "loss": 0.913, "step": 15020 }, { "epoch": 0.18311335356415975, "grad_norm": 1.7849336862564087, "learning_rate": 4.299486850545221e-06, "loss": 0.8783, "step": 15025 }, { "epoch": 0.18317428978830755, "grad_norm": 1.7225795984268188, "learning_rate": 4.299166132135985e-06, "loss": 0.8605, "step": 15030 }, { "epoch": 0.18323522601245537, "grad_norm": 2.283296823501587, "learning_rate": 4.298845413726748e-06, "loss": 0.8868, "step": 15035 }, { "epoch": 0.18329616223660317, "grad_norm": 2.3946120738983154, "learning_rate": 4.298524695317511e-06, "loss": 0.8624, "step": 15040 }, { "epoch": 0.183357098460751, "grad_norm": 1.9854674339294434, "learning_rate": 4.298203976908275e-06, "loss": 0.8867, "step": 15045 }, { "epoch": 0.18341803468489878, "grad_norm": 2.365447521209717, "learning_rate": 4.297883258499038e-06, "loss": 0.8723, "step": 15050 }, { "epoch": 0.18347897090904658, "grad_norm": 2.0803098678588867, "learning_rate": 4.297562540089801e-06, "loss": 0.9165, "step": 15055 }, { "epoch": 0.1835399071331944, "grad_norm": 1.8879244327545166, "learning_rate": 4.297241821680565e-06, "loss": 0.9537, "step": 15060 }, { "epoch": 0.1836008433573422, "grad_norm": 2.8506112098693848, "learning_rate": 4.296921103271328e-06, "loss": 0.9289, "step": 15065 }, { "epoch": 0.18366177958149002, "grad_norm": 1.7924939393997192, "learning_rate": 4.296600384862092e-06, "loss": 0.9257, "step": 15070 }, { "epoch": 0.18372271580563782, "grad_norm": 1.9780001640319824, "learning_rate": 4.296279666452855e-06, "loss": 0.9147, "step": 15075 }, { "epoch": 0.18378365202978564, "grad_norm": 2.1615371704101562, "learning_rate": 4.295958948043618e-06, "loss": 0.9148, "step": 15080 }, { "epoch": 0.18384458825393343, "grad_norm": 1.8602944612503052, "learning_rate": 4.295638229634382e-06, "loss": 0.8678, "step": 15085 }, { "epoch": 0.18390552447808123, "grad_norm": 1.9754213094711304, "learning_rate": 4.295317511225145e-06, "loss": 0.9074, "step": 15090 }, { "epoch": 0.18396646070222905, "grad_norm": 1.7699999809265137, "learning_rate": 4.2949967928159085e-06, "loss": 0.9361, "step": 15095 }, { "epoch": 0.18402739692637685, "grad_norm": 2.28855562210083, "learning_rate": 4.2946760744066715e-06, "loss": 0.9667, "step": 15100 }, { "epoch": 0.18408833315052467, "grad_norm": 1.8336517810821533, "learning_rate": 4.2943553559974345e-06, "loss": 0.9226, "step": 15105 }, { "epoch": 0.18414926937467246, "grad_norm": 1.9420268535614014, "learning_rate": 4.294034637588198e-06, "loss": 0.8568, "step": 15110 }, { "epoch": 0.1842102055988203, "grad_norm": 2.1669976711273193, "learning_rate": 4.293713919178961e-06, "loss": 0.9146, "step": 15115 }, { "epoch": 0.18427114182296808, "grad_norm": 2.0487282276153564, "learning_rate": 4.293393200769724e-06, "loss": 0.9574, "step": 15120 }, { "epoch": 0.18433207804711588, "grad_norm": 1.9385652542114258, "learning_rate": 4.293072482360488e-06, "loss": 0.9328, "step": 15125 }, { "epoch": 0.1843930142712637, "grad_norm": 1.886169195175171, "learning_rate": 4.292751763951251e-06, "loss": 0.8219, "step": 15130 }, { "epoch": 0.1844539504954115, "grad_norm": 1.8105188608169556, "learning_rate": 4.292431045542014e-06, "loss": 0.886, "step": 15135 }, { "epoch": 0.18451488671955932, "grad_norm": 1.8517651557922363, "learning_rate": 4.292110327132778e-06, "loss": 0.8934, "step": 15140 }, { "epoch": 0.1845758229437071, "grad_norm": 2.0694193840026855, "learning_rate": 4.291789608723541e-06, "loss": 0.854, "step": 15145 }, { "epoch": 0.18463675916785494, "grad_norm": 2.031045436859131, "learning_rate": 4.291468890314304e-06, "loss": 0.9145, "step": 15150 }, { "epoch": 0.18469769539200273, "grad_norm": 2.0986313819885254, "learning_rate": 4.291148171905068e-06, "loss": 0.8649, "step": 15155 }, { "epoch": 0.18475863161615053, "grad_norm": 2.446051836013794, "learning_rate": 4.290827453495831e-06, "loss": 0.9076, "step": 15160 }, { "epoch": 0.18481956784029835, "grad_norm": 1.6574991941452026, "learning_rate": 4.290506735086594e-06, "loss": 0.865, "step": 15165 }, { "epoch": 0.18488050406444614, "grad_norm": 1.9615546464920044, "learning_rate": 4.290186016677358e-06, "loss": 0.9808, "step": 15170 }, { "epoch": 0.18494144028859397, "grad_norm": 2.1075387001037598, "learning_rate": 4.289865298268121e-06, "loss": 0.8909, "step": 15175 }, { "epoch": 0.18500237651274176, "grad_norm": 1.946228265762329, "learning_rate": 4.289544579858884e-06, "loss": 0.9022, "step": 15180 }, { "epoch": 0.18506331273688958, "grad_norm": 1.821866512298584, "learning_rate": 4.289223861449647e-06, "loss": 0.8908, "step": 15185 }, { "epoch": 0.18512424896103738, "grad_norm": 3.0509347915649414, "learning_rate": 4.288903143040411e-06, "loss": 0.8858, "step": 15190 }, { "epoch": 0.18518518518518517, "grad_norm": 2.140742778778076, "learning_rate": 4.288582424631174e-06, "loss": 0.9257, "step": 15195 }, { "epoch": 0.185246121409333, "grad_norm": 1.9072132110595703, "learning_rate": 4.288261706221937e-06, "loss": 0.8876, "step": 15200 }, { "epoch": 0.1853070576334808, "grad_norm": 2.0399153232574463, "learning_rate": 4.287940987812701e-06, "loss": 0.9408, "step": 15205 }, { "epoch": 0.18536799385762862, "grad_norm": 2.139413595199585, "learning_rate": 4.287620269403464e-06, "loss": 0.8692, "step": 15210 }, { "epoch": 0.1854289300817764, "grad_norm": 2.1020193099975586, "learning_rate": 4.287299550994227e-06, "loss": 0.8505, "step": 15215 }, { "epoch": 0.1854898663059242, "grad_norm": 2.083585500717163, "learning_rate": 4.286978832584991e-06, "loss": 0.8992, "step": 15220 }, { "epoch": 0.18555080253007203, "grad_norm": 1.8806418180465698, "learning_rate": 4.286658114175754e-06, "loss": 0.8677, "step": 15225 }, { "epoch": 0.18561173875421982, "grad_norm": 1.892156720161438, "learning_rate": 4.2863373957665176e-06, "loss": 0.8279, "step": 15230 }, { "epoch": 0.18567267497836765, "grad_norm": 1.8647702932357788, "learning_rate": 4.2860166773572806e-06, "loss": 0.8598, "step": 15235 }, { "epoch": 0.18573361120251544, "grad_norm": 1.8974238634109497, "learning_rate": 4.2856959589480444e-06, "loss": 0.8217, "step": 15240 }, { "epoch": 0.18579454742666326, "grad_norm": 1.7513571977615356, "learning_rate": 4.2853752405388075e-06, "loss": 0.9442, "step": 15245 }, { "epoch": 0.18585548365081106, "grad_norm": 1.6966129541397095, "learning_rate": 4.2850545221295705e-06, "loss": 0.8455, "step": 15250 }, { "epoch": 0.18591641987495885, "grad_norm": 1.8862009048461914, "learning_rate": 4.284733803720334e-06, "loss": 0.8952, "step": 15255 }, { "epoch": 0.18597735609910668, "grad_norm": 2.2231805324554443, "learning_rate": 4.284413085311097e-06, "loss": 0.9149, "step": 15260 }, { "epoch": 0.18603829232325447, "grad_norm": 2.047020196914673, "learning_rate": 4.28409236690186e-06, "loss": 0.9764, "step": 15265 }, { "epoch": 0.1860992285474023, "grad_norm": 1.8376377820968628, "learning_rate": 4.283771648492624e-06, "loss": 0.8505, "step": 15270 }, { "epoch": 0.1861601647715501, "grad_norm": 1.7484675645828247, "learning_rate": 4.283450930083387e-06, "loss": 0.8853, "step": 15275 }, { "epoch": 0.1862211009956979, "grad_norm": 2.13604736328125, "learning_rate": 4.28313021167415e-06, "loss": 0.9194, "step": 15280 }, { "epoch": 0.1862820372198457, "grad_norm": 2.1782937049865723, "learning_rate": 4.282809493264914e-06, "loss": 0.857, "step": 15285 }, { "epoch": 0.1863429734439935, "grad_norm": 2.021268606185913, "learning_rate": 4.282488774855677e-06, "loss": 0.9259, "step": 15290 }, { "epoch": 0.18640390966814133, "grad_norm": 2.005920886993408, "learning_rate": 4.28216805644644e-06, "loss": 0.8725, "step": 15295 }, { "epoch": 0.18646484589228912, "grad_norm": 2.4690420627593994, "learning_rate": 4.281847338037204e-06, "loss": 0.9165, "step": 15300 }, { "epoch": 0.18652578211643694, "grad_norm": 2.178152561187744, "learning_rate": 4.281526619627967e-06, "loss": 0.8502, "step": 15305 }, { "epoch": 0.18658671834058474, "grad_norm": 2.367223024368286, "learning_rate": 4.28120590121873e-06, "loss": 0.8931, "step": 15310 }, { "epoch": 0.18664765456473256, "grad_norm": 1.9651942253112793, "learning_rate": 4.280885182809494e-06, "loss": 0.829, "step": 15315 }, { "epoch": 0.18670859078888036, "grad_norm": 2.3055596351623535, "learning_rate": 4.280564464400257e-06, "loss": 1.008, "step": 15320 }, { "epoch": 0.18676952701302815, "grad_norm": 1.9129782915115356, "learning_rate": 4.28024374599102e-06, "loss": 0.9215, "step": 15325 }, { "epoch": 0.18683046323717598, "grad_norm": 2.0441105365753174, "learning_rate": 4.279923027581784e-06, "loss": 0.9302, "step": 15330 }, { "epoch": 0.18689139946132377, "grad_norm": 2.165292978286743, "learning_rate": 4.279602309172547e-06, "loss": 0.9534, "step": 15335 }, { "epoch": 0.1869523356854716, "grad_norm": 1.8325599431991577, "learning_rate": 4.27928159076331e-06, "loss": 0.8444, "step": 15340 }, { "epoch": 0.1870132719096194, "grad_norm": 2.373347043991089, "learning_rate": 4.278960872354073e-06, "loss": 0.8866, "step": 15345 }, { "epoch": 0.1870742081337672, "grad_norm": 2.110417127609253, "learning_rate": 4.278640153944837e-06, "loss": 0.8666, "step": 15350 }, { "epoch": 0.187135144357915, "grad_norm": 1.7566354274749756, "learning_rate": 4.2783194355356e-06, "loss": 0.9307, "step": 15355 }, { "epoch": 0.1871960805820628, "grad_norm": 1.9804730415344238, "learning_rate": 4.277998717126363e-06, "loss": 0.8588, "step": 15360 }, { "epoch": 0.18725701680621062, "grad_norm": 2.032674551010132, "learning_rate": 4.277677998717127e-06, "loss": 0.9598, "step": 15365 }, { "epoch": 0.18731795303035842, "grad_norm": 1.9853638410568237, "learning_rate": 4.27735728030789e-06, "loss": 0.9849, "step": 15370 }, { "epoch": 0.18737888925450624, "grad_norm": 1.9689977169036865, "learning_rate": 4.2770365618986535e-06, "loss": 0.902, "step": 15375 }, { "epoch": 0.18743982547865404, "grad_norm": 1.8995447158813477, "learning_rate": 4.2767158434894165e-06, "loss": 0.8572, "step": 15380 }, { "epoch": 0.18750076170280186, "grad_norm": 2.0974202156066895, "learning_rate": 4.2763951250801795e-06, "loss": 0.8405, "step": 15385 }, { "epoch": 0.18756169792694966, "grad_norm": 1.9211488962173462, "learning_rate": 4.276074406670943e-06, "loss": 0.8835, "step": 15390 }, { "epoch": 0.18762263415109745, "grad_norm": 2.0042269229888916, "learning_rate": 4.2757536882617064e-06, "loss": 0.8741, "step": 15395 }, { "epoch": 0.18768357037524527, "grad_norm": 1.8740653991699219, "learning_rate": 4.27543296985247e-06, "loss": 0.8782, "step": 15400 }, { "epoch": 0.18774450659939307, "grad_norm": 1.782461166381836, "learning_rate": 4.275112251443233e-06, "loss": 0.9189, "step": 15405 }, { "epoch": 0.1878054428235409, "grad_norm": 2.045964002609253, "learning_rate": 4.274791533033996e-06, "loss": 0.8821, "step": 15410 }, { "epoch": 0.1878663790476887, "grad_norm": 1.9369924068450928, "learning_rate": 4.27447081462476e-06, "loss": 0.9371, "step": 15415 }, { "epoch": 0.1879273152718365, "grad_norm": 1.9806880950927734, "learning_rate": 4.274150096215523e-06, "loss": 0.9034, "step": 15420 }, { "epoch": 0.1879882514959843, "grad_norm": 2.0968966484069824, "learning_rate": 4.273829377806287e-06, "loss": 0.8819, "step": 15425 }, { "epoch": 0.1880491877201321, "grad_norm": 2.9236109256744385, "learning_rate": 4.27350865939705e-06, "loss": 0.8791, "step": 15430 }, { "epoch": 0.18811012394427992, "grad_norm": 2.0112764835357666, "learning_rate": 4.273187940987813e-06, "loss": 0.9037, "step": 15435 }, { "epoch": 0.18817106016842772, "grad_norm": 1.8972556591033936, "learning_rate": 4.272867222578576e-06, "loss": 0.8536, "step": 15440 }, { "epoch": 0.18823199639257554, "grad_norm": 1.918446660041809, "learning_rate": 4.27254650416934e-06, "loss": 0.9153, "step": 15445 }, { "epoch": 0.18829293261672334, "grad_norm": 2.0677361488342285, "learning_rate": 4.272225785760103e-06, "loss": 0.8606, "step": 15450 }, { "epoch": 0.18835386884087116, "grad_norm": 2.634427547454834, "learning_rate": 4.271905067350866e-06, "loss": 0.8659, "step": 15455 }, { "epoch": 0.18841480506501895, "grad_norm": 1.6701850891113281, "learning_rate": 4.27158434894163e-06, "loss": 0.8449, "step": 15460 }, { "epoch": 0.18847574128916675, "grad_norm": 1.938733696937561, "learning_rate": 4.271263630532393e-06, "loss": 0.8413, "step": 15465 }, { "epoch": 0.18853667751331457, "grad_norm": 1.6934723854064941, "learning_rate": 4.270942912123156e-06, "loss": 0.9031, "step": 15470 }, { "epoch": 0.18859761373746237, "grad_norm": 2.097848892211914, "learning_rate": 4.27062219371392e-06, "loss": 0.8763, "step": 15475 }, { "epoch": 0.1886585499616102, "grad_norm": 1.868210792541504, "learning_rate": 4.270301475304683e-06, "loss": 0.8995, "step": 15480 }, { "epoch": 0.18871948618575798, "grad_norm": 2.219204902648926, "learning_rate": 4.269980756895446e-06, "loss": 0.8673, "step": 15485 }, { "epoch": 0.18878042240990578, "grad_norm": 1.970322608947754, "learning_rate": 4.26966003848621e-06, "loss": 0.8709, "step": 15490 }, { "epoch": 0.1888413586340536, "grad_norm": 2.065279245376587, "learning_rate": 4.269339320076973e-06, "loss": 0.9184, "step": 15495 }, { "epoch": 0.1889022948582014, "grad_norm": 1.989156723022461, "learning_rate": 4.269018601667736e-06, "loss": 0.892, "step": 15500 }, { "epoch": 0.18896323108234922, "grad_norm": 1.973073959350586, "learning_rate": 4.2686978832584996e-06, "loss": 0.8, "step": 15505 }, { "epoch": 0.18902416730649702, "grad_norm": 1.7849562168121338, "learning_rate": 4.268377164849263e-06, "loss": 0.9024, "step": 15510 }, { "epoch": 0.18908510353064484, "grad_norm": 2.050380229949951, "learning_rate": 4.268056446440026e-06, "loss": 0.8983, "step": 15515 }, { "epoch": 0.18914603975479263, "grad_norm": 2.0512168407440186, "learning_rate": 4.2677357280307895e-06, "loss": 0.9007, "step": 15520 }, { "epoch": 0.18920697597894043, "grad_norm": 2.1395797729492188, "learning_rate": 4.2674150096215525e-06, "loss": 0.8369, "step": 15525 }, { "epoch": 0.18926791220308825, "grad_norm": 1.888840675354004, "learning_rate": 4.2670942912123155e-06, "loss": 0.891, "step": 15530 }, { "epoch": 0.18932884842723605, "grad_norm": 1.9211053848266602, "learning_rate": 4.266773572803079e-06, "loss": 0.8674, "step": 15535 }, { "epoch": 0.18938978465138387, "grad_norm": 1.6945408582687378, "learning_rate": 4.266452854393842e-06, "loss": 0.8665, "step": 15540 }, { "epoch": 0.18945072087553166, "grad_norm": 1.8552509546279907, "learning_rate": 4.266132135984606e-06, "loss": 0.8515, "step": 15545 }, { "epoch": 0.1895116570996795, "grad_norm": 2.4097342491149902, "learning_rate": 4.265811417575369e-06, "loss": 0.8983, "step": 15550 }, { "epoch": 0.18957259332382728, "grad_norm": 2.233105182647705, "learning_rate": 4.265490699166132e-06, "loss": 0.9171, "step": 15555 }, { "epoch": 0.18963352954797508, "grad_norm": 2.089507579803467, "learning_rate": 4.265169980756896e-06, "loss": 1.0077, "step": 15560 }, { "epoch": 0.1896944657721229, "grad_norm": 2.207881450653076, "learning_rate": 4.264849262347659e-06, "loss": 0.9548, "step": 15565 }, { "epoch": 0.1897554019962707, "grad_norm": 2.114243507385254, "learning_rate": 4.264528543938423e-06, "loss": 0.8491, "step": 15570 }, { "epoch": 0.18981633822041852, "grad_norm": 1.9267120361328125, "learning_rate": 4.264207825529186e-06, "loss": 0.9054, "step": 15575 }, { "epoch": 0.1898772744445663, "grad_norm": 2.032179832458496, "learning_rate": 4.263887107119949e-06, "loss": 0.8847, "step": 15580 }, { "epoch": 0.18993821066871414, "grad_norm": 1.8309787511825562, "learning_rate": 4.263566388710713e-06, "loss": 0.837, "step": 15585 }, { "epoch": 0.18999914689286193, "grad_norm": 1.8689261674880981, "learning_rate": 4.263245670301476e-06, "loss": 0.9116, "step": 15590 }, { "epoch": 0.19006008311700973, "grad_norm": 2.389597177505493, "learning_rate": 4.262924951892239e-06, "loss": 0.9342, "step": 15595 }, { "epoch": 0.19012101934115755, "grad_norm": 1.9334614276885986, "learning_rate": 4.262604233483002e-06, "loss": 0.8565, "step": 15600 }, { "epoch": 0.19018195556530534, "grad_norm": 2.2198312282562256, "learning_rate": 4.262283515073766e-06, "loss": 0.9052, "step": 15605 }, { "epoch": 0.19024289178945317, "grad_norm": 1.795613169670105, "learning_rate": 4.261962796664529e-06, "loss": 0.8754, "step": 15610 }, { "epoch": 0.19030382801360096, "grad_norm": 2.0608901977539062, "learning_rate": 4.261642078255292e-06, "loss": 0.9262, "step": 15615 }, { "epoch": 0.19036476423774878, "grad_norm": 1.8556065559387207, "learning_rate": 4.261321359846056e-06, "loss": 0.9418, "step": 15620 }, { "epoch": 0.19042570046189658, "grad_norm": 1.8336971998214722, "learning_rate": 4.261000641436819e-06, "loss": 0.8516, "step": 15625 }, { "epoch": 0.19048663668604437, "grad_norm": 1.9564869403839111, "learning_rate": 4.260679923027582e-06, "loss": 0.9205, "step": 15630 }, { "epoch": 0.1905475729101922, "grad_norm": 2.429898738861084, "learning_rate": 4.260359204618346e-06, "loss": 0.9053, "step": 15635 }, { "epoch": 0.19060850913434, "grad_norm": 2.3221704959869385, "learning_rate": 4.260038486209109e-06, "loss": 0.8426, "step": 15640 }, { "epoch": 0.19066944535848782, "grad_norm": 2.0725677013397217, "learning_rate": 4.259717767799872e-06, "loss": 0.9134, "step": 15645 }, { "epoch": 0.1907303815826356, "grad_norm": 2.4821255207061768, "learning_rate": 4.2593970493906355e-06, "loss": 0.9144, "step": 15650 }, { "epoch": 0.19079131780678343, "grad_norm": 2.066131591796875, "learning_rate": 4.2590763309813985e-06, "loss": 0.9335, "step": 15655 }, { "epoch": 0.19085225403093123, "grad_norm": 1.881731390953064, "learning_rate": 4.2587556125721616e-06, "loss": 0.8914, "step": 15660 }, { "epoch": 0.19091319025507902, "grad_norm": 2.0638322830200195, "learning_rate": 4.2584348941629254e-06, "loss": 0.8605, "step": 15665 }, { "epoch": 0.19097412647922685, "grad_norm": 2.1127593517303467, "learning_rate": 4.2581141757536884e-06, "loss": 0.9364, "step": 15670 }, { "epoch": 0.19103506270337464, "grad_norm": 2.2802159786224365, "learning_rate": 4.2577934573444515e-06, "loss": 0.9317, "step": 15675 }, { "epoch": 0.19109599892752246, "grad_norm": 1.8619166612625122, "learning_rate": 4.257472738935215e-06, "loss": 0.7961, "step": 15680 }, { "epoch": 0.19115693515167026, "grad_norm": 1.9588971138000488, "learning_rate": 4.257152020525978e-06, "loss": 0.9041, "step": 15685 }, { "epoch": 0.19121787137581808, "grad_norm": 2.426818609237671, "learning_rate": 4.256831302116742e-06, "loss": 0.8888, "step": 15690 }, { "epoch": 0.19127880759996588, "grad_norm": 1.9946779012680054, "learning_rate": 4.256510583707505e-06, "loss": 0.8657, "step": 15695 }, { "epoch": 0.19133974382411367, "grad_norm": 2.0939443111419678, "learning_rate": 4.256189865298268e-06, "loss": 0.856, "step": 15700 }, { "epoch": 0.1914006800482615, "grad_norm": 2.084307909011841, "learning_rate": 4.255869146889032e-06, "loss": 0.9307, "step": 15705 }, { "epoch": 0.1914616162724093, "grad_norm": 2.5558810234069824, "learning_rate": 4.255548428479795e-06, "loss": 0.8348, "step": 15710 }, { "epoch": 0.1915225524965571, "grad_norm": 1.7789262533187866, "learning_rate": 4.255227710070559e-06, "loss": 0.8145, "step": 15715 }, { "epoch": 0.1915834887207049, "grad_norm": 2.0492231845855713, "learning_rate": 4.254906991661322e-06, "loss": 0.9101, "step": 15720 }, { "epoch": 0.1916444249448527, "grad_norm": 2.3802835941314697, "learning_rate": 4.254586273252085e-06, "loss": 0.9292, "step": 15725 }, { "epoch": 0.19170536116900053, "grad_norm": 1.914099931716919, "learning_rate": 4.254265554842849e-06, "loss": 0.9667, "step": 15730 }, { "epoch": 0.19176629739314832, "grad_norm": 1.6555861234664917, "learning_rate": 4.253944836433612e-06, "loss": 0.859, "step": 15735 }, { "epoch": 0.19182723361729614, "grad_norm": 2.3988263607025146, "learning_rate": 4.253624118024375e-06, "loss": 0.8342, "step": 15740 }, { "epoch": 0.19188816984144394, "grad_norm": 1.8918031454086304, "learning_rate": 4.253303399615139e-06, "loss": 0.8676, "step": 15745 }, { "epoch": 0.19194910606559176, "grad_norm": 1.7446492910385132, "learning_rate": 4.252982681205902e-06, "loss": 0.8877, "step": 15750 }, { "epoch": 0.19201004228973956, "grad_norm": 1.943986177444458, "learning_rate": 4.252661962796665e-06, "loss": 0.8593, "step": 15755 }, { "epoch": 0.19207097851388735, "grad_norm": 1.794163465499878, "learning_rate": 4.252341244387429e-06, "loss": 0.8478, "step": 15760 }, { "epoch": 0.19213191473803518, "grad_norm": 2.1820895671844482, "learning_rate": 4.252020525978192e-06, "loss": 0.9145, "step": 15765 }, { "epoch": 0.19219285096218297, "grad_norm": 1.947081446647644, "learning_rate": 4.251699807568955e-06, "loss": 0.9374, "step": 15770 }, { "epoch": 0.1922537871863308, "grad_norm": 1.7274298667907715, "learning_rate": 4.251379089159718e-06, "loss": 0.9101, "step": 15775 }, { "epoch": 0.1923147234104786, "grad_norm": 2.1714532375335693, "learning_rate": 4.251058370750482e-06, "loss": 0.8404, "step": 15780 }, { "epoch": 0.1923756596346264, "grad_norm": 2.1333892345428467, "learning_rate": 4.250737652341245e-06, "loss": 0.8403, "step": 15785 }, { "epoch": 0.1924365958587742, "grad_norm": 1.6011557579040527, "learning_rate": 4.250416933932008e-06, "loss": 0.9544, "step": 15790 }, { "epoch": 0.192497532082922, "grad_norm": 2.1072654724121094, "learning_rate": 4.2500962155227715e-06, "loss": 0.9656, "step": 15795 }, { "epoch": 0.19255846830706982, "grad_norm": 1.867164969444275, "learning_rate": 4.2497754971135345e-06, "loss": 0.9458, "step": 15800 }, { "epoch": 0.19261940453121762, "grad_norm": 1.8906365633010864, "learning_rate": 4.2494547787042975e-06, "loss": 0.9126, "step": 15805 }, { "epoch": 0.19268034075536544, "grad_norm": 1.961229681968689, "learning_rate": 4.249134060295061e-06, "loss": 0.898, "step": 15810 }, { "epoch": 0.19274127697951324, "grad_norm": 2.0233659744262695, "learning_rate": 4.248813341885824e-06, "loss": 0.8861, "step": 15815 }, { "epoch": 0.19280221320366106, "grad_norm": 1.7784194946289062, "learning_rate": 4.248492623476587e-06, "loss": 0.8519, "step": 15820 }, { "epoch": 0.19286314942780886, "grad_norm": 2.0899415016174316, "learning_rate": 4.248171905067351e-06, "loss": 0.8485, "step": 15825 }, { "epoch": 0.19292408565195665, "grad_norm": 2.1190285682678223, "learning_rate": 4.247851186658114e-06, "loss": 0.9069, "step": 15830 }, { "epoch": 0.19298502187610447, "grad_norm": 1.6494641304016113, "learning_rate": 4.247530468248877e-06, "loss": 0.8669, "step": 15835 }, { "epoch": 0.19304595810025227, "grad_norm": 1.8949304819107056, "learning_rate": 4.247209749839641e-06, "loss": 0.9033, "step": 15840 }, { "epoch": 0.1931068943244001, "grad_norm": 2.1505086421966553, "learning_rate": 4.246889031430404e-06, "loss": 0.8997, "step": 15845 }, { "epoch": 0.1931678305485479, "grad_norm": 1.8797680139541626, "learning_rate": 4.246568313021168e-06, "loss": 0.8739, "step": 15850 }, { "epoch": 0.1932287667726957, "grad_norm": 2.2047722339630127, "learning_rate": 4.246247594611931e-06, "loss": 0.8404, "step": 15855 }, { "epoch": 0.1932897029968435, "grad_norm": 2.2226593494415283, "learning_rate": 4.245926876202694e-06, "loss": 0.9251, "step": 15860 }, { "epoch": 0.1933506392209913, "grad_norm": 1.9028066396713257, "learning_rate": 4.245606157793458e-06, "loss": 0.8496, "step": 15865 }, { "epoch": 0.19341157544513912, "grad_norm": 1.8328427076339722, "learning_rate": 4.245285439384221e-06, "loss": 0.8946, "step": 15870 }, { "epoch": 0.19347251166928692, "grad_norm": 2.046934127807617, "learning_rate": 4.244964720974985e-06, "loss": 0.8647, "step": 15875 }, { "epoch": 0.19353344789343474, "grad_norm": 1.7228503227233887, "learning_rate": 4.244644002565748e-06, "loss": 0.8042, "step": 15880 }, { "epoch": 0.19359438411758254, "grad_norm": 1.9422346353530884, "learning_rate": 4.244323284156511e-06, "loss": 0.9414, "step": 15885 }, { "epoch": 0.19365532034173036, "grad_norm": 2.2193942070007324, "learning_rate": 4.244002565747275e-06, "loss": 0.8638, "step": 15890 }, { "epoch": 0.19371625656587815, "grad_norm": 2.1505796909332275, "learning_rate": 4.243681847338038e-06, "loss": 0.8962, "step": 15895 }, { "epoch": 0.19377719279002595, "grad_norm": 1.8845655918121338, "learning_rate": 4.243361128928801e-06, "loss": 0.8693, "step": 15900 }, { "epoch": 0.19383812901417377, "grad_norm": 1.914885401725769, "learning_rate": 4.243040410519565e-06, "loss": 0.8664, "step": 15905 }, { "epoch": 0.19389906523832157, "grad_norm": 1.8663921356201172, "learning_rate": 4.242719692110328e-06, "loss": 0.8338, "step": 15910 }, { "epoch": 0.1939600014624694, "grad_norm": 1.7843011617660522, "learning_rate": 4.242398973701091e-06, "loss": 0.8748, "step": 15915 }, { "epoch": 0.19402093768661718, "grad_norm": 1.8748029470443726, "learning_rate": 4.2420782552918545e-06, "loss": 0.9004, "step": 15920 }, { "epoch": 0.194081873910765, "grad_norm": 2.075092315673828, "learning_rate": 4.2417575368826175e-06, "loss": 0.9122, "step": 15925 }, { "epoch": 0.1941428101349128, "grad_norm": 1.6427233219146729, "learning_rate": 4.2414368184733806e-06, "loss": 0.7962, "step": 15930 }, { "epoch": 0.1942037463590606, "grad_norm": 1.9246731996536255, "learning_rate": 4.241116100064144e-06, "loss": 0.922, "step": 15935 }, { "epoch": 0.19426468258320842, "grad_norm": 1.8617990016937256, "learning_rate": 4.2407953816549074e-06, "loss": 0.8812, "step": 15940 }, { "epoch": 0.19432561880735622, "grad_norm": 2.0789496898651123, "learning_rate": 4.2404746632456705e-06, "loss": 0.8841, "step": 15945 }, { "epoch": 0.19438655503150404, "grad_norm": 2.1577136516571045, "learning_rate": 4.2401539448364335e-06, "loss": 0.8495, "step": 15950 }, { "epoch": 0.19444749125565183, "grad_norm": 1.8614068031311035, "learning_rate": 4.239833226427197e-06, "loss": 0.8451, "step": 15955 }, { "epoch": 0.19450842747979963, "grad_norm": 2.4284427165985107, "learning_rate": 4.23951250801796e-06, "loss": 0.9314, "step": 15960 }, { "epoch": 0.19456936370394745, "grad_norm": 2.1300048828125, "learning_rate": 4.239191789608723e-06, "loss": 0.8924, "step": 15965 }, { "epoch": 0.19463029992809525, "grad_norm": 1.828952431678772, "learning_rate": 4.238871071199487e-06, "loss": 0.9048, "step": 15970 }, { "epoch": 0.19469123615224307, "grad_norm": 2.443664073944092, "learning_rate": 4.23855035279025e-06, "loss": 0.9021, "step": 15975 }, { "epoch": 0.19475217237639086, "grad_norm": 1.9524742364883423, "learning_rate": 4.238229634381013e-06, "loss": 0.8765, "step": 15980 }, { "epoch": 0.1948131086005387, "grad_norm": 1.926123857498169, "learning_rate": 4.237908915971777e-06, "loss": 0.8921, "step": 15985 }, { "epoch": 0.19487404482468648, "grad_norm": 2.3749921321868896, "learning_rate": 4.23758819756254e-06, "loss": 0.8742, "step": 15990 }, { "epoch": 0.19493498104883428, "grad_norm": 1.81062912940979, "learning_rate": 4.237267479153304e-06, "loss": 0.8622, "step": 15995 }, { "epoch": 0.1949959172729821, "grad_norm": 2.101733446121216, "learning_rate": 4.236946760744067e-06, "loss": 0.8455, "step": 16000 }, { "epoch": 0.1950568534971299, "grad_norm": 1.8069807291030884, "learning_rate": 4.23662604233483e-06, "loss": 0.8988, "step": 16005 }, { "epoch": 0.19511778972127772, "grad_norm": 1.908180594444275, "learning_rate": 4.236305323925594e-06, "loss": 0.9121, "step": 16010 }, { "epoch": 0.1951787259454255, "grad_norm": 1.9596781730651855, "learning_rate": 4.235984605516357e-06, "loss": 0.8064, "step": 16015 }, { "epoch": 0.19523966216957334, "grad_norm": 1.893222689628601, "learning_rate": 4.235663887107121e-06, "loss": 0.8921, "step": 16020 }, { "epoch": 0.19530059839372113, "grad_norm": 2.1065587997436523, "learning_rate": 4.235343168697884e-06, "loss": 0.884, "step": 16025 }, { "epoch": 0.19536153461786893, "grad_norm": 1.8898683786392212, "learning_rate": 4.235022450288647e-06, "loss": 0.8194, "step": 16030 }, { "epoch": 0.19542247084201675, "grad_norm": 2.333632230758667, "learning_rate": 4.234701731879411e-06, "loss": 0.8581, "step": 16035 }, { "epoch": 0.19548340706616454, "grad_norm": 1.789111852645874, "learning_rate": 4.234381013470174e-06, "loss": 0.8776, "step": 16040 }, { "epoch": 0.19554434329031237, "grad_norm": 2.063626289367676, "learning_rate": 4.234060295060937e-06, "loss": 0.903, "step": 16045 }, { "epoch": 0.19560527951446016, "grad_norm": 1.9054601192474365, "learning_rate": 4.233739576651701e-06, "loss": 0.8643, "step": 16050 }, { "epoch": 0.19566621573860798, "grad_norm": 1.9247181415557861, "learning_rate": 4.233418858242464e-06, "loss": 0.8882, "step": 16055 }, { "epoch": 0.19572715196275578, "grad_norm": 1.7835419178009033, "learning_rate": 4.233098139833227e-06, "loss": 0.8862, "step": 16060 }, { "epoch": 0.19578808818690357, "grad_norm": 1.6966357231140137, "learning_rate": 4.2327774214239905e-06, "loss": 0.9062, "step": 16065 }, { "epoch": 0.1958490244110514, "grad_norm": 2.077061653137207, "learning_rate": 4.2324567030147535e-06, "loss": 0.9432, "step": 16070 }, { "epoch": 0.1959099606351992, "grad_norm": 2.0074126720428467, "learning_rate": 4.2321359846055165e-06, "loss": 0.8828, "step": 16075 }, { "epoch": 0.19597089685934702, "grad_norm": 2.1855626106262207, "learning_rate": 4.23181526619628e-06, "loss": 0.8726, "step": 16080 }, { "epoch": 0.1960318330834948, "grad_norm": 1.9110034704208374, "learning_rate": 4.231494547787043e-06, "loss": 0.8943, "step": 16085 }, { "epoch": 0.19609276930764263, "grad_norm": 2.2805840969085693, "learning_rate": 4.231173829377806e-06, "loss": 0.9346, "step": 16090 }, { "epoch": 0.19615370553179043, "grad_norm": 2.025665044784546, "learning_rate": 4.23085311096857e-06, "loss": 0.8893, "step": 16095 }, { "epoch": 0.19621464175593822, "grad_norm": 2.103926181793213, "learning_rate": 4.230532392559333e-06, "loss": 0.8692, "step": 16100 }, { "epoch": 0.19627557798008605, "grad_norm": 1.8590692281723022, "learning_rate": 4.230211674150096e-06, "loss": 0.8635, "step": 16105 }, { "epoch": 0.19633651420423384, "grad_norm": 2.096388578414917, "learning_rate": 4.229890955740859e-06, "loss": 0.911, "step": 16110 }, { "epoch": 0.19639745042838166, "grad_norm": 2.037658214569092, "learning_rate": 4.229570237331623e-06, "loss": 0.8581, "step": 16115 }, { "epoch": 0.19645838665252946, "grad_norm": 1.9090862274169922, "learning_rate": 4.229249518922386e-06, "loss": 0.9013, "step": 16120 }, { "epoch": 0.19651932287667728, "grad_norm": 1.8740880489349365, "learning_rate": 4.228928800513149e-06, "loss": 0.824, "step": 16125 }, { "epoch": 0.19658025910082508, "grad_norm": 2.4553699493408203, "learning_rate": 4.228608082103913e-06, "loss": 0.9347, "step": 16130 }, { "epoch": 0.19664119532497287, "grad_norm": 2.031949996948242, "learning_rate": 4.228287363694676e-06, "loss": 0.8722, "step": 16135 }, { "epoch": 0.1967021315491207, "grad_norm": 2.07942533493042, "learning_rate": 4.227966645285439e-06, "loss": 0.8725, "step": 16140 }, { "epoch": 0.1967630677732685, "grad_norm": 2.2183735370635986, "learning_rate": 4.227645926876203e-06, "loss": 0.8281, "step": 16145 }, { "epoch": 0.1968240039974163, "grad_norm": 1.9186512231826782, "learning_rate": 4.227325208466966e-06, "loss": 0.8845, "step": 16150 }, { "epoch": 0.1968849402215641, "grad_norm": 2.2052085399627686, "learning_rate": 4.22700449005773e-06, "loss": 0.8459, "step": 16155 }, { "epoch": 0.19694587644571193, "grad_norm": 2.016158103942871, "learning_rate": 4.226683771648493e-06, "loss": 0.9118, "step": 16160 }, { "epoch": 0.19700681266985973, "grad_norm": 2.1276421546936035, "learning_rate": 4.226363053239257e-06, "loss": 0.9167, "step": 16165 }, { "epoch": 0.19706774889400752, "grad_norm": 1.9196264743804932, "learning_rate": 4.22604233483002e-06, "loss": 0.8884, "step": 16170 }, { "epoch": 0.19712868511815534, "grad_norm": 2.1291182041168213, "learning_rate": 4.225721616420783e-06, "loss": 0.868, "step": 16175 }, { "epoch": 0.19718962134230314, "grad_norm": 1.850292444229126, "learning_rate": 4.225400898011547e-06, "loss": 0.8706, "step": 16180 }, { "epoch": 0.19725055756645096, "grad_norm": 2.027531147003174, "learning_rate": 4.22508017960231e-06, "loss": 0.8369, "step": 16185 }, { "epoch": 0.19731149379059876, "grad_norm": 2.162165641784668, "learning_rate": 4.224759461193073e-06, "loss": 0.9111, "step": 16190 }, { "epoch": 0.19737243001474655, "grad_norm": 2.2781150341033936, "learning_rate": 4.2244387427838365e-06, "loss": 0.9167, "step": 16195 }, { "epoch": 0.19743336623889438, "grad_norm": 1.8275439739227295, "learning_rate": 4.2241180243745996e-06, "loss": 0.8775, "step": 16200 }, { "epoch": 0.19749430246304217, "grad_norm": 1.7643226385116577, "learning_rate": 4.223797305965363e-06, "loss": 0.8884, "step": 16205 }, { "epoch": 0.19755523868719, "grad_norm": 2.4395463466644287, "learning_rate": 4.2234765875561264e-06, "loss": 0.9809, "step": 16210 }, { "epoch": 0.1976161749113378, "grad_norm": 2.599837303161621, "learning_rate": 4.2231558691468895e-06, "loss": 0.8677, "step": 16215 }, { "epoch": 0.1976771111354856, "grad_norm": 2.3326776027679443, "learning_rate": 4.2228351507376525e-06, "loss": 0.8263, "step": 16220 }, { "epoch": 0.1977380473596334, "grad_norm": 1.8257206678390503, "learning_rate": 4.222514432328416e-06, "loss": 0.9263, "step": 16225 }, { "epoch": 0.1977989835837812, "grad_norm": 2.1053237915039062, "learning_rate": 4.222193713919179e-06, "loss": 0.8673, "step": 16230 }, { "epoch": 0.19785991980792902, "grad_norm": 2.1341307163238525, "learning_rate": 4.221872995509942e-06, "loss": 0.8969, "step": 16235 }, { "epoch": 0.19792085603207682, "grad_norm": 1.9388829469680786, "learning_rate": 4.221552277100706e-06, "loss": 0.8926, "step": 16240 }, { "epoch": 0.19798179225622464, "grad_norm": 1.8609181642532349, "learning_rate": 4.221231558691469e-06, "loss": 0.8959, "step": 16245 }, { "epoch": 0.19804272848037244, "grad_norm": 2.078596830368042, "learning_rate": 4.220910840282232e-06, "loss": 0.8838, "step": 16250 }, { "epoch": 0.19810366470452026, "grad_norm": 1.9684627056121826, "learning_rate": 4.220590121872996e-06, "loss": 0.9383, "step": 16255 }, { "epoch": 0.19816460092866806, "grad_norm": 1.9858537912368774, "learning_rate": 4.220269403463759e-06, "loss": 0.8601, "step": 16260 }, { "epoch": 0.19822553715281585, "grad_norm": 2.094733715057373, "learning_rate": 4.219948685054522e-06, "loss": 0.9228, "step": 16265 }, { "epoch": 0.19828647337696367, "grad_norm": 2.1664066314697266, "learning_rate": 4.219627966645285e-06, "loss": 0.8642, "step": 16270 }, { "epoch": 0.19834740960111147, "grad_norm": 1.7882568836212158, "learning_rate": 4.219307248236049e-06, "loss": 0.849, "step": 16275 }, { "epoch": 0.1984083458252593, "grad_norm": 1.8324748277664185, "learning_rate": 4.218986529826812e-06, "loss": 0.8718, "step": 16280 }, { "epoch": 0.1984692820494071, "grad_norm": 2.0675160884857178, "learning_rate": 4.218665811417575e-06, "loss": 0.8881, "step": 16285 }, { "epoch": 0.1985302182735549, "grad_norm": 1.7776471376419067, "learning_rate": 4.218345093008339e-06, "loss": 0.9209, "step": 16290 }, { "epoch": 0.1985911544977027, "grad_norm": 1.8061330318450928, "learning_rate": 4.218024374599102e-06, "loss": 0.8842, "step": 16295 }, { "epoch": 0.1986520907218505, "grad_norm": 2.079749345779419, "learning_rate": 4.217703656189866e-06, "loss": 0.8306, "step": 16300 }, { "epoch": 0.19871302694599832, "grad_norm": 2.3581156730651855, "learning_rate": 4.217382937780629e-06, "loss": 0.9539, "step": 16305 }, { "epoch": 0.19877396317014612, "grad_norm": 1.959204077720642, "learning_rate": 4.217062219371392e-06, "loss": 0.9156, "step": 16310 }, { "epoch": 0.19883489939429394, "grad_norm": 2.0899174213409424, "learning_rate": 4.216741500962156e-06, "loss": 0.9381, "step": 16315 }, { "epoch": 0.19889583561844174, "grad_norm": 1.845528483390808, "learning_rate": 4.216420782552919e-06, "loss": 0.9212, "step": 16320 }, { "epoch": 0.19895677184258956, "grad_norm": 1.9259430170059204, "learning_rate": 4.216100064143683e-06, "loss": 0.8369, "step": 16325 }, { "epoch": 0.19901770806673735, "grad_norm": 2.0883772373199463, "learning_rate": 4.215779345734446e-06, "loss": 0.8876, "step": 16330 }, { "epoch": 0.19907864429088515, "grad_norm": 2.1338913440704346, "learning_rate": 4.215458627325209e-06, "loss": 0.8941, "step": 16335 }, { "epoch": 0.19913958051503297, "grad_norm": 1.8288977146148682, "learning_rate": 4.2151379089159725e-06, "loss": 0.8912, "step": 16340 }, { "epoch": 0.19920051673918077, "grad_norm": 2.153421401977539, "learning_rate": 4.2148171905067355e-06, "loss": 0.9484, "step": 16345 }, { "epoch": 0.1992614529633286, "grad_norm": 2.0253288745880127, "learning_rate": 4.214496472097499e-06, "loss": 0.9145, "step": 16350 }, { "epoch": 0.19932238918747638, "grad_norm": 2.167879343032837, "learning_rate": 4.214175753688262e-06, "loss": 0.9333, "step": 16355 }, { "epoch": 0.1993833254116242, "grad_norm": 2.123940944671631, "learning_rate": 4.213855035279025e-06, "loss": 0.9173, "step": 16360 }, { "epoch": 0.199444261635772, "grad_norm": 1.6681427955627441, "learning_rate": 4.2135343168697884e-06, "loss": 0.9548, "step": 16365 }, { "epoch": 0.1995051978599198, "grad_norm": 2.270803928375244, "learning_rate": 4.213213598460552e-06, "loss": 0.9419, "step": 16370 }, { "epoch": 0.19956613408406762, "grad_norm": 1.7472552061080933, "learning_rate": 4.212892880051315e-06, "loss": 0.86, "step": 16375 }, { "epoch": 0.19962707030821542, "grad_norm": 2.100055694580078, "learning_rate": 4.212572161642078e-06, "loss": 0.9575, "step": 16380 }, { "epoch": 0.19968800653236324, "grad_norm": 1.7265535593032837, "learning_rate": 4.212251443232842e-06, "loss": 0.8961, "step": 16385 }, { "epoch": 0.19974894275651103, "grad_norm": 1.9631962776184082, "learning_rate": 4.211930724823605e-06, "loss": 0.8104, "step": 16390 }, { "epoch": 0.19980987898065886, "grad_norm": 1.7806841135025024, "learning_rate": 4.211610006414368e-06, "loss": 0.8771, "step": 16395 }, { "epoch": 0.19987081520480665, "grad_norm": 1.853209376335144, "learning_rate": 4.211289288005132e-06, "loss": 0.8725, "step": 16400 }, { "epoch": 0.19993175142895445, "grad_norm": 1.8364111185073853, "learning_rate": 4.210968569595895e-06, "loss": 0.9069, "step": 16405 }, { "epoch": 0.19999268765310227, "grad_norm": 2.154554843902588, "learning_rate": 4.210647851186658e-06, "loss": 0.8384, "step": 16410 }, { "epoch": 0.20005362387725006, "grad_norm": 1.8910144567489624, "learning_rate": 4.210327132777422e-06, "loss": 0.8503, "step": 16415 }, { "epoch": 0.2001145601013979, "grad_norm": 1.8913133144378662, "learning_rate": 4.210006414368185e-06, "loss": 0.8738, "step": 16420 }, { "epoch": 0.20017549632554568, "grad_norm": 1.7631756067276, "learning_rate": 4.209685695958948e-06, "loss": 0.828, "step": 16425 }, { "epoch": 0.20023643254969348, "grad_norm": 2.348623514175415, "learning_rate": 4.209364977549712e-06, "loss": 0.795, "step": 16430 }, { "epoch": 0.2002973687738413, "grad_norm": 2.1520895957946777, "learning_rate": 4.209044259140475e-06, "loss": 0.9138, "step": 16435 }, { "epoch": 0.2003583049979891, "grad_norm": 2.184619426727295, "learning_rate": 4.208723540731238e-06, "loss": 0.8626, "step": 16440 }, { "epoch": 0.20041924122213692, "grad_norm": 1.882468342781067, "learning_rate": 4.208402822322002e-06, "loss": 0.8588, "step": 16445 }, { "epoch": 0.2004801774462847, "grad_norm": 2.1761507987976074, "learning_rate": 4.208082103912765e-06, "loss": 0.8812, "step": 16450 }, { "epoch": 0.20054111367043254, "grad_norm": 2.0650832653045654, "learning_rate": 4.207761385503528e-06, "loss": 0.9163, "step": 16455 }, { "epoch": 0.20060204989458033, "grad_norm": 1.7202366590499878, "learning_rate": 4.207440667094292e-06, "loss": 0.8359, "step": 16460 }, { "epoch": 0.20066298611872813, "grad_norm": 1.8348828554153442, "learning_rate": 4.207119948685055e-06, "loss": 0.8622, "step": 16465 }, { "epoch": 0.20072392234287595, "grad_norm": 2.068424940109253, "learning_rate": 4.2067992302758186e-06, "loss": 0.8782, "step": 16470 }, { "epoch": 0.20078485856702374, "grad_norm": 2.1605942249298096, "learning_rate": 4.206478511866582e-06, "loss": 0.889, "step": 16475 }, { "epoch": 0.20084579479117157, "grad_norm": 1.9241219758987427, "learning_rate": 4.206157793457345e-06, "loss": 0.8737, "step": 16480 }, { "epoch": 0.20090673101531936, "grad_norm": 2.0053598880767822, "learning_rate": 4.2058370750481085e-06, "loss": 0.838, "step": 16485 }, { "epoch": 0.20096766723946718, "grad_norm": 4.610044002532959, "learning_rate": 4.2055163566388715e-06, "loss": 0.984, "step": 16490 }, { "epoch": 0.20102860346361498, "grad_norm": 1.9293017387390137, "learning_rate": 4.205195638229635e-06, "loss": 0.8773, "step": 16495 }, { "epoch": 0.20108953968776277, "grad_norm": 2.159635066986084, "learning_rate": 4.204874919820398e-06, "loss": 0.85, "step": 16500 }, { "epoch": 0.2011504759119106, "grad_norm": 1.9864912033081055, "learning_rate": 4.204554201411161e-06, "loss": 0.9544, "step": 16505 }, { "epoch": 0.2012114121360584, "grad_norm": 2.2427122592926025, "learning_rate": 4.204233483001925e-06, "loss": 0.9364, "step": 16510 }, { "epoch": 0.20127234836020622, "grad_norm": 2.2715108394622803, "learning_rate": 4.203912764592688e-06, "loss": 0.9125, "step": 16515 }, { "epoch": 0.201333284584354, "grad_norm": 1.912777066230774, "learning_rate": 4.203592046183451e-06, "loss": 0.8934, "step": 16520 }, { "epoch": 0.20139422080850183, "grad_norm": 1.8411221504211426, "learning_rate": 4.203271327774214e-06, "loss": 0.7704, "step": 16525 }, { "epoch": 0.20145515703264963, "grad_norm": 2.036008834838867, "learning_rate": 4.202950609364978e-06, "loss": 0.903, "step": 16530 }, { "epoch": 0.20151609325679742, "grad_norm": 1.949437141418457, "learning_rate": 4.202629890955741e-06, "loss": 0.8902, "step": 16535 }, { "epoch": 0.20157702948094525, "grad_norm": 2.1990647315979004, "learning_rate": 4.202309172546504e-06, "loss": 0.9053, "step": 16540 }, { "epoch": 0.20163796570509304, "grad_norm": 2.0643999576568604, "learning_rate": 4.201988454137268e-06, "loss": 0.8752, "step": 16545 }, { "epoch": 0.20169890192924086, "grad_norm": 1.9077281951904297, "learning_rate": 4.201667735728031e-06, "loss": 0.9334, "step": 16550 }, { "epoch": 0.20175983815338866, "grad_norm": 1.934283971786499, "learning_rate": 4.201347017318794e-06, "loss": 0.9486, "step": 16555 }, { "epoch": 0.20182077437753648, "grad_norm": 2.5552399158477783, "learning_rate": 4.201026298909558e-06, "loss": 0.9007, "step": 16560 }, { "epoch": 0.20188171060168428, "grad_norm": 1.760643720626831, "learning_rate": 4.200705580500321e-06, "loss": 0.8775, "step": 16565 }, { "epoch": 0.20194264682583207, "grad_norm": 2.374652862548828, "learning_rate": 4.200384862091084e-06, "loss": 0.8874, "step": 16570 }, { "epoch": 0.2020035830499799, "grad_norm": 1.866028070449829, "learning_rate": 4.200064143681848e-06, "loss": 0.913, "step": 16575 }, { "epoch": 0.2020645192741277, "grad_norm": 1.8555762767791748, "learning_rate": 4.199743425272611e-06, "loss": 0.8476, "step": 16580 }, { "epoch": 0.2021254554982755, "grad_norm": 2.1774511337280273, "learning_rate": 4.199422706863374e-06, "loss": 0.8918, "step": 16585 }, { "epoch": 0.2021863917224233, "grad_norm": 1.9642870426177979, "learning_rate": 4.199101988454138e-06, "loss": 0.8152, "step": 16590 }, { "epoch": 0.20224732794657113, "grad_norm": 1.8995976448059082, "learning_rate": 4.198781270044901e-06, "loss": 0.8627, "step": 16595 }, { "epoch": 0.20230826417071893, "grad_norm": 2.1210641860961914, "learning_rate": 4.198460551635664e-06, "loss": 0.8606, "step": 16600 }, { "epoch": 0.20236920039486672, "grad_norm": 1.8151023387908936, "learning_rate": 4.198139833226428e-06, "loss": 0.8751, "step": 16605 }, { "epoch": 0.20243013661901454, "grad_norm": 1.6881028413772583, "learning_rate": 4.197819114817191e-06, "loss": 0.8567, "step": 16610 }, { "epoch": 0.20249107284316234, "grad_norm": 2.1337344646453857, "learning_rate": 4.197498396407954e-06, "loss": 0.9088, "step": 16615 }, { "epoch": 0.20255200906731016, "grad_norm": 1.7117395401000977, "learning_rate": 4.1971776779987175e-06, "loss": 0.8856, "step": 16620 }, { "epoch": 0.20261294529145796, "grad_norm": 1.8112841844558716, "learning_rate": 4.1968569595894806e-06, "loss": 0.8334, "step": 16625 }, { "epoch": 0.20267388151560578, "grad_norm": 2.0061724185943604, "learning_rate": 4.196536241180244e-06, "loss": 0.8561, "step": 16630 }, { "epoch": 0.20273481773975358, "grad_norm": 1.6923209428787231, "learning_rate": 4.1962155227710074e-06, "loss": 0.8638, "step": 16635 }, { "epoch": 0.20279575396390137, "grad_norm": 1.772507905960083, "learning_rate": 4.195894804361771e-06, "loss": 0.9037, "step": 16640 }, { "epoch": 0.2028566901880492, "grad_norm": 1.9144657850265503, "learning_rate": 4.195574085952534e-06, "loss": 0.9449, "step": 16645 }, { "epoch": 0.202917626412197, "grad_norm": 2.0187325477600098, "learning_rate": 4.195253367543297e-06, "loss": 0.9103, "step": 16650 }, { "epoch": 0.2029785626363448, "grad_norm": 2.0778589248657227, "learning_rate": 4.194932649134061e-06, "loss": 0.8959, "step": 16655 }, { "epoch": 0.2030394988604926, "grad_norm": 2.1767570972442627, "learning_rate": 4.194611930724824e-06, "loss": 0.9242, "step": 16660 }, { "epoch": 0.2031004350846404, "grad_norm": 1.8117562532424927, "learning_rate": 4.194291212315587e-06, "loss": 0.9525, "step": 16665 }, { "epoch": 0.20316137130878822, "grad_norm": 1.875399112701416, "learning_rate": 4.193970493906351e-06, "loss": 0.9385, "step": 16670 }, { "epoch": 0.20322230753293602, "grad_norm": 2.611640453338623, "learning_rate": 4.193649775497114e-06, "loss": 0.8325, "step": 16675 }, { "epoch": 0.20328324375708384, "grad_norm": 2.1732969284057617, "learning_rate": 4.193329057087877e-06, "loss": 0.8904, "step": 16680 }, { "epoch": 0.20334417998123164, "grad_norm": 2.4266717433929443, "learning_rate": 4.193008338678641e-06, "loss": 0.894, "step": 16685 }, { "epoch": 0.20340511620537946, "grad_norm": 1.9225730895996094, "learning_rate": 4.192687620269404e-06, "loss": 0.8935, "step": 16690 }, { "epoch": 0.20346605242952726, "grad_norm": 1.9017305374145508, "learning_rate": 4.192366901860167e-06, "loss": 0.9161, "step": 16695 }, { "epoch": 0.20352698865367505, "grad_norm": 1.9505215883255005, "learning_rate": 4.19204618345093e-06, "loss": 0.9437, "step": 16700 }, { "epoch": 0.20358792487782287, "grad_norm": 1.8537030220031738, "learning_rate": 4.191725465041694e-06, "loss": 0.8823, "step": 16705 }, { "epoch": 0.20364886110197067, "grad_norm": 2.548154354095459, "learning_rate": 4.191404746632457e-06, "loss": 0.8799, "step": 16710 }, { "epoch": 0.2037097973261185, "grad_norm": 1.9541535377502441, "learning_rate": 4.19108402822322e-06, "loss": 0.9565, "step": 16715 }, { "epoch": 0.2037707335502663, "grad_norm": 2.1872241497039795, "learning_rate": 4.190763309813984e-06, "loss": 0.8758, "step": 16720 }, { "epoch": 0.2038316697744141, "grad_norm": 1.9996916055679321, "learning_rate": 4.190442591404747e-06, "loss": 0.8837, "step": 16725 }, { "epoch": 0.2038926059985619, "grad_norm": 2.031409978866577, "learning_rate": 4.19012187299551e-06, "loss": 0.8503, "step": 16730 }, { "epoch": 0.2039535422227097, "grad_norm": 1.6467665433883667, "learning_rate": 4.189801154586274e-06, "loss": 0.8927, "step": 16735 }, { "epoch": 0.20401447844685752, "grad_norm": 1.9687063694000244, "learning_rate": 4.189480436177037e-06, "loss": 0.8855, "step": 16740 }, { "epoch": 0.20407541467100532, "grad_norm": 2.11948561668396, "learning_rate": 4.1891597177678e-06, "loss": 0.9691, "step": 16745 }, { "epoch": 0.20413635089515314, "grad_norm": 1.9687703847885132, "learning_rate": 4.188838999358564e-06, "loss": 0.9597, "step": 16750 }, { "epoch": 0.20419728711930094, "grad_norm": 2.105482816696167, "learning_rate": 4.188518280949327e-06, "loss": 0.848, "step": 16755 }, { "epoch": 0.20425822334344876, "grad_norm": 2.1111881732940674, "learning_rate": 4.18819756254009e-06, "loss": 0.9097, "step": 16760 }, { "epoch": 0.20431915956759655, "grad_norm": 2.149077892303467, "learning_rate": 4.1878768441308535e-06, "loss": 0.9101, "step": 16765 }, { "epoch": 0.20438009579174435, "grad_norm": 2.094764232635498, "learning_rate": 4.1875561257216165e-06, "loss": 0.8321, "step": 16770 }, { "epoch": 0.20444103201589217, "grad_norm": 2.349414110183716, "learning_rate": 4.18723540731238e-06, "loss": 0.7922, "step": 16775 }, { "epoch": 0.20450196824003997, "grad_norm": 2.1803596019744873, "learning_rate": 4.186914688903143e-06, "loss": 0.8216, "step": 16780 }, { "epoch": 0.2045629044641878, "grad_norm": 1.7015438079833984, "learning_rate": 4.186593970493906e-06, "loss": 0.8259, "step": 16785 }, { "epoch": 0.20462384068833558, "grad_norm": 2.1524624824523926, "learning_rate": 4.18627325208467e-06, "loss": 0.9055, "step": 16790 }, { "epoch": 0.2046847769124834, "grad_norm": 2.094156265258789, "learning_rate": 4.185952533675433e-06, "loss": 0.9298, "step": 16795 }, { "epoch": 0.2047457131366312, "grad_norm": 2.109301805496216, "learning_rate": 4.185631815266197e-06, "loss": 0.9199, "step": 16800 }, { "epoch": 0.204806649360779, "grad_norm": 1.9885907173156738, "learning_rate": 4.18531109685696e-06, "loss": 0.8598, "step": 16805 }, { "epoch": 0.20486758558492682, "grad_norm": 2.2987797260284424, "learning_rate": 4.184990378447723e-06, "loss": 0.9126, "step": 16810 }, { "epoch": 0.20492852180907462, "grad_norm": 1.929304599761963, "learning_rate": 4.184669660038487e-06, "loss": 0.9355, "step": 16815 }, { "epoch": 0.20498945803322244, "grad_norm": 1.9497166872024536, "learning_rate": 4.18434894162925e-06, "loss": 0.8868, "step": 16820 }, { "epoch": 0.20505039425737023, "grad_norm": 2.142125368118286, "learning_rate": 4.184028223220013e-06, "loss": 0.881, "step": 16825 }, { "epoch": 0.20511133048151806, "grad_norm": 1.9856572151184082, "learning_rate": 4.183707504810777e-06, "loss": 0.9297, "step": 16830 }, { "epoch": 0.20517226670566585, "grad_norm": 2.2805943489074707, "learning_rate": 4.18338678640154e-06, "loss": 0.891, "step": 16835 }, { "epoch": 0.20523320292981365, "grad_norm": 1.7185442447662354, "learning_rate": 4.183066067992303e-06, "loss": 0.8491, "step": 16840 }, { "epoch": 0.20529413915396147, "grad_norm": 1.8989806175231934, "learning_rate": 4.182745349583067e-06, "loss": 0.8853, "step": 16845 }, { "epoch": 0.20535507537810926, "grad_norm": 1.777017593383789, "learning_rate": 4.18242463117383e-06, "loss": 0.8979, "step": 16850 }, { "epoch": 0.2054160116022571, "grad_norm": 1.928650140762329, "learning_rate": 4.182103912764593e-06, "loss": 0.8565, "step": 16855 }, { "epoch": 0.20547694782640488, "grad_norm": 1.8969041109085083, "learning_rate": 4.181783194355356e-06, "loss": 0.9051, "step": 16860 }, { "epoch": 0.2055378840505527, "grad_norm": 1.9820752143859863, "learning_rate": 4.18146247594612e-06, "loss": 0.8409, "step": 16865 }, { "epoch": 0.2055988202747005, "grad_norm": 2.236576795578003, "learning_rate": 4.181141757536883e-06, "loss": 0.9466, "step": 16870 }, { "epoch": 0.2056597564988483, "grad_norm": 1.8278011083602905, "learning_rate": 4.180821039127646e-06, "loss": 0.8403, "step": 16875 }, { "epoch": 0.20572069272299612, "grad_norm": 2.0311119556427, "learning_rate": 4.18050032071841e-06, "loss": 0.8901, "step": 16880 }, { "epoch": 0.2057816289471439, "grad_norm": 1.88718843460083, "learning_rate": 4.180179602309173e-06, "loss": 0.8704, "step": 16885 }, { "epoch": 0.20584256517129174, "grad_norm": 1.740905523300171, "learning_rate": 4.179858883899936e-06, "loss": 0.8663, "step": 16890 }, { "epoch": 0.20590350139543953, "grad_norm": 1.914090633392334, "learning_rate": 4.1795381654906996e-06, "loss": 0.8284, "step": 16895 }, { "epoch": 0.20596443761958733, "grad_norm": 2.0765748023986816, "learning_rate": 4.1792174470814626e-06, "loss": 0.8844, "step": 16900 }, { "epoch": 0.20602537384373515, "grad_norm": 2.3500592708587646, "learning_rate": 4.178896728672226e-06, "loss": 0.8975, "step": 16905 }, { "epoch": 0.20608631006788294, "grad_norm": 2.377950668334961, "learning_rate": 4.1785760102629894e-06, "loss": 0.8585, "step": 16910 }, { "epoch": 0.20614724629203077, "grad_norm": 1.9568225145339966, "learning_rate": 4.1782552918537525e-06, "loss": 0.9237, "step": 16915 }, { "epoch": 0.20620818251617856, "grad_norm": 1.851586103439331, "learning_rate": 4.177934573444516e-06, "loss": 0.8326, "step": 16920 }, { "epoch": 0.20626911874032638, "grad_norm": 2.236213445663452, "learning_rate": 4.177613855035279e-06, "loss": 0.8416, "step": 16925 }, { "epoch": 0.20633005496447418, "grad_norm": 1.7091834545135498, "learning_rate": 4.177293136626042e-06, "loss": 0.8466, "step": 16930 }, { "epoch": 0.20639099118862198, "grad_norm": 1.848411202430725, "learning_rate": 4.176972418216806e-06, "loss": 0.8837, "step": 16935 }, { "epoch": 0.2064519274127698, "grad_norm": 2.028148889541626, "learning_rate": 4.176651699807569e-06, "loss": 0.9444, "step": 16940 }, { "epoch": 0.2065128636369176, "grad_norm": 1.8120754957199097, "learning_rate": 4.176330981398333e-06, "loss": 0.8228, "step": 16945 }, { "epoch": 0.20657379986106542, "grad_norm": 2.094942808151245, "learning_rate": 4.176010262989096e-06, "loss": 0.8388, "step": 16950 }, { "epoch": 0.2066347360852132, "grad_norm": 1.4686329364776611, "learning_rate": 4.175689544579859e-06, "loss": 0.8051, "step": 16955 }, { "epoch": 0.20669567230936103, "grad_norm": 1.9267277717590332, "learning_rate": 4.175368826170623e-06, "loss": 0.8382, "step": 16960 }, { "epoch": 0.20675660853350883, "grad_norm": 1.8630284070968628, "learning_rate": 4.175048107761386e-06, "loss": 0.8124, "step": 16965 }, { "epoch": 0.20681754475765662, "grad_norm": 2.235525369644165, "learning_rate": 4.174727389352149e-06, "loss": 0.9286, "step": 16970 }, { "epoch": 0.20687848098180445, "grad_norm": 1.8506383895874023, "learning_rate": 4.174406670942913e-06, "loss": 0.8102, "step": 16975 }, { "epoch": 0.20693941720595224, "grad_norm": 1.9298791885375977, "learning_rate": 4.174085952533676e-06, "loss": 0.9384, "step": 16980 }, { "epoch": 0.20700035343010006, "grad_norm": 1.8287118673324585, "learning_rate": 4.173765234124439e-06, "loss": 0.8215, "step": 16985 }, { "epoch": 0.20706128965424786, "grad_norm": 1.986133337020874, "learning_rate": 4.173444515715203e-06, "loss": 0.845, "step": 16990 }, { "epoch": 0.20712222587839568, "grad_norm": 1.7598787546157837, "learning_rate": 4.173123797305966e-06, "loss": 0.8385, "step": 16995 }, { "epoch": 0.20718316210254348, "grad_norm": 1.9064847230911255, "learning_rate": 4.172803078896729e-06, "loss": 0.791, "step": 17000 }, { "epoch": 0.20724409832669127, "grad_norm": 1.8266502618789673, "learning_rate": 4.172482360487493e-06, "loss": 0.9236, "step": 17005 }, { "epoch": 0.2073050345508391, "grad_norm": 1.7115603685379028, "learning_rate": 4.172161642078256e-06, "loss": 0.9233, "step": 17010 }, { "epoch": 0.2073659707749869, "grad_norm": 1.7650386095046997, "learning_rate": 4.171840923669019e-06, "loss": 0.957, "step": 17015 }, { "epoch": 0.2074269069991347, "grad_norm": 1.883495569229126, "learning_rate": 4.171520205259783e-06, "loss": 0.8602, "step": 17020 }, { "epoch": 0.2074878432232825, "grad_norm": 2.656250238418579, "learning_rate": 4.171199486850546e-06, "loss": 0.884, "step": 17025 }, { "epoch": 0.20754877944743033, "grad_norm": 1.7400496006011963, "learning_rate": 4.170878768441309e-06, "loss": 0.8828, "step": 17030 }, { "epoch": 0.20760971567157813, "grad_norm": 1.7304176092147827, "learning_rate": 4.170558050032072e-06, "loss": 0.925, "step": 17035 }, { "epoch": 0.20767065189572592, "grad_norm": 1.8679571151733398, "learning_rate": 4.1702373316228355e-06, "loss": 0.8721, "step": 17040 }, { "epoch": 0.20773158811987374, "grad_norm": 1.9227876663208008, "learning_rate": 4.1699166132135985e-06, "loss": 0.846, "step": 17045 }, { "epoch": 0.20779252434402154, "grad_norm": 2.3377106189727783, "learning_rate": 4.1695958948043615e-06, "loss": 0.869, "step": 17050 }, { "epoch": 0.20785346056816936, "grad_norm": 2.125443935394287, "learning_rate": 4.169275176395125e-06, "loss": 0.8989, "step": 17055 }, { "epoch": 0.20791439679231716, "grad_norm": 1.598902702331543, "learning_rate": 4.168954457985888e-06, "loss": 0.8017, "step": 17060 }, { "epoch": 0.20797533301646498, "grad_norm": 1.8302689790725708, "learning_rate": 4.1686337395766514e-06, "loss": 0.9041, "step": 17065 }, { "epoch": 0.20803626924061278, "grad_norm": 1.719777226448059, "learning_rate": 4.168313021167415e-06, "loss": 0.8797, "step": 17070 }, { "epoch": 0.20809720546476057, "grad_norm": 1.8653956651687622, "learning_rate": 4.167992302758178e-06, "loss": 0.8456, "step": 17075 }, { "epoch": 0.2081581416889084, "grad_norm": 1.7316405773162842, "learning_rate": 4.167671584348942e-06, "loss": 0.9165, "step": 17080 }, { "epoch": 0.2082190779130562, "grad_norm": 1.9624031782150269, "learning_rate": 4.167350865939705e-06, "loss": 0.8535, "step": 17085 }, { "epoch": 0.208280014137204, "grad_norm": 1.8525102138519287, "learning_rate": 4.167030147530468e-06, "loss": 0.8735, "step": 17090 }, { "epoch": 0.2083409503613518, "grad_norm": 1.9875954389572144, "learning_rate": 4.166709429121232e-06, "loss": 0.9068, "step": 17095 }, { "epoch": 0.20840188658549963, "grad_norm": 1.8483600616455078, "learning_rate": 4.166388710711995e-06, "loss": 0.8446, "step": 17100 }, { "epoch": 0.20846282280964742, "grad_norm": 2.1604249477386475, "learning_rate": 4.166067992302759e-06, "loss": 0.819, "step": 17105 }, { "epoch": 0.20852375903379522, "grad_norm": 1.8137258291244507, "learning_rate": 4.165747273893522e-06, "loss": 0.8701, "step": 17110 }, { "epoch": 0.20858469525794304, "grad_norm": 2.144484043121338, "learning_rate": 4.165426555484285e-06, "loss": 0.8115, "step": 17115 }, { "epoch": 0.20864563148209084, "grad_norm": 2.188398838043213, "learning_rate": 4.165105837075049e-06, "loss": 0.9592, "step": 17120 }, { "epoch": 0.20870656770623866, "grad_norm": 1.8355721235275269, "learning_rate": 4.164785118665812e-06, "loss": 0.9399, "step": 17125 }, { "epoch": 0.20876750393038646, "grad_norm": 2.137237071990967, "learning_rate": 4.164464400256575e-06, "loss": 0.9053, "step": 17130 }, { "epoch": 0.20882844015453425, "grad_norm": 1.9933382272720337, "learning_rate": 4.164143681847339e-06, "loss": 0.804, "step": 17135 }, { "epoch": 0.20888937637868207, "grad_norm": 1.8670004606246948, "learning_rate": 4.163822963438102e-06, "loss": 0.8619, "step": 17140 }, { "epoch": 0.20895031260282987, "grad_norm": 1.8269551992416382, "learning_rate": 4.163502245028865e-06, "loss": 0.9147, "step": 17145 }, { "epoch": 0.2090112488269777, "grad_norm": 1.6207104921340942, "learning_rate": 4.163181526619629e-06, "loss": 0.8573, "step": 17150 }, { "epoch": 0.2090721850511255, "grad_norm": 1.8887079954147339, "learning_rate": 4.162860808210392e-06, "loss": 0.9119, "step": 17155 }, { "epoch": 0.2091331212752733, "grad_norm": 2.0683672428131104, "learning_rate": 4.162540089801155e-06, "loss": 0.8935, "step": 17160 }, { "epoch": 0.2091940574994211, "grad_norm": 1.9226617813110352, "learning_rate": 4.1622193713919185e-06, "loss": 0.8831, "step": 17165 }, { "epoch": 0.2092549937235689, "grad_norm": 2.1852941513061523, "learning_rate": 4.1618986529826816e-06, "loss": 0.905, "step": 17170 }, { "epoch": 0.20931592994771672, "grad_norm": 2.1307644844055176, "learning_rate": 4.161577934573445e-06, "loss": 0.8623, "step": 17175 }, { "epoch": 0.20937686617186452, "grad_norm": 2.044948101043701, "learning_rate": 4.1612572161642084e-06, "loss": 0.8796, "step": 17180 }, { "epoch": 0.20943780239601234, "grad_norm": 2.0141305923461914, "learning_rate": 4.1609364977549715e-06, "loss": 0.8776, "step": 17185 }, { "epoch": 0.20949873862016014, "grad_norm": 2.036104917526245, "learning_rate": 4.1606157793457345e-06, "loss": 0.8761, "step": 17190 }, { "epoch": 0.20955967484430796, "grad_norm": 2.27956223487854, "learning_rate": 4.1602950609364975e-06, "loss": 0.9292, "step": 17195 }, { "epoch": 0.20962061106845575, "grad_norm": 2.097045421600342, "learning_rate": 4.159974342527261e-06, "loss": 0.8942, "step": 17200 }, { "epoch": 0.20968154729260355, "grad_norm": 2.027460813522339, "learning_rate": 4.159653624118024e-06, "loss": 0.8818, "step": 17205 }, { "epoch": 0.20974248351675137, "grad_norm": 1.7622114419937134, "learning_rate": 4.159332905708787e-06, "loss": 0.8449, "step": 17210 }, { "epoch": 0.20980341974089917, "grad_norm": 2.00451922416687, "learning_rate": 4.159012187299551e-06, "loss": 0.8763, "step": 17215 }, { "epoch": 0.209864355965047, "grad_norm": 1.7320820093154907, "learning_rate": 4.158691468890314e-06, "loss": 0.8976, "step": 17220 }, { "epoch": 0.20992529218919478, "grad_norm": 1.8527708053588867, "learning_rate": 4.158370750481078e-06, "loss": 0.8562, "step": 17225 }, { "epoch": 0.2099862284133426, "grad_norm": 1.8498002290725708, "learning_rate": 4.158050032071841e-06, "loss": 0.873, "step": 17230 }, { "epoch": 0.2100471646374904, "grad_norm": 2.0738577842712402, "learning_rate": 4.157729313662604e-06, "loss": 0.8662, "step": 17235 }, { "epoch": 0.2101081008616382, "grad_norm": 1.993117094039917, "learning_rate": 4.157408595253368e-06, "loss": 0.9051, "step": 17240 }, { "epoch": 0.21016903708578602, "grad_norm": 1.9805625677108765, "learning_rate": 4.157087876844131e-06, "loss": 0.8616, "step": 17245 }, { "epoch": 0.21022997330993382, "grad_norm": 2.0174074172973633, "learning_rate": 4.156767158434895e-06, "loss": 0.9559, "step": 17250 }, { "epoch": 0.21029090953408164, "grad_norm": 1.8655389547348022, "learning_rate": 4.156446440025658e-06, "loss": 0.8543, "step": 17255 }, { "epoch": 0.21035184575822943, "grad_norm": 1.9586137533187866, "learning_rate": 4.156125721616421e-06, "loss": 0.9028, "step": 17260 }, { "epoch": 0.21041278198237726, "grad_norm": 1.926591157913208, "learning_rate": 4.155805003207185e-06, "loss": 0.919, "step": 17265 }, { "epoch": 0.21047371820652505, "grad_norm": 1.8485149145126343, "learning_rate": 4.155484284797948e-06, "loss": 0.9412, "step": 17270 }, { "epoch": 0.21053465443067285, "grad_norm": 2.2446506023406982, "learning_rate": 4.155163566388712e-06, "loss": 0.855, "step": 17275 }, { "epoch": 0.21059559065482067, "grad_norm": 1.9081284999847412, "learning_rate": 4.154842847979475e-06, "loss": 0.8032, "step": 17280 }, { "epoch": 0.21065652687896846, "grad_norm": 1.8087908029556274, "learning_rate": 4.154522129570238e-06, "loss": 0.8701, "step": 17285 }, { "epoch": 0.2107174631031163, "grad_norm": 2.057039737701416, "learning_rate": 4.154201411161001e-06, "loss": 0.8865, "step": 17290 }, { "epoch": 0.21077839932726408, "grad_norm": 1.789391279220581, "learning_rate": 4.153880692751765e-06, "loss": 0.8543, "step": 17295 }, { "epoch": 0.2108393355514119, "grad_norm": 1.898881435394287, "learning_rate": 4.153559974342528e-06, "loss": 0.8796, "step": 17300 }, { "epoch": 0.2109002717755597, "grad_norm": 2.1199588775634766, "learning_rate": 4.153239255933291e-06, "loss": 0.841, "step": 17305 }, { "epoch": 0.2109612079997075, "grad_norm": 2.360732078552246, "learning_rate": 4.1529185375240545e-06, "loss": 0.8813, "step": 17310 }, { "epoch": 0.21102214422385532, "grad_norm": 2.297603130340576, "learning_rate": 4.1525978191148175e-06, "loss": 0.8814, "step": 17315 }, { "epoch": 0.2110830804480031, "grad_norm": 2.133236885070801, "learning_rate": 4.1522771007055805e-06, "loss": 0.8655, "step": 17320 }, { "epoch": 0.21114401667215094, "grad_norm": 1.8333098888397217, "learning_rate": 4.151956382296344e-06, "loss": 0.8812, "step": 17325 }, { "epoch": 0.21120495289629873, "grad_norm": 2.028838634490967, "learning_rate": 4.151635663887107e-06, "loss": 0.8899, "step": 17330 }, { "epoch": 0.21126588912044655, "grad_norm": 2.1574769020080566, "learning_rate": 4.1513149454778704e-06, "loss": 0.877, "step": 17335 }, { "epoch": 0.21132682534459435, "grad_norm": 2.116297483444214, "learning_rate": 4.150994227068634e-06, "loss": 0.9083, "step": 17340 }, { "epoch": 0.21138776156874214, "grad_norm": 1.9916949272155762, "learning_rate": 4.150673508659397e-06, "loss": 0.8589, "step": 17345 }, { "epoch": 0.21144869779288997, "grad_norm": 2.3868119716644287, "learning_rate": 4.15035279025016e-06, "loss": 0.8705, "step": 17350 }, { "epoch": 0.21150963401703776, "grad_norm": 1.9119997024536133, "learning_rate": 4.150032071840924e-06, "loss": 0.8501, "step": 17355 }, { "epoch": 0.21157057024118558, "grad_norm": 1.9646400213241577, "learning_rate": 4.149711353431687e-06, "loss": 0.8694, "step": 17360 }, { "epoch": 0.21163150646533338, "grad_norm": 1.8413679599761963, "learning_rate": 4.14939063502245e-06, "loss": 0.9314, "step": 17365 }, { "epoch": 0.2116924426894812, "grad_norm": 1.8354178667068481, "learning_rate": 4.149069916613214e-06, "loss": 0.841, "step": 17370 }, { "epoch": 0.211753378913629, "grad_norm": 1.973196029663086, "learning_rate": 4.148749198203977e-06, "loss": 0.9169, "step": 17375 }, { "epoch": 0.2118143151377768, "grad_norm": 2.2381327152252197, "learning_rate": 4.14842847979474e-06, "loss": 0.9323, "step": 17380 }, { "epoch": 0.21187525136192462, "grad_norm": 2.3496038913726807, "learning_rate": 4.148107761385504e-06, "loss": 0.8779, "step": 17385 }, { "epoch": 0.2119361875860724, "grad_norm": 1.9139381647109985, "learning_rate": 4.147787042976267e-06, "loss": 0.8892, "step": 17390 }, { "epoch": 0.21199712381022023, "grad_norm": 2.156839370727539, "learning_rate": 4.147466324567031e-06, "loss": 0.955, "step": 17395 }, { "epoch": 0.21205806003436803, "grad_norm": 1.7961173057556152, "learning_rate": 4.147145606157794e-06, "loss": 0.8998, "step": 17400 }, { "epoch": 0.21211899625851582, "grad_norm": 1.8968576192855835, "learning_rate": 4.146824887748557e-06, "loss": 0.8312, "step": 17405 }, { "epoch": 0.21217993248266365, "grad_norm": 2.427314281463623, "learning_rate": 4.146504169339321e-06, "loss": 0.8911, "step": 17410 }, { "epoch": 0.21224086870681144, "grad_norm": 1.7443344593048096, "learning_rate": 4.146183450930084e-06, "loss": 0.8843, "step": 17415 }, { "epoch": 0.21230180493095926, "grad_norm": 1.8356901407241821, "learning_rate": 4.145862732520848e-06, "loss": 0.8585, "step": 17420 }, { "epoch": 0.21236274115510706, "grad_norm": 2.053771495819092, "learning_rate": 4.145542014111611e-06, "loss": 0.8566, "step": 17425 }, { "epoch": 0.21242367737925488, "grad_norm": 1.8725169897079468, "learning_rate": 4.145221295702374e-06, "loss": 0.8422, "step": 17430 }, { "epoch": 0.21248461360340268, "grad_norm": 2.3797895908355713, "learning_rate": 4.1449005772931375e-06, "loss": 0.856, "step": 17435 }, { "epoch": 0.21254554982755047, "grad_norm": 1.8971103429794312, "learning_rate": 4.1445798588839006e-06, "loss": 0.9365, "step": 17440 }, { "epoch": 0.2126064860516983, "grad_norm": 2.1803407669067383, "learning_rate": 4.144259140474664e-06, "loss": 0.8252, "step": 17445 }, { "epoch": 0.2126674222758461, "grad_norm": 2.354532480239868, "learning_rate": 4.143938422065427e-06, "loss": 0.8388, "step": 17450 }, { "epoch": 0.2127283584999939, "grad_norm": 1.9926872253417969, "learning_rate": 4.1436177036561905e-06, "loss": 0.8849, "step": 17455 }, { "epoch": 0.2127892947241417, "grad_norm": 2.002636194229126, "learning_rate": 4.1432969852469535e-06, "loss": 0.8201, "step": 17460 }, { "epoch": 0.21285023094828953, "grad_norm": 2.202286958694458, "learning_rate": 4.1429762668377165e-06, "loss": 0.8128, "step": 17465 }, { "epoch": 0.21291116717243733, "grad_norm": 2.101954936981201, "learning_rate": 4.14265554842848e-06, "loss": 0.8435, "step": 17470 }, { "epoch": 0.21297210339658512, "grad_norm": 1.7851638793945312, "learning_rate": 4.142334830019243e-06, "loss": 0.891, "step": 17475 }, { "epoch": 0.21303303962073294, "grad_norm": 2.1725809574127197, "learning_rate": 4.142014111610006e-06, "loss": 0.8127, "step": 17480 }, { "epoch": 0.21309397584488074, "grad_norm": 1.99630606174469, "learning_rate": 4.14169339320077e-06, "loss": 0.9396, "step": 17485 }, { "epoch": 0.21315491206902856, "grad_norm": 2.403376817703247, "learning_rate": 4.141372674791533e-06, "loss": 0.9089, "step": 17490 }, { "epoch": 0.21321584829317636, "grad_norm": 1.948241949081421, "learning_rate": 4.141051956382296e-06, "loss": 0.894, "step": 17495 }, { "epoch": 0.21327678451732418, "grad_norm": 1.9280542135238647, "learning_rate": 4.14073123797306e-06, "loss": 0.927, "step": 17500 }, { "epoch": 0.21333772074147198, "grad_norm": 2.084998369216919, "learning_rate": 4.140410519563823e-06, "loss": 0.8328, "step": 17505 }, { "epoch": 0.21339865696561977, "grad_norm": 1.736448884010315, "learning_rate": 4.140089801154586e-06, "loss": 0.9094, "step": 17510 }, { "epoch": 0.2134595931897676, "grad_norm": 1.896498680114746, "learning_rate": 4.13976908274535e-06, "loss": 0.8423, "step": 17515 }, { "epoch": 0.2135205294139154, "grad_norm": 1.9065732955932617, "learning_rate": 4.139448364336113e-06, "loss": 0.9041, "step": 17520 }, { "epoch": 0.2135814656380632, "grad_norm": 2.11919903755188, "learning_rate": 4.139127645926876e-06, "loss": 0.9393, "step": 17525 }, { "epoch": 0.213642401862211, "grad_norm": 1.795918583869934, "learning_rate": 4.13880692751764e-06, "loss": 0.8043, "step": 17530 }, { "epoch": 0.21370333808635883, "grad_norm": 1.86165189743042, "learning_rate": 4.138486209108403e-06, "loss": 0.894, "step": 17535 }, { "epoch": 0.21376427431050662, "grad_norm": 2.2190158367156982, "learning_rate": 4.138165490699166e-06, "loss": 0.8936, "step": 17540 }, { "epoch": 0.21382521053465442, "grad_norm": 2.2312827110290527, "learning_rate": 4.13784477228993e-06, "loss": 0.8626, "step": 17545 }, { "epoch": 0.21388614675880224, "grad_norm": 1.7725579738616943, "learning_rate": 4.137524053880693e-06, "loss": 0.935, "step": 17550 }, { "epoch": 0.21394708298295004, "grad_norm": 1.6796648502349854, "learning_rate": 4.137203335471457e-06, "loss": 0.8639, "step": 17555 }, { "epoch": 0.21400801920709786, "grad_norm": 1.8138118982315063, "learning_rate": 4.13688261706222e-06, "loss": 0.8571, "step": 17560 }, { "epoch": 0.21406895543124566, "grad_norm": 2.229142427444458, "learning_rate": 4.136561898652984e-06, "loss": 0.8369, "step": 17565 }, { "epoch": 0.21412989165539348, "grad_norm": 2.2302677631378174, "learning_rate": 4.136241180243747e-06, "loss": 0.9244, "step": 17570 }, { "epoch": 0.21419082787954127, "grad_norm": 2.193307876586914, "learning_rate": 4.13592046183451e-06, "loss": 0.9867, "step": 17575 }, { "epoch": 0.21425176410368907, "grad_norm": 2.0120599269866943, "learning_rate": 4.1355997434252735e-06, "loss": 0.9404, "step": 17580 }, { "epoch": 0.2143127003278369, "grad_norm": 2.2766213417053223, "learning_rate": 4.1352790250160365e-06, "loss": 0.8543, "step": 17585 }, { "epoch": 0.2143736365519847, "grad_norm": 2.383143663406372, "learning_rate": 4.1349583066067995e-06, "loss": 0.8073, "step": 17590 }, { "epoch": 0.2144345727761325, "grad_norm": 1.9638217687606812, "learning_rate": 4.134637588197563e-06, "loss": 0.8848, "step": 17595 }, { "epoch": 0.2144955090002803, "grad_norm": 2.044341564178467, "learning_rate": 4.134316869788326e-06, "loss": 0.9349, "step": 17600 }, { "epoch": 0.21455644522442813, "grad_norm": 2.125574827194214, "learning_rate": 4.1339961513790894e-06, "loss": 0.821, "step": 17605 }, { "epoch": 0.21461738144857592, "grad_norm": 2.392261028289795, "learning_rate": 4.133675432969853e-06, "loss": 0.9057, "step": 17610 }, { "epoch": 0.21467831767272372, "grad_norm": 1.722838282585144, "learning_rate": 4.133354714560616e-06, "loss": 0.9152, "step": 17615 }, { "epoch": 0.21473925389687154, "grad_norm": 2.1045022010803223, "learning_rate": 4.133033996151379e-06, "loss": 0.9272, "step": 17620 }, { "epoch": 0.21480019012101934, "grad_norm": 1.9755399227142334, "learning_rate": 4.132713277742142e-06, "loss": 0.8421, "step": 17625 }, { "epoch": 0.21486112634516716, "grad_norm": 1.989750862121582, "learning_rate": 4.132392559332906e-06, "loss": 0.8789, "step": 17630 }, { "epoch": 0.21492206256931495, "grad_norm": 2.0002458095550537, "learning_rate": 4.132071840923669e-06, "loss": 0.8986, "step": 17635 }, { "epoch": 0.21498299879346275, "grad_norm": 1.9932528734207153, "learning_rate": 4.131751122514432e-06, "loss": 0.8961, "step": 17640 }, { "epoch": 0.21504393501761057, "grad_norm": 1.9664937257766724, "learning_rate": 4.131430404105196e-06, "loss": 0.9412, "step": 17645 }, { "epoch": 0.21510487124175837, "grad_norm": 1.8221852779388428, "learning_rate": 4.131109685695959e-06, "loss": 0.8655, "step": 17650 }, { "epoch": 0.2151658074659062, "grad_norm": 1.9679737091064453, "learning_rate": 4.130788967286722e-06, "loss": 0.9447, "step": 17655 }, { "epoch": 0.21522674369005398, "grad_norm": 1.8357226848602295, "learning_rate": 4.130468248877486e-06, "loss": 0.8637, "step": 17660 }, { "epoch": 0.2152876799142018, "grad_norm": 2.0051493644714355, "learning_rate": 4.130147530468249e-06, "loss": 0.8523, "step": 17665 }, { "epoch": 0.2153486161383496, "grad_norm": 2.300915002822876, "learning_rate": 4.129826812059012e-06, "loss": 0.9554, "step": 17670 }, { "epoch": 0.2154095523624974, "grad_norm": 2.108787775039673, "learning_rate": 4.129506093649776e-06, "loss": 0.9364, "step": 17675 }, { "epoch": 0.21547048858664522, "grad_norm": 2.1220128536224365, "learning_rate": 4.129185375240539e-06, "loss": 0.9253, "step": 17680 }, { "epoch": 0.21553142481079302, "grad_norm": 2.390918731689453, "learning_rate": 4.128864656831302e-06, "loss": 0.8977, "step": 17685 }, { "epoch": 0.21559236103494084, "grad_norm": 1.7909048795700073, "learning_rate": 4.128543938422066e-06, "loss": 0.8834, "step": 17690 }, { "epoch": 0.21565329725908863, "grad_norm": 1.95281183719635, "learning_rate": 4.128223220012829e-06, "loss": 0.9209, "step": 17695 }, { "epoch": 0.21571423348323646, "grad_norm": 2.229186773300171, "learning_rate": 4.127902501603593e-06, "loss": 0.8533, "step": 17700 }, { "epoch": 0.21577516970738425, "grad_norm": 2.0130181312561035, "learning_rate": 4.127581783194356e-06, "loss": 0.876, "step": 17705 }, { "epoch": 0.21583610593153205, "grad_norm": 2.056150197982788, "learning_rate": 4.127261064785119e-06, "loss": 0.8399, "step": 17710 }, { "epoch": 0.21589704215567987, "grad_norm": 2.0718533992767334, "learning_rate": 4.126940346375883e-06, "loss": 0.9136, "step": 17715 }, { "epoch": 0.21595797837982766, "grad_norm": 2.009554147720337, "learning_rate": 4.126619627966646e-06, "loss": 0.8881, "step": 17720 }, { "epoch": 0.2160189146039755, "grad_norm": 2.1057045459747314, "learning_rate": 4.1262989095574095e-06, "loss": 0.9419, "step": 17725 }, { "epoch": 0.21607985082812328, "grad_norm": 1.7294977903366089, "learning_rate": 4.1259781911481725e-06, "loss": 0.9207, "step": 17730 }, { "epoch": 0.2161407870522711, "grad_norm": 2.01702880859375, "learning_rate": 4.1256574727389355e-06, "loss": 0.8886, "step": 17735 }, { "epoch": 0.2162017232764189, "grad_norm": 1.8389573097229004, "learning_rate": 4.125336754329699e-06, "loss": 0.9611, "step": 17740 }, { "epoch": 0.2162626595005667, "grad_norm": 2.0945777893066406, "learning_rate": 4.125016035920462e-06, "loss": 0.9329, "step": 17745 }, { "epoch": 0.21632359572471452, "grad_norm": 2.0709526538848877, "learning_rate": 4.124695317511225e-06, "loss": 0.9237, "step": 17750 }, { "epoch": 0.2163845319488623, "grad_norm": 2.870168685913086, "learning_rate": 4.124374599101989e-06, "loss": 0.9584, "step": 17755 }, { "epoch": 0.21644546817301014, "grad_norm": 1.7942447662353516, "learning_rate": 4.124053880692752e-06, "loss": 0.9291, "step": 17760 }, { "epoch": 0.21650640439715793, "grad_norm": 1.8725335597991943, "learning_rate": 4.123733162283515e-06, "loss": 0.8302, "step": 17765 }, { "epoch": 0.21656734062130575, "grad_norm": 2.255483627319336, "learning_rate": 4.123412443874279e-06, "loss": 0.8652, "step": 17770 }, { "epoch": 0.21662827684545355, "grad_norm": 1.8293601274490356, "learning_rate": 4.123091725465042e-06, "loss": 0.9349, "step": 17775 }, { "epoch": 0.21668921306960134, "grad_norm": 2.054145574569702, "learning_rate": 4.122771007055805e-06, "loss": 0.9391, "step": 17780 }, { "epoch": 0.21675014929374917, "grad_norm": 2.7492687702178955, "learning_rate": 4.122450288646568e-06, "loss": 0.8728, "step": 17785 }, { "epoch": 0.21681108551789696, "grad_norm": 2.17423677444458, "learning_rate": 4.122129570237332e-06, "loss": 0.9491, "step": 17790 }, { "epoch": 0.21687202174204478, "grad_norm": 1.8603415489196777, "learning_rate": 4.121808851828095e-06, "loss": 0.8641, "step": 17795 }, { "epoch": 0.21693295796619258, "grad_norm": 1.8843415975570679, "learning_rate": 4.121488133418858e-06, "loss": 0.897, "step": 17800 }, { "epoch": 0.2169938941903404, "grad_norm": 1.7402758598327637, "learning_rate": 4.121167415009622e-06, "loss": 0.9458, "step": 17805 }, { "epoch": 0.2170548304144882, "grad_norm": 2.1321558952331543, "learning_rate": 4.120846696600385e-06, "loss": 0.8994, "step": 17810 }, { "epoch": 0.217115766638636, "grad_norm": 1.8353912830352783, "learning_rate": 4.120525978191148e-06, "loss": 0.9153, "step": 17815 }, { "epoch": 0.21717670286278382, "grad_norm": 1.9356993436813354, "learning_rate": 4.120205259781912e-06, "loss": 0.8627, "step": 17820 }, { "epoch": 0.2172376390869316, "grad_norm": 1.7653886079788208, "learning_rate": 4.119884541372675e-06, "loss": 0.8679, "step": 17825 }, { "epoch": 0.21729857531107943, "grad_norm": 2.136221408843994, "learning_rate": 4.119563822963438e-06, "loss": 0.8783, "step": 17830 }, { "epoch": 0.21735951153522723, "grad_norm": 2.0547242164611816, "learning_rate": 4.119243104554202e-06, "loss": 0.9094, "step": 17835 }, { "epoch": 0.21742044775937505, "grad_norm": 2.041372537612915, "learning_rate": 4.118922386144965e-06, "loss": 0.8793, "step": 17840 }, { "epoch": 0.21748138398352285, "grad_norm": 2.196425199508667, "learning_rate": 4.118601667735729e-06, "loss": 0.9272, "step": 17845 }, { "epoch": 0.21754232020767064, "grad_norm": 2.089946746826172, "learning_rate": 4.118280949326492e-06, "loss": 0.8926, "step": 17850 }, { "epoch": 0.21760325643181846, "grad_norm": 1.9034703969955444, "learning_rate": 4.117960230917255e-06, "loss": 0.806, "step": 17855 }, { "epoch": 0.21766419265596626, "grad_norm": 1.9723349809646606, "learning_rate": 4.1176395125080185e-06, "loss": 0.8829, "step": 17860 }, { "epoch": 0.21772512888011408, "grad_norm": 1.8899086713790894, "learning_rate": 4.1173187940987816e-06, "loss": 0.8585, "step": 17865 }, { "epoch": 0.21778606510426188, "grad_norm": 1.6727169752120972, "learning_rate": 4.116998075689545e-06, "loss": 0.9663, "step": 17870 }, { "epoch": 0.21784700132840967, "grad_norm": 2.1939642429351807, "learning_rate": 4.1166773572803084e-06, "loss": 0.8672, "step": 17875 }, { "epoch": 0.2179079375525575, "grad_norm": 1.750778317451477, "learning_rate": 4.1163566388710715e-06, "loss": 0.8755, "step": 17880 }, { "epoch": 0.2179688737767053, "grad_norm": 2.0561065673828125, "learning_rate": 4.116035920461835e-06, "loss": 0.889, "step": 17885 }, { "epoch": 0.2180298100008531, "grad_norm": 1.7558873891830444, "learning_rate": 4.115715202052598e-06, "loss": 0.8801, "step": 17890 }, { "epoch": 0.2180907462250009, "grad_norm": 2.0563879013061523, "learning_rate": 4.115394483643361e-06, "loss": 0.8241, "step": 17895 }, { "epoch": 0.21815168244914873, "grad_norm": 1.787316083908081, "learning_rate": 4.115073765234125e-06, "loss": 0.8709, "step": 17900 }, { "epoch": 0.21821261867329653, "grad_norm": 1.9721559286117554, "learning_rate": 4.114753046824888e-06, "loss": 0.9565, "step": 17905 }, { "epoch": 0.21827355489744432, "grad_norm": 1.8141756057739258, "learning_rate": 4.114432328415651e-06, "loss": 0.9182, "step": 17910 }, { "epoch": 0.21833449112159214, "grad_norm": 2.1122922897338867, "learning_rate": 4.114111610006415e-06, "loss": 0.892, "step": 17915 }, { "epoch": 0.21839542734573994, "grad_norm": 1.962934136390686, "learning_rate": 4.113790891597178e-06, "loss": 0.9193, "step": 17920 }, { "epoch": 0.21845636356988776, "grad_norm": 2.033832550048828, "learning_rate": 4.113470173187941e-06, "loss": 0.854, "step": 17925 }, { "epoch": 0.21851729979403556, "grad_norm": 2.113100290298462, "learning_rate": 4.113149454778705e-06, "loss": 0.8857, "step": 17930 }, { "epoch": 0.21857823601818338, "grad_norm": 1.9407566785812378, "learning_rate": 4.112828736369468e-06, "loss": 0.8324, "step": 17935 }, { "epoch": 0.21863917224233118, "grad_norm": 1.7969400882720947, "learning_rate": 4.112508017960231e-06, "loss": 0.8104, "step": 17940 }, { "epoch": 0.21870010846647897, "grad_norm": 1.794188380241394, "learning_rate": 4.112187299550995e-06, "loss": 0.9647, "step": 17945 }, { "epoch": 0.2187610446906268, "grad_norm": 2.0103251934051514, "learning_rate": 4.111866581141758e-06, "loss": 0.8818, "step": 17950 }, { "epoch": 0.2188219809147746, "grad_norm": 1.8841632604599, "learning_rate": 4.111545862732521e-06, "loss": 0.9533, "step": 17955 }, { "epoch": 0.2188829171389224, "grad_norm": 2.1026952266693115, "learning_rate": 4.111225144323284e-06, "loss": 0.9081, "step": 17960 }, { "epoch": 0.2189438533630702, "grad_norm": 1.9672614336013794, "learning_rate": 4.110904425914048e-06, "loss": 0.8886, "step": 17965 }, { "epoch": 0.21900478958721803, "grad_norm": 1.8586525917053223, "learning_rate": 4.110583707504811e-06, "loss": 0.8214, "step": 17970 }, { "epoch": 0.21906572581136582, "grad_norm": 2.181394338607788, "learning_rate": 4.110262989095574e-06, "loss": 0.8864, "step": 17975 }, { "epoch": 0.21912666203551362, "grad_norm": 2.1164302825927734, "learning_rate": 4.109942270686338e-06, "loss": 0.7892, "step": 17980 }, { "epoch": 0.21918759825966144, "grad_norm": 1.9844961166381836, "learning_rate": 4.109621552277101e-06, "loss": 0.9089, "step": 17985 }, { "epoch": 0.21924853448380924, "grad_norm": 2.1954047679901123, "learning_rate": 4.109300833867864e-06, "loss": 0.9561, "step": 17990 }, { "epoch": 0.21930947070795706, "grad_norm": 1.9202206134796143, "learning_rate": 4.108980115458628e-06, "loss": 0.8641, "step": 17995 }, { "epoch": 0.21937040693210486, "grad_norm": 2.2452640533447266, "learning_rate": 4.108659397049391e-06, "loss": 0.9006, "step": 18000 }, { "epoch": 0.21943134315625268, "grad_norm": 2.3391823768615723, "learning_rate": 4.1083386786401545e-06, "loss": 0.9248, "step": 18005 }, { "epoch": 0.21949227938040047, "grad_norm": 2.1901357173919678, "learning_rate": 4.1080179602309175e-06, "loss": 0.8884, "step": 18010 }, { "epoch": 0.21955321560454827, "grad_norm": 1.706552505493164, "learning_rate": 4.1076972418216805e-06, "loss": 0.8994, "step": 18015 }, { "epoch": 0.2196141518286961, "grad_norm": 2.522939682006836, "learning_rate": 4.107376523412444e-06, "loss": 0.8812, "step": 18020 }, { "epoch": 0.2196750880528439, "grad_norm": 2.4288995265960693, "learning_rate": 4.107055805003207e-06, "loss": 0.9188, "step": 18025 }, { "epoch": 0.2197360242769917, "grad_norm": 1.8150837421417236, "learning_rate": 4.106735086593971e-06, "loss": 0.8255, "step": 18030 }, { "epoch": 0.2197969605011395, "grad_norm": 2.0974607467651367, "learning_rate": 4.106414368184734e-06, "loss": 0.8917, "step": 18035 }, { "epoch": 0.21985789672528733, "grad_norm": 2.1310765743255615, "learning_rate": 4.106093649775497e-06, "loss": 0.9145, "step": 18040 }, { "epoch": 0.21991883294943512, "grad_norm": 1.8272844552993774, "learning_rate": 4.105772931366261e-06, "loss": 0.8481, "step": 18045 }, { "epoch": 0.21997976917358292, "grad_norm": 2.091437578201294, "learning_rate": 4.105452212957024e-06, "loss": 0.8111, "step": 18050 }, { "epoch": 0.22004070539773074, "grad_norm": 2.8782575130462646, "learning_rate": 4.105131494547787e-06, "loss": 0.9049, "step": 18055 }, { "epoch": 0.22010164162187854, "grad_norm": 2.008329153060913, "learning_rate": 4.104810776138551e-06, "loss": 0.8432, "step": 18060 }, { "epoch": 0.22016257784602636, "grad_norm": 2.082728862762451, "learning_rate": 4.104490057729314e-06, "loss": 0.8443, "step": 18065 }, { "epoch": 0.22022351407017415, "grad_norm": 1.8102703094482422, "learning_rate": 4.104169339320077e-06, "loss": 0.871, "step": 18070 }, { "epoch": 0.22028445029432198, "grad_norm": 1.8688764572143555, "learning_rate": 4.103848620910841e-06, "loss": 0.8655, "step": 18075 }, { "epoch": 0.22034538651846977, "grad_norm": 2.3001155853271484, "learning_rate": 4.103527902501604e-06, "loss": 0.8688, "step": 18080 }, { "epoch": 0.22040632274261757, "grad_norm": 1.7593904733657837, "learning_rate": 4.103207184092367e-06, "loss": 0.9181, "step": 18085 }, { "epoch": 0.2204672589667654, "grad_norm": 1.7394533157348633, "learning_rate": 4.102886465683131e-06, "loss": 0.8796, "step": 18090 }, { "epoch": 0.22052819519091318, "grad_norm": 2.044811487197876, "learning_rate": 4.102565747273894e-06, "loss": 0.9524, "step": 18095 }, { "epoch": 0.220589131415061, "grad_norm": 2.0522594451904297, "learning_rate": 4.102245028864657e-06, "loss": 0.91, "step": 18100 }, { "epoch": 0.2206500676392088, "grad_norm": 2.03743577003479, "learning_rate": 4.101924310455421e-06, "loss": 0.8568, "step": 18105 }, { "epoch": 0.2207110038633566, "grad_norm": 2.0433199405670166, "learning_rate": 4.101603592046184e-06, "loss": 0.8392, "step": 18110 }, { "epoch": 0.22077194008750442, "grad_norm": 2.2596683502197266, "learning_rate": 4.101282873636947e-06, "loss": 0.9141, "step": 18115 }, { "epoch": 0.22083287631165222, "grad_norm": 1.8860251903533936, "learning_rate": 4.10096215522771e-06, "loss": 0.8862, "step": 18120 }, { "epoch": 0.22089381253580004, "grad_norm": 1.7525330781936646, "learning_rate": 4.100641436818474e-06, "loss": 0.8937, "step": 18125 }, { "epoch": 0.22095474875994783, "grad_norm": 2.199387311935425, "learning_rate": 4.100320718409237e-06, "loss": 0.8561, "step": 18130 }, { "epoch": 0.22101568498409566, "grad_norm": 2.1044132709503174, "learning_rate": 4.1e-06, "loss": 0.8566, "step": 18135 }, { "epoch": 0.22107662120824345, "grad_norm": 2.073502779006958, "learning_rate": 4.0996792815907636e-06, "loss": 0.9606, "step": 18140 }, { "epoch": 0.22113755743239125, "grad_norm": 1.809332251548767, "learning_rate": 4.099358563181527e-06, "loss": 0.817, "step": 18145 }, { "epoch": 0.22119849365653907, "grad_norm": 2.031007766723633, "learning_rate": 4.0990378447722905e-06, "loss": 0.9155, "step": 18150 }, { "epoch": 0.22125942988068686, "grad_norm": 2.363571882247925, "learning_rate": 4.0987171263630535e-06, "loss": 0.8229, "step": 18155 }, { "epoch": 0.2213203661048347, "grad_norm": 2.4070169925689697, "learning_rate": 4.0983964079538165e-06, "loss": 0.8784, "step": 18160 }, { "epoch": 0.22138130232898248, "grad_norm": 2.4995505809783936, "learning_rate": 4.09807568954458e-06, "loss": 0.8592, "step": 18165 }, { "epoch": 0.2214422385531303, "grad_norm": 1.9970680475234985, "learning_rate": 4.097754971135343e-06, "loss": 0.9374, "step": 18170 }, { "epoch": 0.2215031747772781, "grad_norm": 1.9879381656646729, "learning_rate": 4.097434252726107e-06, "loss": 0.8682, "step": 18175 }, { "epoch": 0.2215641110014259, "grad_norm": 1.9312009811401367, "learning_rate": 4.09711353431687e-06, "loss": 0.7983, "step": 18180 }, { "epoch": 0.22162504722557372, "grad_norm": 1.6004709005355835, "learning_rate": 4.096792815907633e-06, "loss": 0.908, "step": 18185 }, { "epoch": 0.2216859834497215, "grad_norm": 1.9038137197494507, "learning_rate": 4.096472097498397e-06, "loss": 0.9193, "step": 18190 }, { "epoch": 0.22174691967386934, "grad_norm": 1.7515835762023926, "learning_rate": 4.09615137908916e-06, "loss": 0.8509, "step": 18195 }, { "epoch": 0.22180785589801713, "grad_norm": 1.9004697799682617, "learning_rate": 4.095830660679924e-06, "loss": 0.9948, "step": 18200 }, { "epoch": 0.22186879212216495, "grad_norm": 1.9074057340621948, "learning_rate": 4.095509942270687e-06, "loss": 0.8838, "step": 18205 }, { "epoch": 0.22192972834631275, "grad_norm": 1.8511967658996582, "learning_rate": 4.09518922386145e-06, "loss": 0.8586, "step": 18210 }, { "epoch": 0.22199066457046054, "grad_norm": 2.006120443344116, "learning_rate": 4.094868505452213e-06, "loss": 0.8528, "step": 18215 }, { "epoch": 0.22205160079460837, "grad_norm": 2.056440830230713, "learning_rate": 4.094547787042977e-06, "loss": 0.8994, "step": 18220 }, { "epoch": 0.22211253701875616, "grad_norm": 2.2527143955230713, "learning_rate": 4.09422706863374e-06, "loss": 0.9037, "step": 18225 }, { "epoch": 0.22217347324290398, "grad_norm": 2.174570322036743, "learning_rate": 4.093906350224503e-06, "loss": 0.8539, "step": 18230 }, { "epoch": 0.22223440946705178, "grad_norm": 1.7675323486328125, "learning_rate": 4.093585631815267e-06, "loss": 0.875, "step": 18235 }, { "epoch": 0.2222953456911996, "grad_norm": 2.085939407348633, "learning_rate": 4.09326491340603e-06, "loss": 0.9273, "step": 18240 }, { "epoch": 0.2223562819153474, "grad_norm": 2.0107383728027344, "learning_rate": 4.092944194996793e-06, "loss": 0.9219, "step": 18245 }, { "epoch": 0.2224172181394952, "grad_norm": 2.061772584915161, "learning_rate": 4.092623476587557e-06, "loss": 0.8617, "step": 18250 }, { "epoch": 0.22247815436364302, "grad_norm": 2.293616533279419, "learning_rate": 4.09230275817832e-06, "loss": 0.8798, "step": 18255 }, { "epoch": 0.2225390905877908, "grad_norm": 1.692225456237793, "learning_rate": 4.091982039769083e-06, "loss": 0.942, "step": 18260 }, { "epoch": 0.22260002681193863, "grad_norm": 1.9439769983291626, "learning_rate": 4.091661321359847e-06, "loss": 0.863, "step": 18265 }, { "epoch": 0.22266096303608643, "grad_norm": 2.005455732345581, "learning_rate": 4.09134060295061e-06, "loss": 0.8498, "step": 18270 }, { "epoch": 0.22272189926023425, "grad_norm": 2.5364437103271484, "learning_rate": 4.091019884541373e-06, "loss": 0.9771, "step": 18275 }, { "epoch": 0.22278283548438205, "grad_norm": 1.8755429983139038, "learning_rate": 4.0906991661321365e-06, "loss": 0.8711, "step": 18280 }, { "epoch": 0.22284377170852984, "grad_norm": 2.207592487335205, "learning_rate": 4.0903784477228995e-06, "loss": 0.887, "step": 18285 }, { "epoch": 0.22290470793267766, "grad_norm": 1.986769199371338, "learning_rate": 4.0900577293136625e-06, "loss": 0.8697, "step": 18290 }, { "epoch": 0.22296564415682546, "grad_norm": 1.8161230087280273, "learning_rate": 4.0897370109044256e-06, "loss": 0.8622, "step": 18295 }, { "epoch": 0.22302658038097328, "grad_norm": 1.7536661624908447, "learning_rate": 4.0894162924951894e-06, "loss": 0.8596, "step": 18300 }, { "epoch": 0.22308751660512108, "grad_norm": 2.548983573913574, "learning_rate": 4.0890955740859524e-06, "loss": 0.8621, "step": 18305 }, { "epoch": 0.2231484528292689, "grad_norm": 1.9488954544067383, "learning_rate": 4.088774855676716e-06, "loss": 0.8959, "step": 18310 }, { "epoch": 0.2232093890534167, "grad_norm": 2.1989078521728516, "learning_rate": 4.088454137267479e-06, "loss": 0.8727, "step": 18315 }, { "epoch": 0.2232703252775645, "grad_norm": 1.8675305843353271, "learning_rate": 4.088133418858243e-06, "loss": 0.9067, "step": 18320 }, { "epoch": 0.2233312615017123, "grad_norm": 2.052051544189453, "learning_rate": 4.087812700449006e-06, "loss": 0.9113, "step": 18325 }, { "epoch": 0.2233921977258601, "grad_norm": 2.1658780574798584, "learning_rate": 4.087491982039769e-06, "loss": 0.8636, "step": 18330 }, { "epoch": 0.22345313395000793, "grad_norm": 2.089599370956421, "learning_rate": 4.087171263630533e-06, "loss": 0.87, "step": 18335 }, { "epoch": 0.22351407017415573, "grad_norm": 1.7273662090301514, "learning_rate": 4.086850545221296e-06, "loss": 0.9193, "step": 18340 }, { "epoch": 0.22357500639830352, "grad_norm": 2.1278719902038574, "learning_rate": 4.08652982681206e-06, "loss": 0.9028, "step": 18345 }, { "epoch": 0.22363594262245134, "grad_norm": 1.9058865308761597, "learning_rate": 4.086209108402823e-06, "loss": 0.8993, "step": 18350 }, { "epoch": 0.22369687884659914, "grad_norm": 1.884377360343933, "learning_rate": 4.085888389993586e-06, "loss": 0.8174, "step": 18355 }, { "epoch": 0.22375781507074696, "grad_norm": 1.8960529565811157, "learning_rate": 4.08556767158435e-06, "loss": 0.9158, "step": 18360 }, { "epoch": 0.22381875129489476, "grad_norm": 1.9552644491195679, "learning_rate": 4.085246953175113e-06, "loss": 0.8938, "step": 18365 }, { "epoch": 0.22387968751904258, "grad_norm": 2.1133878231048584, "learning_rate": 4.084926234765876e-06, "loss": 0.892, "step": 18370 }, { "epoch": 0.22394062374319038, "grad_norm": 1.9535918235778809, "learning_rate": 4.084605516356639e-06, "loss": 0.8839, "step": 18375 }, { "epoch": 0.22400155996733817, "grad_norm": 1.9596607685089111, "learning_rate": 4.084284797947403e-06, "loss": 0.9071, "step": 18380 }, { "epoch": 0.224062496191486, "grad_norm": 2.1998846530914307, "learning_rate": 4.083964079538166e-06, "loss": 0.8953, "step": 18385 }, { "epoch": 0.2241234324156338, "grad_norm": 1.8979617357254028, "learning_rate": 4.083643361128929e-06, "loss": 0.8751, "step": 18390 }, { "epoch": 0.2241843686397816, "grad_norm": 2.276129722595215, "learning_rate": 4.083322642719693e-06, "loss": 0.8333, "step": 18395 }, { "epoch": 0.2242453048639294, "grad_norm": 1.975268006324768, "learning_rate": 4.083001924310456e-06, "loss": 0.8709, "step": 18400 }, { "epoch": 0.22430624108807723, "grad_norm": 1.9017359018325806, "learning_rate": 4.082681205901219e-06, "loss": 0.8443, "step": 18405 }, { "epoch": 0.22436717731222502, "grad_norm": 1.8970489501953125, "learning_rate": 4.0823604874919826e-06, "loss": 0.8698, "step": 18410 }, { "epoch": 0.22442811353637282, "grad_norm": 1.9006593227386475, "learning_rate": 4.082039769082746e-06, "loss": 0.8481, "step": 18415 }, { "epoch": 0.22448904976052064, "grad_norm": 2.1589481830596924, "learning_rate": 4.081719050673509e-06, "loss": 0.8133, "step": 18420 }, { "epoch": 0.22454998598466844, "grad_norm": 1.7363035678863525, "learning_rate": 4.0813983322642725e-06, "loss": 0.9101, "step": 18425 }, { "epoch": 0.22461092220881626, "grad_norm": 2.0817058086395264, "learning_rate": 4.0810776138550355e-06, "loss": 0.8289, "step": 18430 }, { "epoch": 0.22467185843296406, "grad_norm": 1.9503456354141235, "learning_rate": 4.0807568954457985e-06, "loss": 0.9415, "step": 18435 }, { "epoch": 0.22473279465711188, "grad_norm": 2.0543816089630127, "learning_rate": 4.080436177036562e-06, "loss": 0.8897, "step": 18440 }, { "epoch": 0.22479373088125967, "grad_norm": 2.1593472957611084, "learning_rate": 4.080115458627325e-06, "loss": 0.9247, "step": 18445 }, { "epoch": 0.22485466710540747, "grad_norm": 1.6522455215454102, "learning_rate": 4.079794740218088e-06, "loss": 0.917, "step": 18450 }, { "epoch": 0.2249156033295553, "grad_norm": 1.9598925113677979, "learning_rate": 4.079474021808852e-06, "loss": 0.9081, "step": 18455 }, { "epoch": 0.2249765395537031, "grad_norm": 1.816156268119812, "learning_rate": 4.079153303399615e-06, "loss": 0.869, "step": 18460 }, { "epoch": 0.2250374757778509, "grad_norm": 2.3678317070007324, "learning_rate": 4.078832584990378e-06, "loss": 0.8641, "step": 18465 }, { "epoch": 0.2250984120019987, "grad_norm": 1.9792490005493164, "learning_rate": 4.078511866581142e-06, "loss": 0.8743, "step": 18470 }, { "epoch": 0.22515934822614653, "grad_norm": 1.825754165649414, "learning_rate": 4.078191148171905e-06, "loss": 0.8442, "step": 18475 }, { "epoch": 0.22522028445029432, "grad_norm": 1.7864313125610352, "learning_rate": 4.077870429762669e-06, "loss": 0.8843, "step": 18480 }, { "epoch": 0.22528122067444212, "grad_norm": 2.3652265071868896, "learning_rate": 4.077549711353432e-06, "loss": 0.9203, "step": 18485 }, { "epoch": 0.22534215689858994, "grad_norm": 1.8567465543746948, "learning_rate": 4.077228992944195e-06, "loss": 0.8868, "step": 18490 }, { "epoch": 0.22540309312273774, "grad_norm": 2.314399003982544, "learning_rate": 4.076908274534959e-06, "loss": 0.8353, "step": 18495 }, { "epoch": 0.22546402934688556, "grad_norm": 1.6915572881698608, "learning_rate": 4.076587556125722e-06, "loss": 0.7726, "step": 18500 }, { "epoch": 0.22552496557103335, "grad_norm": 2.5605907440185547, "learning_rate": 4.076266837716486e-06, "loss": 0.8318, "step": 18505 }, { "epoch": 0.22558590179518118, "grad_norm": 2.0045876502990723, "learning_rate": 4.075946119307249e-06, "loss": 0.8944, "step": 18510 }, { "epoch": 0.22564683801932897, "grad_norm": 2.5041942596435547, "learning_rate": 4.075625400898012e-06, "loss": 0.9003, "step": 18515 }, { "epoch": 0.22570777424347677, "grad_norm": 1.8751829862594604, "learning_rate": 4.075304682488776e-06, "loss": 0.8913, "step": 18520 }, { "epoch": 0.2257687104676246, "grad_norm": 2.3126230239868164, "learning_rate": 4.074983964079539e-06, "loss": 0.8387, "step": 18525 }, { "epoch": 0.22582964669177238, "grad_norm": 2.2267889976501465, "learning_rate": 4.074663245670302e-06, "loss": 0.8745, "step": 18530 }, { "epoch": 0.2258905829159202, "grad_norm": 1.8829874992370605, "learning_rate": 4.074342527261066e-06, "loss": 0.9007, "step": 18535 }, { "epoch": 0.225951519140068, "grad_norm": 2.3226494789123535, "learning_rate": 4.074021808851829e-06, "loss": 0.856, "step": 18540 }, { "epoch": 0.22601245536421583, "grad_norm": 1.8850692510604858, "learning_rate": 4.073701090442592e-06, "loss": 0.8676, "step": 18545 }, { "epoch": 0.22607339158836362, "grad_norm": 2.3104631900787354, "learning_rate": 4.073380372033355e-06, "loss": 0.9179, "step": 18550 }, { "epoch": 0.22613432781251142, "grad_norm": 2.0387580394744873, "learning_rate": 4.0730596536241185e-06, "loss": 0.9111, "step": 18555 }, { "epoch": 0.22619526403665924, "grad_norm": 1.9442495107650757, "learning_rate": 4.0727389352148815e-06, "loss": 0.9612, "step": 18560 }, { "epoch": 0.22625620026080703, "grad_norm": 2.2045578956604004, "learning_rate": 4.0724182168056446e-06, "loss": 0.8734, "step": 18565 }, { "epoch": 0.22631713648495486, "grad_norm": 1.7898119688034058, "learning_rate": 4.0720974983964084e-06, "loss": 0.7658, "step": 18570 }, { "epoch": 0.22637807270910265, "grad_norm": 1.7798659801483154, "learning_rate": 4.0717767799871714e-06, "loss": 0.8507, "step": 18575 }, { "epoch": 0.22643900893325045, "grad_norm": 1.9451755285263062, "learning_rate": 4.0714560615779345e-06, "loss": 0.8198, "step": 18580 }, { "epoch": 0.22649994515739827, "grad_norm": 2.2020411491394043, "learning_rate": 4.071135343168698e-06, "loss": 0.8541, "step": 18585 }, { "epoch": 0.22656088138154606, "grad_norm": 2.1313562393188477, "learning_rate": 4.070814624759461e-06, "loss": 0.9071, "step": 18590 }, { "epoch": 0.2266218176056939, "grad_norm": 2.1227848529815674, "learning_rate": 4.070493906350224e-06, "loss": 0.9683, "step": 18595 }, { "epoch": 0.22668275382984168, "grad_norm": 2.0284276008605957, "learning_rate": 4.070173187940988e-06, "loss": 0.9085, "step": 18600 }, { "epoch": 0.2267436900539895, "grad_norm": 2.2955024242401123, "learning_rate": 4.069852469531751e-06, "loss": 0.8851, "step": 18605 }, { "epoch": 0.2268046262781373, "grad_norm": 2.0142884254455566, "learning_rate": 4.069531751122514e-06, "loss": 0.8206, "step": 18610 }, { "epoch": 0.2268655625022851, "grad_norm": 2.08488130569458, "learning_rate": 4.069211032713278e-06, "loss": 0.8277, "step": 18615 }, { "epoch": 0.22692649872643292, "grad_norm": 2.063392400741577, "learning_rate": 4.068890314304041e-06, "loss": 0.9601, "step": 18620 }, { "epoch": 0.2269874349505807, "grad_norm": 1.8094795942306519, "learning_rate": 4.068569595894805e-06, "loss": 0.9115, "step": 18625 }, { "epoch": 0.22704837117472854, "grad_norm": 2.2189977169036865, "learning_rate": 4.068248877485568e-06, "loss": 0.8547, "step": 18630 }, { "epoch": 0.22710930739887633, "grad_norm": 2.1369683742523193, "learning_rate": 4.067928159076331e-06, "loss": 0.8688, "step": 18635 }, { "epoch": 0.22717024362302415, "grad_norm": 1.782845139503479, "learning_rate": 4.067607440667095e-06, "loss": 0.8939, "step": 18640 }, { "epoch": 0.22723117984717195, "grad_norm": 1.9179855585098267, "learning_rate": 4.067286722257858e-06, "loss": 0.8975, "step": 18645 }, { "epoch": 0.22729211607131974, "grad_norm": 2.0977866649627686, "learning_rate": 4.066966003848622e-06, "loss": 0.8138, "step": 18650 }, { "epoch": 0.22735305229546757, "grad_norm": 2.358106851577759, "learning_rate": 4.066645285439385e-06, "loss": 0.8178, "step": 18655 }, { "epoch": 0.22741398851961536, "grad_norm": 1.906372308731079, "learning_rate": 4.066324567030148e-06, "loss": 0.8993, "step": 18660 }, { "epoch": 0.22747492474376318, "grad_norm": 1.988754391670227, "learning_rate": 4.066003848620912e-06, "loss": 0.8823, "step": 18665 }, { "epoch": 0.22753586096791098, "grad_norm": 1.6789741516113281, "learning_rate": 4.065683130211675e-06, "loss": 0.8973, "step": 18670 }, { "epoch": 0.2275967971920588, "grad_norm": 1.8093193769454956, "learning_rate": 4.065362411802438e-06, "loss": 0.8892, "step": 18675 }, { "epoch": 0.2276577334162066, "grad_norm": 2.135869264602661, "learning_rate": 4.0650416933932016e-06, "loss": 0.9302, "step": 18680 }, { "epoch": 0.2277186696403544, "grad_norm": 1.8576438426971436, "learning_rate": 4.064720974983965e-06, "loss": 0.8149, "step": 18685 }, { "epoch": 0.22777960586450222, "grad_norm": 1.80885648727417, "learning_rate": 4.064400256574728e-06, "loss": 0.8395, "step": 18690 }, { "epoch": 0.22784054208865, "grad_norm": 2.0269346237182617, "learning_rate": 4.0640795381654915e-06, "loss": 0.8384, "step": 18695 }, { "epoch": 0.22790147831279783, "grad_norm": 1.7699217796325684, "learning_rate": 4.0637588197562545e-06, "loss": 0.841, "step": 18700 }, { "epoch": 0.22796241453694563, "grad_norm": 1.7684005498886108, "learning_rate": 4.0634381013470175e-06, "loss": 0.9369, "step": 18705 }, { "epoch": 0.22802335076109345, "grad_norm": 1.8490349054336548, "learning_rate": 4.0631173829377805e-06, "loss": 0.9672, "step": 18710 }, { "epoch": 0.22808428698524125, "grad_norm": 1.902923583984375, "learning_rate": 4.062796664528544e-06, "loss": 0.8688, "step": 18715 }, { "epoch": 0.22814522320938904, "grad_norm": 1.9959057569503784, "learning_rate": 4.062475946119307e-06, "loss": 0.892, "step": 18720 }, { "epoch": 0.22820615943353686, "grad_norm": 1.6424075365066528, "learning_rate": 4.06215522771007e-06, "loss": 0.8485, "step": 18725 }, { "epoch": 0.22826709565768466, "grad_norm": 1.9835093021392822, "learning_rate": 4.061834509300834e-06, "loss": 0.9359, "step": 18730 }, { "epoch": 0.22832803188183248, "grad_norm": 1.8274366855621338, "learning_rate": 4.061513790891597e-06, "loss": 0.8298, "step": 18735 }, { "epoch": 0.22838896810598028, "grad_norm": 2.2263247966766357, "learning_rate": 4.06119307248236e-06, "loss": 0.9324, "step": 18740 }, { "epoch": 0.2284499043301281, "grad_norm": 2.0848135948181152, "learning_rate": 4.060872354073124e-06, "loss": 0.8488, "step": 18745 }, { "epoch": 0.2285108405542759, "grad_norm": 1.826112151145935, "learning_rate": 4.060551635663887e-06, "loss": 0.9209, "step": 18750 }, { "epoch": 0.2285717767784237, "grad_norm": 2.085092067718506, "learning_rate": 4.06023091725465e-06, "loss": 0.8698, "step": 18755 }, { "epoch": 0.2286327130025715, "grad_norm": 3.089172601699829, "learning_rate": 4.059910198845414e-06, "loss": 0.9481, "step": 18760 }, { "epoch": 0.2286936492267193, "grad_norm": 1.7150256633758545, "learning_rate": 4.059589480436177e-06, "loss": 0.8991, "step": 18765 }, { "epoch": 0.22875458545086713, "grad_norm": 1.8193168640136719, "learning_rate": 4.059268762026941e-06, "loss": 0.825, "step": 18770 }, { "epoch": 0.22881552167501493, "grad_norm": 2.931201934814453, "learning_rate": 4.058948043617704e-06, "loss": 0.892, "step": 18775 }, { "epoch": 0.22887645789916275, "grad_norm": 1.8065613508224487, "learning_rate": 4.058627325208467e-06, "loss": 0.8308, "step": 18780 }, { "epoch": 0.22893739412331054, "grad_norm": 1.9462066888809204, "learning_rate": 4.058306606799231e-06, "loss": 0.8958, "step": 18785 }, { "epoch": 0.22899833034745834, "grad_norm": 2.2274255752563477, "learning_rate": 4.057985888389994e-06, "loss": 0.9046, "step": 18790 }, { "epoch": 0.22905926657160616, "grad_norm": 1.8896260261535645, "learning_rate": 4.057665169980758e-06, "loss": 0.8855, "step": 18795 }, { "epoch": 0.22912020279575396, "grad_norm": 2.0042734146118164, "learning_rate": 4.057344451571521e-06, "loss": 0.8992, "step": 18800 }, { "epoch": 0.22918113901990178, "grad_norm": 1.7484396696090698, "learning_rate": 4.057023733162284e-06, "loss": 0.834, "step": 18805 }, { "epoch": 0.22924207524404958, "grad_norm": 2.1819825172424316, "learning_rate": 4.056703014753048e-06, "loss": 0.9025, "step": 18810 }, { "epoch": 0.22930301146819737, "grad_norm": 2.1243326663970947, "learning_rate": 4.056382296343811e-06, "loss": 0.9416, "step": 18815 }, { "epoch": 0.2293639476923452, "grad_norm": 2.0837624073028564, "learning_rate": 4.056061577934574e-06, "loss": 0.9133, "step": 18820 }, { "epoch": 0.229424883916493, "grad_norm": 1.9610379934310913, "learning_rate": 4.0557408595253375e-06, "loss": 0.9031, "step": 18825 }, { "epoch": 0.2294858201406408, "grad_norm": 1.8000502586364746, "learning_rate": 4.0554201411161005e-06, "loss": 0.8944, "step": 18830 }, { "epoch": 0.2295467563647886, "grad_norm": 1.9317063093185425, "learning_rate": 4.0550994227068636e-06, "loss": 0.8473, "step": 18835 }, { "epoch": 0.22960769258893643, "grad_norm": 1.8544806241989136, "learning_rate": 4.0547787042976274e-06, "loss": 0.8441, "step": 18840 }, { "epoch": 0.22966862881308422, "grad_norm": 2.062992572784424, "learning_rate": 4.0544579858883904e-06, "loss": 0.9191, "step": 18845 }, { "epoch": 0.22972956503723202, "grad_norm": 1.813502550125122, "learning_rate": 4.0541372674791535e-06, "loss": 0.874, "step": 18850 }, { "epoch": 0.22979050126137984, "grad_norm": 2.020822525024414, "learning_rate": 4.053816549069917e-06, "loss": 0.8829, "step": 18855 }, { "epoch": 0.22985143748552764, "grad_norm": 2.1997385025024414, "learning_rate": 4.05349583066068e-06, "loss": 0.9506, "step": 18860 }, { "epoch": 0.22991237370967546, "grad_norm": 1.7038267850875854, "learning_rate": 4.053175112251443e-06, "loss": 0.9042, "step": 18865 }, { "epoch": 0.22997330993382326, "grad_norm": 1.7566843032836914, "learning_rate": 4.052854393842207e-06, "loss": 0.7879, "step": 18870 }, { "epoch": 0.23003424615797108, "grad_norm": 2.013050079345703, "learning_rate": 4.05253367543297e-06, "loss": 0.8802, "step": 18875 }, { "epoch": 0.23009518238211887, "grad_norm": 2.4459240436553955, "learning_rate": 4.052212957023733e-06, "loss": 0.8572, "step": 18880 }, { "epoch": 0.23015611860626667, "grad_norm": 2.0020289421081543, "learning_rate": 4.051892238614496e-06, "loss": 0.8558, "step": 18885 }, { "epoch": 0.2302170548304145, "grad_norm": 2.543050765991211, "learning_rate": 4.05157152020526e-06, "loss": 0.8561, "step": 18890 }, { "epoch": 0.2302779910545623, "grad_norm": 2.1493020057678223, "learning_rate": 4.051250801796023e-06, "loss": 0.8502, "step": 18895 }, { "epoch": 0.2303389272787101, "grad_norm": 1.7473671436309814, "learning_rate": 4.050930083386786e-06, "loss": 0.8788, "step": 18900 }, { "epoch": 0.2303998635028579, "grad_norm": 2.9007344245910645, "learning_rate": 4.05060936497755e-06, "loss": 0.8946, "step": 18905 }, { "epoch": 0.23046079972700573, "grad_norm": 2.5781397819519043, "learning_rate": 4.050288646568313e-06, "loss": 0.8895, "step": 18910 }, { "epoch": 0.23052173595115352, "grad_norm": 1.9815616607666016, "learning_rate": 4.049967928159076e-06, "loss": 0.8958, "step": 18915 }, { "epoch": 0.23058267217530132, "grad_norm": 2.1954421997070312, "learning_rate": 4.04964720974984e-06, "loss": 0.9427, "step": 18920 }, { "epoch": 0.23064360839944914, "grad_norm": 2.0206873416900635, "learning_rate": 4.049326491340603e-06, "loss": 0.8391, "step": 18925 }, { "epoch": 0.23070454462359694, "grad_norm": 2.61572003364563, "learning_rate": 4.049005772931367e-06, "loss": 0.898, "step": 18930 }, { "epoch": 0.23076548084774476, "grad_norm": 1.6862818002700806, "learning_rate": 4.04868505452213e-06, "loss": 0.896, "step": 18935 }, { "epoch": 0.23082641707189255, "grad_norm": 2.3050875663757324, "learning_rate": 4.048364336112893e-06, "loss": 0.8795, "step": 18940 }, { "epoch": 0.23088735329604038, "grad_norm": 1.9299064874649048, "learning_rate": 4.048043617703657e-06, "loss": 0.8776, "step": 18945 }, { "epoch": 0.23094828952018817, "grad_norm": 1.6675465106964111, "learning_rate": 4.04772289929442e-06, "loss": 0.8833, "step": 18950 }, { "epoch": 0.23100922574433597, "grad_norm": 1.8693588972091675, "learning_rate": 4.047402180885184e-06, "loss": 0.8212, "step": 18955 }, { "epoch": 0.2310701619684838, "grad_norm": 2.2904629707336426, "learning_rate": 4.047081462475947e-06, "loss": 0.8875, "step": 18960 }, { "epoch": 0.23113109819263158, "grad_norm": 1.7707929611206055, "learning_rate": 4.04676074406671e-06, "loss": 0.9845, "step": 18965 }, { "epoch": 0.2311920344167794, "grad_norm": 2.10990834236145, "learning_rate": 4.0464400256574735e-06, "loss": 0.8262, "step": 18970 }, { "epoch": 0.2312529706409272, "grad_norm": 1.862497329711914, "learning_rate": 4.0461193072482365e-06, "loss": 0.8725, "step": 18975 }, { "epoch": 0.23131390686507503, "grad_norm": 2.0436222553253174, "learning_rate": 4.0457985888389995e-06, "loss": 0.8781, "step": 18980 }, { "epoch": 0.23137484308922282, "grad_norm": 2.290731191635132, "learning_rate": 4.045477870429763e-06, "loss": 0.8487, "step": 18985 }, { "epoch": 0.23143577931337062, "grad_norm": 1.7921335697174072, "learning_rate": 4.045157152020526e-06, "loss": 0.8741, "step": 18990 }, { "epoch": 0.23149671553751844, "grad_norm": 2.1930716037750244, "learning_rate": 4.044836433611289e-06, "loss": 0.7647, "step": 18995 }, { "epoch": 0.23155765176166623, "grad_norm": 2.106440782546997, "learning_rate": 4.044515715202053e-06, "loss": 0.8568, "step": 19000 }, { "epoch": 0.23161858798581406, "grad_norm": 2.200526475906372, "learning_rate": 4.044194996792816e-06, "loss": 0.8258, "step": 19005 }, { "epoch": 0.23167952420996185, "grad_norm": 1.6792786121368408, "learning_rate": 4.043874278383579e-06, "loss": 0.9148, "step": 19010 }, { "epoch": 0.23174046043410967, "grad_norm": 1.8476219177246094, "learning_rate": 4.043553559974343e-06, "loss": 0.9357, "step": 19015 }, { "epoch": 0.23180139665825747, "grad_norm": 1.985257625579834, "learning_rate": 4.043232841565106e-06, "loss": 0.9065, "step": 19020 }, { "epoch": 0.23186233288240526, "grad_norm": 1.9394415616989136, "learning_rate": 4.042912123155869e-06, "loss": 0.8429, "step": 19025 }, { "epoch": 0.2319232691065531, "grad_norm": 1.8831167221069336, "learning_rate": 4.042591404746633e-06, "loss": 0.8922, "step": 19030 }, { "epoch": 0.23198420533070088, "grad_norm": 1.829221248626709, "learning_rate": 4.042270686337396e-06, "loss": 0.9389, "step": 19035 }, { "epoch": 0.2320451415548487, "grad_norm": 2.003382682800293, "learning_rate": 4.041949967928159e-06, "loss": 0.9135, "step": 19040 }, { "epoch": 0.2321060777789965, "grad_norm": 2.2165162563323975, "learning_rate": 4.041629249518923e-06, "loss": 0.9339, "step": 19045 }, { "epoch": 0.23216701400314432, "grad_norm": 2.077637195587158, "learning_rate": 4.041308531109686e-06, "loss": 0.8991, "step": 19050 }, { "epoch": 0.23222795022729212, "grad_norm": 1.986889123916626, "learning_rate": 4.040987812700449e-06, "loss": 0.8375, "step": 19055 }, { "epoch": 0.2322888864514399, "grad_norm": 2.1489853858947754, "learning_rate": 4.040667094291212e-06, "loss": 0.8869, "step": 19060 }, { "epoch": 0.23234982267558774, "grad_norm": 1.7901289463043213, "learning_rate": 4.040346375881976e-06, "loss": 0.8885, "step": 19065 }, { "epoch": 0.23241075889973553, "grad_norm": 1.9034841060638428, "learning_rate": 4.040025657472739e-06, "loss": 0.9246, "step": 19070 }, { "epoch": 0.23247169512388335, "grad_norm": 2.066168785095215, "learning_rate": 4.039704939063503e-06, "loss": 0.8952, "step": 19075 }, { "epoch": 0.23253263134803115, "grad_norm": 2.4637813568115234, "learning_rate": 4.039384220654266e-06, "loss": 0.8981, "step": 19080 }, { "epoch": 0.23259356757217894, "grad_norm": 2.1242446899414062, "learning_rate": 4.039063502245029e-06, "loss": 0.8988, "step": 19085 }, { "epoch": 0.23265450379632677, "grad_norm": 1.852339506149292, "learning_rate": 4.038742783835793e-06, "loss": 0.8754, "step": 19090 }, { "epoch": 0.23271544002047456, "grad_norm": 1.8886655569076538, "learning_rate": 4.038422065426556e-06, "loss": 0.8881, "step": 19095 }, { "epoch": 0.23277637624462238, "grad_norm": 2.200305700302124, "learning_rate": 4.0381013470173195e-06, "loss": 0.9248, "step": 19100 }, { "epoch": 0.23283731246877018, "grad_norm": 1.8842766284942627, "learning_rate": 4.0377806286080826e-06, "loss": 0.8403, "step": 19105 }, { "epoch": 0.232898248692918, "grad_norm": 2.144730806350708, "learning_rate": 4.037459910198846e-06, "loss": 0.8677, "step": 19110 }, { "epoch": 0.2329591849170658, "grad_norm": 2.100003242492676, "learning_rate": 4.0371391917896094e-06, "loss": 0.8779, "step": 19115 }, { "epoch": 0.2330201211412136, "grad_norm": 1.6623374223709106, "learning_rate": 4.0368184733803725e-06, "loss": 0.9425, "step": 19120 }, { "epoch": 0.23308105736536142, "grad_norm": 1.9977933168411255, "learning_rate": 4.036497754971136e-06, "loss": 0.8547, "step": 19125 }, { "epoch": 0.2331419935895092, "grad_norm": 1.913067102432251, "learning_rate": 4.036177036561899e-06, "loss": 0.8752, "step": 19130 }, { "epoch": 0.23320292981365703, "grad_norm": 1.9767109155654907, "learning_rate": 4.035856318152662e-06, "loss": 0.8703, "step": 19135 }, { "epoch": 0.23326386603780483, "grad_norm": 2.0110366344451904, "learning_rate": 4.035535599743425e-06, "loss": 0.8887, "step": 19140 }, { "epoch": 0.23332480226195265, "grad_norm": 2.059478759765625, "learning_rate": 4.035214881334189e-06, "loss": 0.8958, "step": 19145 }, { "epoch": 0.23338573848610045, "grad_norm": 1.9879804849624634, "learning_rate": 4.034894162924952e-06, "loss": 0.9545, "step": 19150 }, { "epoch": 0.23344667471024824, "grad_norm": 2.3867151737213135, "learning_rate": 4.034573444515715e-06, "loss": 0.9308, "step": 19155 }, { "epoch": 0.23350761093439606, "grad_norm": 2.043147325515747, "learning_rate": 4.034252726106479e-06, "loss": 0.8365, "step": 19160 }, { "epoch": 0.23356854715854386, "grad_norm": 2.1009654998779297, "learning_rate": 4.033932007697242e-06, "loss": 0.7881, "step": 19165 }, { "epoch": 0.23362948338269168, "grad_norm": 2.114675998687744, "learning_rate": 4.033611289288005e-06, "loss": 0.8563, "step": 19170 }, { "epoch": 0.23369041960683948, "grad_norm": 2.0699009895324707, "learning_rate": 4.033290570878769e-06, "loss": 0.83, "step": 19175 }, { "epoch": 0.2337513558309873, "grad_norm": 1.8192275762557983, "learning_rate": 4.032969852469532e-06, "loss": 0.8846, "step": 19180 }, { "epoch": 0.2338122920551351, "grad_norm": 2.1480891704559326, "learning_rate": 4.032649134060295e-06, "loss": 0.8367, "step": 19185 }, { "epoch": 0.2338732282792829, "grad_norm": 1.9304670095443726, "learning_rate": 4.032328415651059e-06, "loss": 0.8548, "step": 19190 }, { "epoch": 0.2339341645034307, "grad_norm": 2.0386898517608643, "learning_rate": 4.032007697241822e-06, "loss": 0.8805, "step": 19195 }, { "epoch": 0.2339951007275785, "grad_norm": 2.0920305252075195, "learning_rate": 4.031686978832585e-06, "loss": 0.9302, "step": 19200 }, { "epoch": 0.23405603695172633, "grad_norm": 1.973512887954712, "learning_rate": 4.031366260423349e-06, "loss": 0.8426, "step": 19205 }, { "epoch": 0.23411697317587413, "grad_norm": 2.167384386062622, "learning_rate": 4.031045542014112e-06, "loss": 0.9247, "step": 19210 }, { "epoch": 0.23417790940002195, "grad_norm": 2.578805923461914, "learning_rate": 4.030724823604875e-06, "loss": 0.9656, "step": 19215 }, { "epoch": 0.23423884562416974, "grad_norm": 2.080512523651123, "learning_rate": 4.030404105195638e-06, "loss": 0.8633, "step": 19220 }, { "epoch": 0.23429978184831754, "grad_norm": 1.8811191320419312, "learning_rate": 4.030083386786402e-06, "loss": 0.8075, "step": 19225 }, { "epoch": 0.23436071807246536, "grad_norm": 1.7614099979400635, "learning_rate": 4.029762668377165e-06, "loss": 0.946, "step": 19230 }, { "epoch": 0.23442165429661316, "grad_norm": 1.743265151977539, "learning_rate": 4.029441949967929e-06, "loss": 0.8878, "step": 19235 }, { "epoch": 0.23448259052076098, "grad_norm": 1.9967552423477173, "learning_rate": 4.029121231558692e-06, "loss": 0.8893, "step": 19240 }, { "epoch": 0.23454352674490878, "grad_norm": 2.0141263008117676, "learning_rate": 4.0288005131494555e-06, "loss": 0.9209, "step": 19245 }, { "epoch": 0.2346044629690566, "grad_norm": 2.2635092735290527, "learning_rate": 4.0284797947402185e-06, "loss": 0.8131, "step": 19250 }, { "epoch": 0.2346653991932044, "grad_norm": 1.730208396911621, "learning_rate": 4.0281590763309815e-06, "loss": 0.8884, "step": 19255 }, { "epoch": 0.2347263354173522, "grad_norm": 1.9363346099853516, "learning_rate": 4.027838357921745e-06, "loss": 0.8987, "step": 19260 }, { "epoch": 0.2347872716415, "grad_norm": 1.9152450561523438, "learning_rate": 4.027517639512508e-06, "loss": 0.9163, "step": 19265 }, { "epoch": 0.2348482078656478, "grad_norm": 2.0238704681396484, "learning_rate": 4.027196921103272e-06, "loss": 0.918, "step": 19270 }, { "epoch": 0.23490914408979563, "grad_norm": 1.9846101999282837, "learning_rate": 4.026876202694035e-06, "loss": 0.8737, "step": 19275 }, { "epoch": 0.23497008031394342, "grad_norm": 1.9023425579071045, "learning_rate": 4.026555484284798e-06, "loss": 0.9154, "step": 19280 }, { "epoch": 0.23503101653809125, "grad_norm": 1.9178861379623413, "learning_rate": 4.026234765875562e-06, "loss": 0.9091, "step": 19285 }, { "epoch": 0.23509195276223904, "grad_norm": 2.0213935375213623, "learning_rate": 4.025914047466325e-06, "loss": 0.8769, "step": 19290 }, { "epoch": 0.23515288898638684, "grad_norm": 2.1544463634490967, "learning_rate": 4.025593329057088e-06, "loss": 0.9105, "step": 19295 }, { "epoch": 0.23521382521053466, "grad_norm": 4.049418926239014, "learning_rate": 4.025272610647851e-06, "loss": 0.9208, "step": 19300 }, { "epoch": 0.23527476143468246, "grad_norm": 1.9825299978256226, "learning_rate": 4.024951892238615e-06, "loss": 0.8001, "step": 19305 }, { "epoch": 0.23533569765883028, "grad_norm": 1.8540376424789429, "learning_rate": 4.024631173829378e-06, "loss": 0.842, "step": 19310 }, { "epoch": 0.23539663388297807, "grad_norm": 1.7594516277313232, "learning_rate": 4.024310455420141e-06, "loss": 0.8028, "step": 19315 }, { "epoch": 0.23545757010712587, "grad_norm": 2.068488359451294, "learning_rate": 4.023989737010905e-06, "loss": 0.8909, "step": 19320 }, { "epoch": 0.2355185063312737, "grad_norm": 1.8367512226104736, "learning_rate": 4.023669018601668e-06, "loss": 0.8616, "step": 19325 }, { "epoch": 0.2355794425554215, "grad_norm": 1.9074393510818481, "learning_rate": 4.023348300192431e-06, "loss": 0.9034, "step": 19330 }, { "epoch": 0.2356403787795693, "grad_norm": 1.8943524360656738, "learning_rate": 4.023027581783195e-06, "loss": 0.9423, "step": 19335 }, { "epoch": 0.2357013150037171, "grad_norm": 2.1920840740203857, "learning_rate": 4.022706863373958e-06, "loss": 0.8526, "step": 19340 }, { "epoch": 0.23576225122786493, "grad_norm": 2.074545383453369, "learning_rate": 4.022386144964721e-06, "loss": 0.8733, "step": 19345 }, { "epoch": 0.23582318745201272, "grad_norm": 1.980629324913025, "learning_rate": 4.022065426555485e-06, "loss": 0.8943, "step": 19350 }, { "epoch": 0.23588412367616052, "grad_norm": 1.7809118032455444, "learning_rate": 4.021744708146248e-06, "loss": 0.8291, "step": 19355 }, { "epoch": 0.23594505990030834, "grad_norm": 1.8069618940353394, "learning_rate": 4.021423989737011e-06, "loss": 0.8957, "step": 19360 }, { "epoch": 0.23600599612445614, "grad_norm": 1.675106167793274, "learning_rate": 4.021103271327775e-06, "loss": 0.8432, "step": 19365 }, { "epoch": 0.23606693234860396, "grad_norm": 2.264627695083618, "learning_rate": 4.020782552918538e-06, "loss": 0.9192, "step": 19370 }, { "epoch": 0.23612786857275175, "grad_norm": 2.0999538898468018, "learning_rate": 4.020461834509301e-06, "loss": 0.9539, "step": 19375 }, { "epoch": 0.23618880479689958, "grad_norm": 2.0917320251464844, "learning_rate": 4.020141116100065e-06, "loss": 0.8394, "step": 19380 }, { "epoch": 0.23624974102104737, "grad_norm": 1.7635769844055176, "learning_rate": 4.019820397690828e-06, "loss": 0.8739, "step": 19385 }, { "epoch": 0.23631067724519517, "grad_norm": 1.7041583061218262, "learning_rate": 4.019499679281591e-06, "loss": 0.8738, "step": 19390 }, { "epoch": 0.236371613469343, "grad_norm": 2.1134426593780518, "learning_rate": 4.0191789608723545e-06, "loss": 0.8876, "step": 19395 }, { "epoch": 0.23643254969349078, "grad_norm": 1.805847406387329, "learning_rate": 4.0188582424631175e-06, "loss": 0.9219, "step": 19400 }, { "epoch": 0.2364934859176386, "grad_norm": 2.3414418697357178, "learning_rate": 4.018537524053881e-06, "loss": 0.8534, "step": 19405 }, { "epoch": 0.2365544221417864, "grad_norm": 2.0079309940338135, "learning_rate": 4.018216805644644e-06, "loss": 0.8446, "step": 19410 }, { "epoch": 0.23661535836593423, "grad_norm": 1.9556564092636108, "learning_rate": 4.017896087235407e-06, "loss": 0.9075, "step": 19415 }, { "epoch": 0.23667629459008202, "grad_norm": 2.002699375152588, "learning_rate": 4.017575368826171e-06, "loss": 0.8478, "step": 19420 }, { "epoch": 0.23673723081422982, "grad_norm": 1.893278956413269, "learning_rate": 4.017254650416934e-06, "loss": 0.8566, "step": 19425 }, { "epoch": 0.23679816703837764, "grad_norm": 1.9062575101852417, "learning_rate": 4.016933932007698e-06, "loss": 0.8712, "step": 19430 }, { "epoch": 0.23685910326252543, "grad_norm": 1.9474724531173706, "learning_rate": 4.016613213598461e-06, "loss": 0.8396, "step": 19435 }, { "epoch": 0.23692003948667326, "grad_norm": 1.9483920335769653, "learning_rate": 4.016292495189224e-06, "loss": 0.8674, "step": 19440 }, { "epoch": 0.23698097571082105, "grad_norm": 1.6989622116088867, "learning_rate": 4.015971776779988e-06, "loss": 0.8392, "step": 19445 }, { "epoch": 0.23704191193496887, "grad_norm": 1.9777933359146118, "learning_rate": 4.015651058370751e-06, "loss": 0.8921, "step": 19450 }, { "epoch": 0.23710284815911667, "grad_norm": 1.9254441261291504, "learning_rate": 4.015330339961514e-06, "loss": 0.9778, "step": 19455 }, { "epoch": 0.23716378438326446, "grad_norm": 1.6993097066879272, "learning_rate": 4.015009621552278e-06, "loss": 0.8817, "step": 19460 }, { "epoch": 0.2372247206074123, "grad_norm": 1.9372388124465942, "learning_rate": 4.014688903143041e-06, "loss": 0.8897, "step": 19465 }, { "epoch": 0.23728565683156008, "grad_norm": 2.1233344078063965, "learning_rate": 4.014368184733804e-06, "loss": 0.8636, "step": 19470 }, { "epoch": 0.2373465930557079, "grad_norm": 1.7518115043640137, "learning_rate": 4.014047466324567e-06, "loss": 0.8888, "step": 19475 }, { "epoch": 0.2374075292798557, "grad_norm": 2.0013086795806885, "learning_rate": 4.013726747915331e-06, "loss": 0.9021, "step": 19480 }, { "epoch": 0.23746846550400352, "grad_norm": 1.9258863925933838, "learning_rate": 4.013406029506094e-06, "loss": 0.9057, "step": 19485 }, { "epoch": 0.23752940172815132, "grad_norm": 2.2018041610717773, "learning_rate": 4.013085311096857e-06, "loss": 0.932, "step": 19490 }, { "epoch": 0.2375903379522991, "grad_norm": 2.309478998184204, "learning_rate": 4.012764592687621e-06, "loss": 0.885, "step": 19495 }, { "epoch": 0.23765127417644694, "grad_norm": 2.59360933303833, "learning_rate": 4.012443874278384e-06, "loss": 0.9626, "step": 19500 }, { "epoch": 0.23771221040059473, "grad_norm": 1.9368363618850708, "learning_rate": 4.012123155869147e-06, "loss": 0.8423, "step": 19505 }, { "epoch": 0.23777314662474255, "grad_norm": 2.1206226348876953, "learning_rate": 4.011802437459911e-06, "loss": 0.8602, "step": 19510 }, { "epoch": 0.23783408284889035, "grad_norm": 1.8798877000808716, "learning_rate": 4.011481719050674e-06, "loss": 0.8928, "step": 19515 }, { "epoch": 0.23789501907303817, "grad_norm": 1.9864128828048706, "learning_rate": 4.011161000641437e-06, "loss": 0.8907, "step": 19520 }, { "epoch": 0.23795595529718597, "grad_norm": 2.00439190864563, "learning_rate": 4.0108402822322005e-06, "loss": 0.8418, "step": 19525 }, { "epoch": 0.23801689152133376, "grad_norm": 2.281449556350708, "learning_rate": 4.0105195638229636e-06, "loss": 0.8244, "step": 19530 }, { "epoch": 0.23807782774548158, "grad_norm": 2.044358491897583, "learning_rate": 4.0101988454137266e-06, "loss": 0.8859, "step": 19535 }, { "epoch": 0.23813876396962938, "grad_norm": 2.1041839122772217, "learning_rate": 4.0098781270044904e-06, "loss": 0.8949, "step": 19540 }, { "epoch": 0.2381997001937772, "grad_norm": 2.0951030254364014, "learning_rate": 4.0095574085952534e-06, "loss": 0.852, "step": 19545 }, { "epoch": 0.238260636417925, "grad_norm": 1.8152157068252563, "learning_rate": 4.009236690186017e-06, "loss": 0.8786, "step": 19550 }, { "epoch": 0.2383215726420728, "grad_norm": 2.643256902694702, "learning_rate": 4.00891597177678e-06, "loss": 0.9262, "step": 19555 }, { "epoch": 0.23838250886622062, "grad_norm": 1.947596788406372, "learning_rate": 4.008595253367543e-06, "loss": 0.8982, "step": 19560 }, { "epoch": 0.2384434450903684, "grad_norm": 1.8872803449630737, "learning_rate": 4.008274534958307e-06, "loss": 0.8384, "step": 19565 }, { "epoch": 0.23850438131451623, "grad_norm": 2.297006130218506, "learning_rate": 4.00795381654907e-06, "loss": 0.8774, "step": 19570 }, { "epoch": 0.23856531753866403, "grad_norm": 1.7532614469528198, "learning_rate": 4.007633098139834e-06, "loss": 0.9205, "step": 19575 }, { "epoch": 0.23862625376281185, "grad_norm": 1.7776975631713867, "learning_rate": 4.007312379730597e-06, "loss": 0.9186, "step": 19580 }, { "epoch": 0.23868718998695965, "grad_norm": 1.8209669589996338, "learning_rate": 4.00699166132136e-06, "loss": 0.9156, "step": 19585 }, { "epoch": 0.23874812621110744, "grad_norm": 1.8052096366882324, "learning_rate": 4.006670942912124e-06, "loss": 0.9209, "step": 19590 }, { "epoch": 0.23880906243525526, "grad_norm": 1.964107632637024, "learning_rate": 4.006350224502887e-06, "loss": 0.8831, "step": 19595 }, { "epoch": 0.23886999865940306, "grad_norm": 1.9629448652267456, "learning_rate": 4.00602950609365e-06, "loss": 0.8126, "step": 19600 }, { "epoch": 0.23893093488355088, "grad_norm": 1.9754939079284668, "learning_rate": 4.005708787684414e-06, "loss": 0.8199, "step": 19605 }, { "epoch": 0.23899187110769868, "grad_norm": 1.7719682455062866, "learning_rate": 4.005388069275177e-06, "loss": 0.8466, "step": 19610 }, { "epoch": 0.2390528073318465, "grad_norm": 1.8914289474487305, "learning_rate": 4.00506735086594e-06, "loss": 0.9004, "step": 19615 }, { "epoch": 0.2391137435559943, "grad_norm": 1.9305906295776367, "learning_rate": 4.004746632456704e-06, "loss": 0.8391, "step": 19620 }, { "epoch": 0.2391746797801421, "grad_norm": 2.1399343013763428, "learning_rate": 4.004425914047467e-06, "loss": 0.8982, "step": 19625 }, { "epoch": 0.2392356160042899, "grad_norm": 1.6834115982055664, "learning_rate": 4.00410519563823e-06, "loss": 0.775, "step": 19630 }, { "epoch": 0.2392965522284377, "grad_norm": 2.2513301372528076, "learning_rate": 4.003784477228994e-06, "loss": 0.8715, "step": 19635 }, { "epoch": 0.23935748845258553, "grad_norm": 1.9266995191574097, "learning_rate": 4.003463758819757e-06, "loss": 0.8653, "step": 19640 }, { "epoch": 0.23941842467673333, "grad_norm": 2.3599326610565186, "learning_rate": 4.00314304041052e-06, "loss": 0.8412, "step": 19645 }, { "epoch": 0.23947936090088115, "grad_norm": 1.809096097946167, "learning_rate": 4.002822322001283e-06, "loss": 0.8873, "step": 19650 }, { "epoch": 0.23954029712502894, "grad_norm": 2.5084924697875977, "learning_rate": 4.002501603592047e-06, "loss": 0.8635, "step": 19655 }, { "epoch": 0.23960123334917674, "grad_norm": 2.088798761367798, "learning_rate": 4.00218088518281e-06, "loss": 0.8453, "step": 19660 }, { "epoch": 0.23966216957332456, "grad_norm": 2.319300651550293, "learning_rate": 4.001860166773573e-06, "loss": 0.8498, "step": 19665 }, { "epoch": 0.23972310579747236, "grad_norm": 2.0062782764434814, "learning_rate": 4.0015394483643365e-06, "loss": 0.7841, "step": 19670 }, { "epoch": 0.23978404202162018, "grad_norm": 1.7981988191604614, "learning_rate": 4.0012187299550995e-06, "loss": 0.8752, "step": 19675 }, { "epoch": 0.23984497824576798, "grad_norm": 2.2647299766540527, "learning_rate": 4.0008980115458625e-06, "loss": 0.9357, "step": 19680 }, { "epoch": 0.2399059144699158, "grad_norm": 2.162989377975464, "learning_rate": 4.000577293136626e-06, "loss": 0.9308, "step": 19685 }, { "epoch": 0.2399668506940636, "grad_norm": 1.930297613143921, "learning_rate": 4.000256574727389e-06, "loss": 0.8764, "step": 19690 }, { "epoch": 0.2400277869182114, "grad_norm": 2.0165371894836426, "learning_rate": 3.999935856318152e-06, "loss": 0.8836, "step": 19695 }, { "epoch": 0.2400887231423592, "grad_norm": 1.9487308263778687, "learning_rate": 3.999615137908916e-06, "loss": 0.8587, "step": 19700 }, { "epoch": 0.240149659366507, "grad_norm": 2.5292553901672363, "learning_rate": 3.999294419499679e-06, "loss": 0.874, "step": 19705 }, { "epoch": 0.24021059559065483, "grad_norm": 1.802607536315918, "learning_rate": 3.998973701090443e-06, "loss": 0.9202, "step": 19710 }, { "epoch": 0.24027153181480262, "grad_norm": 1.876206874847412, "learning_rate": 3.998652982681206e-06, "loss": 0.8284, "step": 19715 }, { "epoch": 0.24033246803895045, "grad_norm": 2.2910537719726562, "learning_rate": 3.99833226427197e-06, "loss": 0.8838, "step": 19720 }, { "epoch": 0.24039340426309824, "grad_norm": 1.9431051015853882, "learning_rate": 3.998011545862733e-06, "loss": 0.8037, "step": 19725 }, { "epoch": 0.24045434048724604, "grad_norm": 2.1569340229034424, "learning_rate": 3.997690827453496e-06, "loss": 0.9333, "step": 19730 }, { "epoch": 0.24051527671139386, "grad_norm": 1.9525327682495117, "learning_rate": 3.99737010904426e-06, "loss": 0.9189, "step": 19735 }, { "epoch": 0.24057621293554166, "grad_norm": 1.9462357759475708, "learning_rate": 3.997049390635023e-06, "loss": 0.8542, "step": 19740 }, { "epoch": 0.24063714915968948, "grad_norm": 2.131394147872925, "learning_rate": 3.996728672225786e-06, "loss": 0.9315, "step": 19745 }, { "epoch": 0.24069808538383727, "grad_norm": 2.070021390914917, "learning_rate": 3.99640795381655e-06, "loss": 0.9081, "step": 19750 }, { "epoch": 0.2407590216079851, "grad_norm": 1.8769460916519165, "learning_rate": 3.996087235407313e-06, "loss": 0.9275, "step": 19755 }, { "epoch": 0.2408199578321329, "grad_norm": 1.8724873065948486, "learning_rate": 3.995766516998076e-06, "loss": 0.9075, "step": 19760 }, { "epoch": 0.2408808940562807, "grad_norm": 1.8529818058013916, "learning_rate": 3.99544579858884e-06, "loss": 0.859, "step": 19765 }, { "epoch": 0.2409418302804285, "grad_norm": 1.8022525310516357, "learning_rate": 3.995125080179603e-06, "loss": 0.8196, "step": 19770 }, { "epoch": 0.2410027665045763, "grad_norm": 1.8967021703720093, "learning_rate": 3.994804361770366e-06, "loss": 0.8586, "step": 19775 }, { "epoch": 0.24106370272872413, "grad_norm": 1.9795292615890503, "learning_rate": 3.99448364336113e-06, "loss": 0.989, "step": 19780 }, { "epoch": 0.24112463895287192, "grad_norm": 2.0449512004852295, "learning_rate": 3.994162924951893e-06, "loss": 0.889, "step": 19785 }, { "epoch": 0.24118557517701972, "grad_norm": 2.0687899589538574, "learning_rate": 3.993842206542656e-06, "loss": 0.8761, "step": 19790 }, { "epoch": 0.24124651140116754, "grad_norm": 2.1603784561157227, "learning_rate": 3.9935214881334195e-06, "loss": 0.8362, "step": 19795 }, { "epoch": 0.24130744762531534, "grad_norm": 1.8878134489059448, "learning_rate": 3.9932007697241826e-06, "loss": 0.9081, "step": 19800 }, { "epoch": 0.24136838384946316, "grad_norm": 1.9257220029830933, "learning_rate": 3.9928800513149456e-06, "loss": 0.9294, "step": 19805 }, { "epoch": 0.24142932007361095, "grad_norm": 2.2955589294433594, "learning_rate": 3.992559332905709e-06, "loss": 0.9004, "step": 19810 }, { "epoch": 0.24149025629775878, "grad_norm": 1.7785775661468506, "learning_rate": 3.9922386144964724e-06, "loss": 0.9311, "step": 19815 }, { "epoch": 0.24155119252190657, "grad_norm": 2.1785717010498047, "learning_rate": 3.9919178960872355e-06, "loss": 0.9107, "step": 19820 }, { "epoch": 0.24161212874605437, "grad_norm": 1.924979329109192, "learning_rate": 3.9915971776779985e-06, "loss": 0.8662, "step": 19825 }, { "epoch": 0.2416730649702022, "grad_norm": 2.082184076309204, "learning_rate": 3.991276459268762e-06, "loss": 0.8461, "step": 19830 }, { "epoch": 0.24173400119434998, "grad_norm": 2.299363136291504, "learning_rate": 3.990955740859525e-06, "loss": 0.9235, "step": 19835 }, { "epoch": 0.2417949374184978, "grad_norm": 2.136199951171875, "learning_rate": 3.990635022450288e-06, "loss": 0.9228, "step": 19840 }, { "epoch": 0.2418558736426456, "grad_norm": 2.5030155181884766, "learning_rate": 3.990314304041052e-06, "loss": 0.914, "step": 19845 }, { "epoch": 0.24191680986679343, "grad_norm": 1.7807085514068604, "learning_rate": 3.989993585631815e-06, "loss": 0.8931, "step": 19850 }, { "epoch": 0.24197774609094122, "grad_norm": 2.07064151763916, "learning_rate": 3.989672867222579e-06, "loss": 0.9113, "step": 19855 }, { "epoch": 0.24203868231508902, "grad_norm": 2.2470507621765137, "learning_rate": 3.989352148813342e-06, "loss": 0.8528, "step": 19860 }, { "epoch": 0.24209961853923684, "grad_norm": 1.9494889974594116, "learning_rate": 3.989031430404105e-06, "loss": 0.8575, "step": 19865 }, { "epoch": 0.24216055476338463, "grad_norm": 2.101717233657837, "learning_rate": 3.988710711994869e-06, "loss": 0.8628, "step": 19870 }, { "epoch": 0.24222149098753246, "grad_norm": 2.1575353145599365, "learning_rate": 3.988389993585632e-06, "loss": 0.91, "step": 19875 }, { "epoch": 0.24228242721168025, "grad_norm": 1.7396844625473022, "learning_rate": 3.988069275176396e-06, "loss": 0.9207, "step": 19880 }, { "epoch": 0.24234336343582807, "grad_norm": 2.387840747833252, "learning_rate": 3.987748556767159e-06, "loss": 0.7951, "step": 19885 }, { "epoch": 0.24240429965997587, "grad_norm": 2.5955564975738525, "learning_rate": 3.987427838357922e-06, "loss": 0.8579, "step": 19890 }, { "epoch": 0.24246523588412366, "grad_norm": 2.0184342861175537, "learning_rate": 3.987107119948686e-06, "loss": 0.9065, "step": 19895 }, { "epoch": 0.2425261721082715, "grad_norm": 2.112006425857544, "learning_rate": 3.986786401539449e-06, "loss": 0.9236, "step": 19900 }, { "epoch": 0.24258710833241928, "grad_norm": 2.278360605239868, "learning_rate": 3.986465683130212e-06, "loss": 0.8946, "step": 19905 }, { "epoch": 0.2426480445565671, "grad_norm": 2.4140100479125977, "learning_rate": 3.986144964720976e-06, "loss": 0.8522, "step": 19910 }, { "epoch": 0.2427089807807149, "grad_norm": 2.118863582611084, "learning_rate": 3.985824246311739e-06, "loss": 0.9078, "step": 19915 }, { "epoch": 0.24276991700486272, "grad_norm": 2.1040682792663574, "learning_rate": 3.985503527902502e-06, "loss": 0.9014, "step": 19920 }, { "epoch": 0.24283085322901052, "grad_norm": 2.0255908966064453, "learning_rate": 3.985182809493266e-06, "loss": 0.8052, "step": 19925 }, { "epoch": 0.2428917894531583, "grad_norm": 2.2269234657287598, "learning_rate": 3.984862091084029e-06, "loss": 0.8766, "step": 19930 }, { "epoch": 0.24295272567730614, "grad_norm": 2.049142837524414, "learning_rate": 3.984541372674792e-06, "loss": 0.9171, "step": 19935 }, { "epoch": 0.24301366190145393, "grad_norm": 1.8715687990188599, "learning_rate": 3.9842206542655555e-06, "loss": 0.8155, "step": 19940 }, { "epoch": 0.24307459812560175, "grad_norm": 1.5779348611831665, "learning_rate": 3.9838999358563185e-06, "loss": 0.8151, "step": 19945 }, { "epoch": 0.24313553434974955, "grad_norm": 2.3220129013061523, "learning_rate": 3.9835792174470815e-06, "loss": 0.8888, "step": 19950 }, { "epoch": 0.24319647057389737, "grad_norm": 1.7542219161987305, "learning_rate": 3.983258499037845e-06, "loss": 0.8322, "step": 19955 }, { "epoch": 0.24325740679804517, "grad_norm": 1.9573475122451782, "learning_rate": 3.982937780628608e-06, "loss": 0.9381, "step": 19960 }, { "epoch": 0.24331834302219296, "grad_norm": 2.0149388313293457, "learning_rate": 3.982617062219371e-06, "loss": 0.8716, "step": 19965 }, { "epoch": 0.24337927924634079, "grad_norm": 2.1830317974090576, "learning_rate": 3.982296343810135e-06, "loss": 0.8436, "step": 19970 }, { "epoch": 0.24344021547048858, "grad_norm": 1.9577659368515015, "learning_rate": 3.981975625400898e-06, "loss": 0.9131, "step": 19975 }, { "epoch": 0.2435011516946364, "grad_norm": 1.6637682914733887, "learning_rate": 3.981654906991661e-06, "loss": 0.8732, "step": 19980 }, { "epoch": 0.2435620879187842, "grad_norm": 2.352210760116577, "learning_rate": 3.981334188582424e-06, "loss": 0.8549, "step": 19985 }, { "epoch": 0.24362302414293202, "grad_norm": 2.0176665782928467, "learning_rate": 3.981013470173188e-06, "loss": 0.7765, "step": 19990 }, { "epoch": 0.24368396036707982, "grad_norm": 2.2890865802764893, "learning_rate": 3.980692751763951e-06, "loss": 0.8402, "step": 19995 }, { "epoch": 0.2437448965912276, "grad_norm": 1.9091800451278687, "learning_rate": 3.980372033354715e-06, "loss": 0.8519, "step": 20000 }, { "epoch": 0.24380583281537543, "grad_norm": 1.8691318035125732, "learning_rate": 3.980051314945478e-06, "loss": 0.8494, "step": 20005 }, { "epoch": 0.24386676903952323, "grad_norm": 1.9709089994430542, "learning_rate": 3.979730596536241e-06, "loss": 0.9132, "step": 20010 }, { "epoch": 0.24392770526367105, "grad_norm": 1.8880181312561035, "learning_rate": 3.979409878127005e-06, "loss": 0.7938, "step": 20015 }, { "epoch": 0.24398864148781885, "grad_norm": 2.008845090866089, "learning_rate": 3.979089159717768e-06, "loss": 0.8741, "step": 20020 }, { "epoch": 0.24404957771196664, "grad_norm": 1.9876799583435059, "learning_rate": 3.978768441308532e-06, "loss": 0.874, "step": 20025 }, { "epoch": 0.24411051393611446, "grad_norm": 1.7073030471801758, "learning_rate": 3.978447722899295e-06, "loss": 0.8349, "step": 20030 }, { "epoch": 0.24417145016026226, "grad_norm": 2.1369051933288574, "learning_rate": 3.978127004490058e-06, "loss": 0.8933, "step": 20035 }, { "epoch": 0.24423238638441008, "grad_norm": 2.1208794116973877, "learning_rate": 3.977806286080822e-06, "loss": 0.9011, "step": 20040 }, { "epoch": 0.24429332260855788, "grad_norm": 1.958741545677185, "learning_rate": 3.977485567671585e-06, "loss": 0.9025, "step": 20045 }, { "epoch": 0.2443542588327057, "grad_norm": 2.0315122604370117, "learning_rate": 3.977164849262349e-06, "loss": 0.8883, "step": 20050 }, { "epoch": 0.2444151950568535, "grad_norm": 2.2664947509765625, "learning_rate": 3.976844130853112e-06, "loss": 0.896, "step": 20055 }, { "epoch": 0.2444761312810013, "grad_norm": 1.9126887321472168, "learning_rate": 3.976523412443875e-06, "loss": 0.9192, "step": 20060 }, { "epoch": 0.2445370675051491, "grad_norm": 2.025723695755005, "learning_rate": 3.976202694034638e-06, "loss": 0.8187, "step": 20065 }, { "epoch": 0.2445980037292969, "grad_norm": 2.688936948776245, "learning_rate": 3.9758819756254016e-06, "loss": 0.9174, "step": 20070 }, { "epoch": 0.24465893995344473, "grad_norm": 2.1247880458831787, "learning_rate": 3.9755612572161646e-06, "loss": 0.8444, "step": 20075 }, { "epoch": 0.24471987617759253, "grad_norm": 2.2528412342071533, "learning_rate": 3.975240538806928e-06, "loss": 0.8982, "step": 20080 }, { "epoch": 0.24478081240174035, "grad_norm": 2.2779693603515625, "learning_rate": 3.9749198203976914e-06, "loss": 0.8075, "step": 20085 }, { "epoch": 0.24484174862588814, "grad_norm": 2.0797970294952393, "learning_rate": 3.9745991019884545e-06, "loss": 0.848, "step": 20090 }, { "epoch": 0.24490268485003594, "grad_norm": 2.05043625831604, "learning_rate": 3.9742783835792175e-06, "loss": 0.8948, "step": 20095 }, { "epoch": 0.24496362107418376, "grad_norm": 1.9777876138687134, "learning_rate": 3.973957665169981e-06, "loss": 0.8526, "step": 20100 }, { "epoch": 0.24502455729833156, "grad_norm": 2.0058677196502686, "learning_rate": 3.973636946760744e-06, "loss": 0.8204, "step": 20105 }, { "epoch": 0.24508549352247938, "grad_norm": 1.8569234609603882, "learning_rate": 3.973316228351507e-06, "loss": 0.8432, "step": 20110 }, { "epoch": 0.24514642974662718, "grad_norm": 1.9338001012802124, "learning_rate": 3.972995509942271e-06, "loss": 0.8343, "step": 20115 }, { "epoch": 0.245207365970775, "grad_norm": 2.0897552967071533, "learning_rate": 3.972674791533034e-06, "loss": 0.8658, "step": 20120 }, { "epoch": 0.2452683021949228, "grad_norm": 2.000065803527832, "learning_rate": 3.972354073123797e-06, "loss": 0.9512, "step": 20125 }, { "epoch": 0.2453292384190706, "grad_norm": 1.9465367794036865, "learning_rate": 3.972033354714561e-06, "loss": 0.9417, "step": 20130 }, { "epoch": 0.2453901746432184, "grad_norm": 1.902441143989563, "learning_rate": 3.971712636305324e-06, "loss": 0.9003, "step": 20135 }, { "epoch": 0.2454511108673662, "grad_norm": 2.180697441101074, "learning_rate": 3.971391917896087e-06, "loss": 0.9826, "step": 20140 }, { "epoch": 0.24551204709151403, "grad_norm": 1.9007186889648438, "learning_rate": 3.97107119948685e-06, "loss": 0.8551, "step": 20145 }, { "epoch": 0.24557298331566182, "grad_norm": 2.0738272666931152, "learning_rate": 3.970750481077614e-06, "loss": 0.8564, "step": 20150 }, { "epoch": 0.24563391953980965, "grad_norm": 2.014742612838745, "learning_rate": 3.970429762668377e-06, "loss": 0.8951, "step": 20155 }, { "epoch": 0.24569485576395744, "grad_norm": 1.9230859279632568, "learning_rate": 3.970109044259141e-06, "loss": 0.9908, "step": 20160 }, { "epoch": 0.24575579198810524, "grad_norm": 2.3854997158050537, "learning_rate": 3.969788325849904e-06, "loss": 0.8182, "step": 20165 }, { "epoch": 0.24581672821225306, "grad_norm": 2.433908700942993, "learning_rate": 3.969467607440668e-06, "loss": 0.9225, "step": 20170 }, { "epoch": 0.24587766443640086, "grad_norm": 1.8016692399978638, "learning_rate": 3.969146889031431e-06, "loss": 0.8225, "step": 20175 }, { "epoch": 0.24593860066054868, "grad_norm": 2.2676737308502197, "learning_rate": 3.968826170622194e-06, "loss": 0.8761, "step": 20180 }, { "epoch": 0.24599953688469647, "grad_norm": 2.150299549102783, "learning_rate": 3.968505452212958e-06, "loss": 0.842, "step": 20185 }, { "epoch": 0.2460604731088443, "grad_norm": 1.950276255607605, "learning_rate": 3.968184733803721e-06, "loss": 0.8834, "step": 20190 }, { "epoch": 0.2461214093329921, "grad_norm": 2.007770538330078, "learning_rate": 3.967864015394485e-06, "loss": 0.9746, "step": 20195 }, { "epoch": 0.2461823455571399, "grad_norm": 2.214717388153076, "learning_rate": 3.967543296985248e-06, "loss": 0.867, "step": 20200 }, { "epoch": 0.2462432817812877, "grad_norm": 1.9611831903457642, "learning_rate": 3.967222578576011e-06, "loss": 0.8396, "step": 20205 }, { "epoch": 0.2463042180054355, "grad_norm": 2.0940115451812744, "learning_rate": 3.9669018601667745e-06, "loss": 0.8294, "step": 20210 }, { "epoch": 0.24636515422958333, "grad_norm": 2.2200958728790283, "learning_rate": 3.9665811417575375e-06, "loss": 0.883, "step": 20215 }, { "epoch": 0.24642609045373112, "grad_norm": 2.031003475189209, "learning_rate": 3.9662604233483005e-06, "loss": 0.9033, "step": 20220 }, { "epoch": 0.24648702667787895, "grad_norm": 1.9323420524597168, "learning_rate": 3.9659397049390635e-06, "loss": 0.9276, "step": 20225 }, { "epoch": 0.24654796290202674, "grad_norm": 2.2988052368164062, "learning_rate": 3.965618986529827e-06, "loss": 0.8823, "step": 20230 }, { "epoch": 0.24660889912617454, "grad_norm": 1.942533016204834, "learning_rate": 3.96529826812059e-06, "loss": 0.8387, "step": 20235 }, { "epoch": 0.24666983535032236, "grad_norm": 1.8823188543319702, "learning_rate": 3.9649775497113534e-06, "loss": 0.8563, "step": 20240 }, { "epoch": 0.24673077157447015, "grad_norm": 2.1672959327697754, "learning_rate": 3.964656831302117e-06, "loss": 0.8759, "step": 20245 }, { "epoch": 0.24679170779861798, "grad_norm": 1.948673129081726, "learning_rate": 3.96433611289288e-06, "loss": 0.8428, "step": 20250 }, { "epoch": 0.24685264402276577, "grad_norm": 1.7927563190460205, "learning_rate": 3.964015394483643e-06, "loss": 0.8717, "step": 20255 }, { "epoch": 0.24691358024691357, "grad_norm": 1.9722367525100708, "learning_rate": 3.963694676074407e-06, "loss": 0.8639, "step": 20260 }, { "epoch": 0.2469745164710614, "grad_norm": 2.210685968399048, "learning_rate": 3.96337395766517e-06, "loss": 0.8936, "step": 20265 }, { "epoch": 0.24703545269520918, "grad_norm": 1.7875977754592896, "learning_rate": 3.963053239255933e-06, "loss": 0.9203, "step": 20270 }, { "epoch": 0.247096388919357, "grad_norm": 2.491673707962036, "learning_rate": 3.962732520846697e-06, "loss": 0.8977, "step": 20275 }, { "epoch": 0.2471573251435048, "grad_norm": 2.0823559761047363, "learning_rate": 3.96241180243746e-06, "loss": 0.8404, "step": 20280 }, { "epoch": 0.24721826136765263, "grad_norm": 2.0201666355133057, "learning_rate": 3.962091084028223e-06, "loss": 0.9505, "step": 20285 }, { "epoch": 0.24727919759180042, "grad_norm": 2.3198788166046143, "learning_rate": 3.961770365618987e-06, "loss": 0.8384, "step": 20290 }, { "epoch": 0.24734013381594822, "grad_norm": 2.0088460445404053, "learning_rate": 3.96144964720975e-06, "loss": 0.8571, "step": 20295 }, { "epoch": 0.24740107004009604, "grad_norm": 1.7291736602783203, "learning_rate": 3.961128928800513e-06, "loss": 0.9184, "step": 20300 }, { "epoch": 0.24746200626424383, "grad_norm": 1.9681698083877563, "learning_rate": 3.960808210391277e-06, "loss": 0.8549, "step": 20305 }, { "epoch": 0.24752294248839166, "grad_norm": 1.8529369831085205, "learning_rate": 3.96048749198204e-06, "loss": 0.8296, "step": 20310 }, { "epoch": 0.24758387871253945, "grad_norm": 2.5736961364746094, "learning_rate": 3.960166773572803e-06, "loss": 0.9096, "step": 20315 }, { "epoch": 0.24764481493668727, "grad_norm": 2.189014196395874, "learning_rate": 3.959846055163567e-06, "loss": 0.9405, "step": 20320 }, { "epoch": 0.24770575116083507, "grad_norm": 1.9944453239440918, "learning_rate": 3.95952533675433e-06, "loss": 0.9078, "step": 20325 }, { "epoch": 0.24776668738498286, "grad_norm": 1.9755563735961914, "learning_rate": 3.959204618345094e-06, "loss": 0.8837, "step": 20330 }, { "epoch": 0.2478276236091307, "grad_norm": 1.896924376487732, "learning_rate": 3.958883899935857e-06, "loss": 0.9113, "step": 20335 }, { "epoch": 0.24788855983327848, "grad_norm": 2.043116569519043, "learning_rate": 3.95856318152662e-06, "loss": 0.8465, "step": 20340 }, { "epoch": 0.2479494960574263, "grad_norm": 1.91586172580719, "learning_rate": 3.9582424631173836e-06, "loss": 0.8644, "step": 20345 }, { "epoch": 0.2480104322815741, "grad_norm": 2.006510019302368, "learning_rate": 3.957921744708147e-06, "loss": 0.8906, "step": 20350 }, { "epoch": 0.24807136850572192, "grad_norm": 2.0193445682525635, "learning_rate": 3.9576010262989104e-06, "loss": 0.8835, "step": 20355 }, { "epoch": 0.24813230472986972, "grad_norm": 1.859091877937317, "learning_rate": 3.9572803078896735e-06, "loss": 0.8509, "step": 20360 }, { "epoch": 0.2481932409540175, "grad_norm": 1.648967981338501, "learning_rate": 3.9569595894804365e-06, "loss": 0.8926, "step": 20365 }, { "epoch": 0.24825417717816534, "grad_norm": 1.7524503469467163, "learning_rate": 3.9566388710712e-06, "loss": 0.8511, "step": 20370 }, { "epoch": 0.24831511340231313, "grad_norm": 2.2138733863830566, "learning_rate": 3.956318152661963e-06, "loss": 0.8871, "step": 20375 }, { "epoch": 0.24837604962646095, "grad_norm": 1.9140450954437256, "learning_rate": 3.955997434252726e-06, "loss": 0.8734, "step": 20380 }, { "epoch": 0.24843698585060875, "grad_norm": 2.267719268798828, "learning_rate": 3.95567671584349e-06, "loss": 0.881, "step": 20385 }, { "epoch": 0.24849792207475657, "grad_norm": 1.8980594873428345, "learning_rate": 3.955355997434253e-06, "loss": 0.9625, "step": 20390 }, { "epoch": 0.24855885829890437, "grad_norm": 1.8910057544708252, "learning_rate": 3.955035279025016e-06, "loss": 0.9117, "step": 20395 }, { "epoch": 0.24861979452305216, "grad_norm": 1.9217634201049805, "learning_rate": 3.954714560615779e-06, "loss": 0.8224, "step": 20400 }, { "epoch": 0.24868073074719999, "grad_norm": 1.837151288986206, "learning_rate": 3.954393842206543e-06, "loss": 0.8444, "step": 20405 }, { "epoch": 0.24874166697134778, "grad_norm": 1.9503278732299805, "learning_rate": 3.954073123797306e-06, "loss": 0.9352, "step": 20410 }, { "epoch": 0.2488026031954956, "grad_norm": 1.7696789503097534, "learning_rate": 3.953752405388069e-06, "loss": 0.8586, "step": 20415 }, { "epoch": 0.2488635394196434, "grad_norm": 1.8560528755187988, "learning_rate": 3.953431686978833e-06, "loss": 0.8484, "step": 20420 }, { "epoch": 0.24892447564379122, "grad_norm": 1.7925472259521484, "learning_rate": 3.953110968569596e-06, "loss": 0.8488, "step": 20425 }, { "epoch": 0.24898541186793902, "grad_norm": 1.9795596599578857, "learning_rate": 3.952790250160359e-06, "loss": 0.8826, "step": 20430 }, { "epoch": 0.2490463480920868, "grad_norm": 1.8094924688339233, "learning_rate": 3.952469531751123e-06, "loss": 0.9161, "step": 20435 }, { "epoch": 0.24910728431623463, "grad_norm": 1.9775927066802979, "learning_rate": 3.952148813341886e-06, "loss": 0.9202, "step": 20440 }, { "epoch": 0.24916822054038243, "grad_norm": 2.2411115169525146, "learning_rate": 3.951828094932649e-06, "loss": 0.8962, "step": 20445 }, { "epoch": 0.24922915676453025, "grad_norm": 2.1531503200531006, "learning_rate": 3.951507376523413e-06, "loss": 0.8646, "step": 20450 }, { "epoch": 0.24929009298867805, "grad_norm": 2.021334648132324, "learning_rate": 3.951186658114176e-06, "loss": 0.8808, "step": 20455 }, { "epoch": 0.24935102921282587, "grad_norm": 1.693812370300293, "learning_rate": 3.950865939704939e-06, "loss": 0.8719, "step": 20460 }, { "epoch": 0.24941196543697366, "grad_norm": 1.8971060514450073, "learning_rate": 3.950545221295703e-06, "loss": 0.9892, "step": 20465 }, { "epoch": 0.24947290166112146, "grad_norm": 2.141786575317383, "learning_rate": 3.950224502886466e-06, "loss": 0.8715, "step": 20470 }, { "epoch": 0.24953383788526928, "grad_norm": 2.08770751953125, "learning_rate": 3.94990378447723e-06, "loss": 0.8269, "step": 20475 }, { "epoch": 0.24959477410941708, "grad_norm": 1.9339854717254639, "learning_rate": 3.949583066067993e-06, "loss": 0.9447, "step": 20480 }, { "epoch": 0.2496557103335649, "grad_norm": 1.6781439781188965, "learning_rate": 3.949262347658756e-06, "loss": 0.8309, "step": 20485 }, { "epoch": 0.2497166465577127, "grad_norm": 1.8108927011489868, "learning_rate": 3.9489416292495195e-06, "loss": 0.8522, "step": 20490 }, { "epoch": 0.2497775827818605, "grad_norm": 1.8351284265518188, "learning_rate": 3.9486209108402825e-06, "loss": 0.8056, "step": 20495 }, { "epoch": 0.2498385190060083, "grad_norm": 1.9267460107803345, "learning_rate": 3.948300192431046e-06, "loss": 0.9167, "step": 20500 }, { "epoch": 0.2498994552301561, "grad_norm": 1.8438082933425903, "learning_rate": 3.947979474021809e-06, "loss": 0.798, "step": 20505 }, { "epoch": 0.24996039145430393, "grad_norm": 1.7657853364944458, "learning_rate": 3.9476587556125724e-06, "loss": 0.8883, "step": 20510 }, { "epoch": 0.25002132767845175, "grad_norm": 1.7015421390533447, "learning_rate": 3.947338037203336e-06, "loss": 0.8145, "step": 20515 }, { "epoch": 0.2500822639025995, "grad_norm": 2.134613275527954, "learning_rate": 3.947017318794099e-06, "loss": 0.9355, "step": 20520 }, { "epoch": 0.25014320012674734, "grad_norm": 1.731209397315979, "learning_rate": 3.946696600384862e-06, "loss": 0.8455, "step": 20525 }, { "epoch": 0.25020413635089517, "grad_norm": 1.5589996576309204, "learning_rate": 3.946375881975626e-06, "loss": 0.859, "step": 20530 }, { "epoch": 0.25026507257504294, "grad_norm": 1.8607933521270752, "learning_rate": 3.946055163566389e-06, "loss": 0.8734, "step": 20535 }, { "epoch": 0.25032600879919076, "grad_norm": 1.8821696043014526, "learning_rate": 3.945734445157152e-06, "loss": 0.8429, "step": 20540 }, { "epoch": 0.2503869450233386, "grad_norm": 1.7665280103683472, "learning_rate": 3.945413726747916e-06, "loss": 0.9025, "step": 20545 }, { "epoch": 0.2504478812474864, "grad_norm": 2.196408987045288, "learning_rate": 3.945093008338679e-06, "loss": 0.926, "step": 20550 }, { "epoch": 0.25050881747163417, "grad_norm": 2.541205644607544, "learning_rate": 3.944772289929442e-06, "loss": 0.8684, "step": 20555 }, { "epoch": 0.250569753695782, "grad_norm": 1.932938575744629, "learning_rate": 3.944451571520206e-06, "loss": 0.814, "step": 20560 }, { "epoch": 0.2506306899199298, "grad_norm": 1.7998932600021362, "learning_rate": 3.944130853110969e-06, "loss": 0.7843, "step": 20565 }, { "epoch": 0.2506916261440776, "grad_norm": 2.746328353881836, "learning_rate": 3.943810134701732e-06, "loss": 0.8831, "step": 20570 }, { "epoch": 0.2507525623682254, "grad_norm": 1.9027398824691772, "learning_rate": 3.943489416292495e-06, "loss": 0.8993, "step": 20575 }, { "epoch": 0.25081349859237323, "grad_norm": 1.9203308820724487, "learning_rate": 3.943168697883259e-06, "loss": 0.864, "step": 20580 }, { "epoch": 0.25087443481652105, "grad_norm": 2.0498862266540527, "learning_rate": 3.942847979474022e-06, "loss": 0.8787, "step": 20585 }, { "epoch": 0.2509353710406688, "grad_norm": 1.7748961448669434, "learning_rate": 3.942527261064785e-06, "loss": 0.9007, "step": 20590 }, { "epoch": 0.25099630726481664, "grad_norm": 2.1506032943725586, "learning_rate": 3.942206542655549e-06, "loss": 0.8595, "step": 20595 }, { "epoch": 0.25105724348896447, "grad_norm": 1.64803946018219, "learning_rate": 3.941885824246312e-06, "loss": 0.8766, "step": 20600 }, { "epoch": 0.25111817971311223, "grad_norm": 1.8626235723495483, "learning_rate": 3.941565105837075e-06, "loss": 0.8495, "step": 20605 }, { "epoch": 0.25117911593726006, "grad_norm": 1.7717108726501465, "learning_rate": 3.941244387427839e-06, "loss": 0.8705, "step": 20610 }, { "epoch": 0.2512400521614079, "grad_norm": 2.2535855770111084, "learning_rate": 3.940923669018602e-06, "loss": 0.8924, "step": 20615 }, { "epoch": 0.2513009883855557, "grad_norm": 1.8782999515533447, "learning_rate": 3.940602950609365e-06, "loss": 0.8802, "step": 20620 }, { "epoch": 0.25136192460970347, "grad_norm": 2.149448871612549, "learning_rate": 3.940282232200129e-06, "loss": 0.9617, "step": 20625 }, { "epoch": 0.2514228608338513, "grad_norm": 2.3051953315734863, "learning_rate": 3.939961513790892e-06, "loss": 0.8968, "step": 20630 }, { "epoch": 0.2514837970579991, "grad_norm": 2.1013078689575195, "learning_rate": 3.9396407953816555e-06, "loss": 0.8473, "step": 20635 }, { "epoch": 0.2515447332821469, "grad_norm": 1.6856300830841064, "learning_rate": 3.9393200769724185e-06, "loss": 0.8641, "step": 20640 }, { "epoch": 0.2516056695062947, "grad_norm": 2.1664674282073975, "learning_rate": 3.938999358563182e-06, "loss": 0.9705, "step": 20645 }, { "epoch": 0.2516666057304425, "grad_norm": 2.041362762451172, "learning_rate": 3.938678640153945e-06, "loss": 0.8528, "step": 20650 }, { "epoch": 0.25172754195459035, "grad_norm": 1.7515164613723755, "learning_rate": 3.938357921744708e-06, "loss": 0.8541, "step": 20655 }, { "epoch": 0.2517884781787381, "grad_norm": 2.056812286376953, "learning_rate": 3.938037203335472e-06, "loss": 0.8768, "step": 20660 }, { "epoch": 0.25184941440288594, "grad_norm": 1.934781789779663, "learning_rate": 3.937716484926235e-06, "loss": 0.85, "step": 20665 }, { "epoch": 0.25191035062703376, "grad_norm": 2.4457015991210938, "learning_rate": 3.937395766516998e-06, "loss": 0.8093, "step": 20670 }, { "epoch": 0.25197128685118153, "grad_norm": 1.9243896007537842, "learning_rate": 3.937075048107762e-06, "loss": 0.8676, "step": 20675 }, { "epoch": 0.25203222307532935, "grad_norm": 1.9590888023376465, "learning_rate": 3.936754329698525e-06, "loss": 0.8885, "step": 20680 }, { "epoch": 0.2520931592994772, "grad_norm": 1.6323838233947754, "learning_rate": 3.936433611289288e-06, "loss": 0.902, "step": 20685 }, { "epoch": 0.252154095523625, "grad_norm": 2.6709680557250977, "learning_rate": 3.936112892880052e-06, "loss": 0.8275, "step": 20690 }, { "epoch": 0.25221503174777277, "grad_norm": 1.7184879779815674, "learning_rate": 3.935792174470815e-06, "loss": 0.8619, "step": 20695 }, { "epoch": 0.2522759679719206, "grad_norm": 2.523366928100586, "learning_rate": 3.935471456061578e-06, "loss": 0.888, "step": 20700 }, { "epoch": 0.2523369041960684, "grad_norm": 2.2079198360443115, "learning_rate": 3.935150737652342e-06, "loss": 0.8792, "step": 20705 }, { "epoch": 0.2523978404202162, "grad_norm": 2.0134103298187256, "learning_rate": 3.934830019243105e-06, "loss": 0.801, "step": 20710 }, { "epoch": 0.252458776644364, "grad_norm": 1.836117148399353, "learning_rate": 3.934509300833868e-06, "loss": 0.8699, "step": 20715 }, { "epoch": 0.2525197128685118, "grad_norm": 2.3377437591552734, "learning_rate": 3.934188582424632e-06, "loss": 0.834, "step": 20720 }, { "epoch": 0.25258064909265965, "grad_norm": 1.9276518821716309, "learning_rate": 3.933867864015395e-06, "loss": 0.8083, "step": 20725 }, { "epoch": 0.2526415853168074, "grad_norm": 2.358793258666992, "learning_rate": 3.933547145606158e-06, "loss": 0.8417, "step": 20730 }, { "epoch": 0.25270252154095524, "grad_norm": 1.8790464401245117, "learning_rate": 3.933226427196921e-06, "loss": 0.9596, "step": 20735 }, { "epoch": 0.25276345776510306, "grad_norm": 2.186894416809082, "learning_rate": 3.932905708787685e-06, "loss": 0.8389, "step": 20740 }, { "epoch": 0.25282439398925083, "grad_norm": 2.097809076309204, "learning_rate": 3.932584990378448e-06, "loss": 0.8337, "step": 20745 }, { "epoch": 0.25288533021339865, "grad_norm": 2.286095380783081, "learning_rate": 3.932264271969211e-06, "loss": 0.9005, "step": 20750 }, { "epoch": 0.2529462664375465, "grad_norm": 1.6533986330032349, "learning_rate": 3.931943553559975e-06, "loss": 0.8187, "step": 20755 }, { "epoch": 0.2530072026616943, "grad_norm": 1.9368842840194702, "learning_rate": 3.931622835150738e-06, "loss": 0.8569, "step": 20760 }, { "epoch": 0.25306813888584206, "grad_norm": 2.249312400817871, "learning_rate": 3.931302116741501e-06, "loss": 0.827, "step": 20765 }, { "epoch": 0.2531290751099899, "grad_norm": 1.6471915245056152, "learning_rate": 3.9309813983322646e-06, "loss": 0.8195, "step": 20770 }, { "epoch": 0.2531900113341377, "grad_norm": 2.0871427059173584, "learning_rate": 3.9306606799230276e-06, "loss": 0.8357, "step": 20775 }, { "epoch": 0.2532509475582855, "grad_norm": 2.6644132137298584, "learning_rate": 3.9303399615137914e-06, "loss": 0.9108, "step": 20780 }, { "epoch": 0.2533118837824333, "grad_norm": 2.032841444015503, "learning_rate": 3.9300192431045545e-06, "loss": 0.8196, "step": 20785 }, { "epoch": 0.2533728200065811, "grad_norm": 2.1253113746643066, "learning_rate": 3.9296985246953175e-06, "loss": 0.9324, "step": 20790 }, { "epoch": 0.25343375623072895, "grad_norm": 1.952606201171875, "learning_rate": 3.929377806286081e-06, "loss": 0.8605, "step": 20795 }, { "epoch": 0.2534946924548767, "grad_norm": 2.139773368835449, "learning_rate": 3.929057087876844e-06, "loss": 0.8487, "step": 20800 }, { "epoch": 0.25355562867902454, "grad_norm": 1.924383282661438, "learning_rate": 3.928736369467608e-06, "loss": 0.8659, "step": 20805 }, { "epoch": 0.25361656490317236, "grad_norm": 1.8520333766937256, "learning_rate": 3.928415651058371e-06, "loss": 0.8272, "step": 20810 }, { "epoch": 0.2536775011273201, "grad_norm": 2.117647886276245, "learning_rate": 3.928094932649134e-06, "loss": 0.8489, "step": 20815 }, { "epoch": 0.25373843735146795, "grad_norm": 2.0313429832458496, "learning_rate": 3.927774214239898e-06, "loss": 0.9048, "step": 20820 }, { "epoch": 0.25379937357561577, "grad_norm": 1.8106763362884521, "learning_rate": 3.927453495830661e-06, "loss": 0.9633, "step": 20825 }, { "epoch": 0.2538603097997636, "grad_norm": 1.6688222885131836, "learning_rate": 3.927132777421424e-06, "loss": 0.8507, "step": 20830 }, { "epoch": 0.25392124602391136, "grad_norm": 1.8944988250732422, "learning_rate": 3.926812059012188e-06, "loss": 0.9194, "step": 20835 }, { "epoch": 0.2539821822480592, "grad_norm": 1.9363276958465576, "learning_rate": 3.926491340602951e-06, "loss": 0.8715, "step": 20840 }, { "epoch": 0.254043118472207, "grad_norm": 1.965067744255066, "learning_rate": 3.926170622193714e-06, "loss": 0.9515, "step": 20845 }, { "epoch": 0.2541040546963548, "grad_norm": 2.0501866340637207, "learning_rate": 3.925849903784478e-06, "loss": 0.8891, "step": 20850 }, { "epoch": 0.2541649909205026, "grad_norm": 1.7808295488357544, "learning_rate": 3.925529185375241e-06, "loss": 0.8902, "step": 20855 }, { "epoch": 0.2542259271446504, "grad_norm": 1.9448041915893555, "learning_rate": 3.925208466966004e-06, "loss": 0.7795, "step": 20860 }, { "epoch": 0.2542868633687982, "grad_norm": 1.9499790668487549, "learning_rate": 3.924887748556768e-06, "loss": 0.8727, "step": 20865 }, { "epoch": 0.254347799592946, "grad_norm": 2.106653928756714, "learning_rate": 3.924567030147531e-06, "loss": 0.8564, "step": 20870 }, { "epoch": 0.25440873581709383, "grad_norm": 2.1139276027679443, "learning_rate": 3.924246311738294e-06, "loss": 0.8621, "step": 20875 }, { "epoch": 0.25446967204124166, "grad_norm": 2.055180072784424, "learning_rate": 3.923925593329058e-06, "loss": 0.8723, "step": 20880 }, { "epoch": 0.2545306082653894, "grad_norm": 2.4045355319976807, "learning_rate": 3.923604874919821e-06, "loss": 0.9406, "step": 20885 }, { "epoch": 0.25459154448953725, "grad_norm": 2.0640358924865723, "learning_rate": 3.923284156510584e-06, "loss": 0.8437, "step": 20890 }, { "epoch": 0.25465248071368507, "grad_norm": 1.9527748823165894, "learning_rate": 3.922963438101348e-06, "loss": 0.8276, "step": 20895 }, { "epoch": 0.25471341693783284, "grad_norm": 1.6988548040390015, "learning_rate": 3.922642719692111e-06, "loss": 0.8339, "step": 20900 }, { "epoch": 0.25477435316198066, "grad_norm": 2.1710150241851807, "learning_rate": 3.922322001282874e-06, "loss": 0.9285, "step": 20905 }, { "epoch": 0.2548352893861285, "grad_norm": 2.1360154151916504, "learning_rate": 3.922001282873637e-06, "loss": 0.8691, "step": 20910 }, { "epoch": 0.2548962256102763, "grad_norm": 1.8411002159118652, "learning_rate": 3.9216805644644005e-06, "loss": 0.8888, "step": 20915 }, { "epoch": 0.2549571618344241, "grad_norm": 1.9562809467315674, "learning_rate": 3.9213598460551635e-06, "loss": 0.8395, "step": 20920 }, { "epoch": 0.2550180980585719, "grad_norm": 2.5080113410949707, "learning_rate": 3.921039127645927e-06, "loss": 0.9191, "step": 20925 }, { "epoch": 0.2550790342827197, "grad_norm": 1.7975046634674072, "learning_rate": 3.92071840923669e-06, "loss": 0.788, "step": 20930 }, { "epoch": 0.2551399705068675, "grad_norm": 2.032650947570801, "learning_rate": 3.9203976908274534e-06, "loss": 0.8761, "step": 20935 }, { "epoch": 0.2552009067310153, "grad_norm": 2.129580497741699, "learning_rate": 3.920076972418217e-06, "loss": 0.8689, "step": 20940 }, { "epoch": 0.25526184295516313, "grad_norm": 1.8543453216552734, "learning_rate": 3.91975625400898e-06, "loss": 0.8537, "step": 20945 }, { "epoch": 0.25532277917931095, "grad_norm": 1.9571919441223145, "learning_rate": 3.919435535599744e-06, "loss": 0.9151, "step": 20950 }, { "epoch": 0.2553837154034587, "grad_norm": 1.790588617324829, "learning_rate": 3.919114817190507e-06, "loss": 0.8391, "step": 20955 }, { "epoch": 0.25544465162760654, "grad_norm": 1.9145351648330688, "learning_rate": 3.91879409878127e-06, "loss": 0.8145, "step": 20960 }, { "epoch": 0.25550558785175437, "grad_norm": 1.815964937210083, "learning_rate": 3.918473380372034e-06, "loss": 0.8941, "step": 20965 }, { "epoch": 0.25556652407590214, "grad_norm": 1.8393027782440186, "learning_rate": 3.918152661962797e-06, "loss": 0.8123, "step": 20970 }, { "epoch": 0.25562746030004996, "grad_norm": 1.8082647323608398, "learning_rate": 3.917831943553561e-06, "loss": 0.9015, "step": 20975 }, { "epoch": 0.2556883965241978, "grad_norm": 2.000838279724121, "learning_rate": 3.917511225144324e-06, "loss": 0.8917, "step": 20980 }, { "epoch": 0.2557493327483456, "grad_norm": 1.8885256052017212, "learning_rate": 3.917190506735087e-06, "loss": 0.8879, "step": 20985 }, { "epoch": 0.25581026897249337, "grad_norm": 1.960736632347107, "learning_rate": 3.91686978832585e-06, "loss": 0.843, "step": 20990 }, { "epoch": 0.2558712051966412, "grad_norm": 1.8102617263793945, "learning_rate": 3.916549069916614e-06, "loss": 0.8275, "step": 20995 }, { "epoch": 0.255932141420789, "grad_norm": 1.778204083442688, "learning_rate": 3.916228351507377e-06, "loss": 0.9084, "step": 21000 }, { "epoch": 0.2559930776449368, "grad_norm": 1.7755656242370605, "learning_rate": 3.91590763309814e-06, "loss": 0.8001, "step": 21005 }, { "epoch": 0.2560540138690846, "grad_norm": 2.3659493923187256, "learning_rate": 3.915586914688904e-06, "loss": 0.922, "step": 21010 }, { "epoch": 0.25611495009323243, "grad_norm": 1.9635003805160522, "learning_rate": 3.915266196279667e-06, "loss": 0.8766, "step": 21015 }, { "epoch": 0.25617588631738025, "grad_norm": 1.6769102811813354, "learning_rate": 3.91494547787043e-06, "loss": 0.8717, "step": 21020 }, { "epoch": 0.256236822541528, "grad_norm": 2.445038318634033, "learning_rate": 3.914624759461194e-06, "loss": 0.895, "step": 21025 }, { "epoch": 0.25629775876567584, "grad_norm": 2.111062526702881, "learning_rate": 3.914304041051957e-06, "loss": 0.9304, "step": 21030 }, { "epoch": 0.25635869498982367, "grad_norm": 2.230501413345337, "learning_rate": 3.91398332264272e-06, "loss": 0.7943, "step": 21035 }, { "epoch": 0.25641963121397143, "grad_norm": 2.168729305267334, "learning_rate": 3.9136626042334836e-06, "loss": 0.8653, "step": 21040 }, { "epoch": 0.25648056743811926, "grad_norm": 1.587895393371582, "learning_rate": 3.9133418858242466e-06, "loss": 0.8345, "step": 21045 }, { "epoch": 0.2565415036622671, "grad_norm": 1.6537736654281616, "learning_rate": 3.91302116741501e-06, "loss": 0.9076, "step": 21050 }, { "epoch": 0.2566024398864149, "grad_norm": 1.6722530126571655, "learning_rate": 3.9127004490057735e-06, "loss": 0.857, "step": 21055 }, { "epoch": 0.25666337611056267, "grad_norm": 2.067148208618164, "learning_rate": 3.9123797305965365e-06, "loss": 0.8093, "step": 21060 }, { "epoch": 0.2567243123347105, "grad_norm": 1.683427333831787, "learning_rate": 3.9120590121872995e-06, "loss": 0.8629, "step": 21065 }, { "epoch": 0.2567852485588583, "grad_norm": 1.889826774597168, "learning_rate": 3.9117382937780625e-06, "loss": 0.8206, "step": 21070 }, { "epoch": 0.2568461847830061, "grad_norm": 1.849860429763794, "learning_rate": 3.911417575368826e-06, "loss": 0.834, "step": 21075 }, { "epoch": 0.2569071210071539, "grad_norm": 2.1154816150665283, "learning_rate": 3.911096856959589e-06, "loss": 0.8578, "step": 21080 }, { "epoch": 0.2569680572313017, "grad_norm": 1.9525275230407715, "learning_rate": 3.910776138550353e-06, "loss": 0.8866, "step": 21085 }, { "epoch": 0.25702899345544955, "grad_norm": 1.8730067014694214, "learning_rate": 3.910455420141116e-06, "loss": 0.8652, "step": 21090 }, { "epoch": 0.2570899296795973, "grad_norm": 1.8260563611984253, "learning_rate": 3.910134701731879e-06, "loss": 0.8296, "step": 21095 }, { "epoch": 0.25715086590374514, "grad_norm": 1.8329808712005615, "learning_rate": 3.909813983322643e-06, "loss": 0.7908, "step": 21100 }, { "epoch": 0.25721180212789296, "grad_norm": 2.2309682369232178, "learning_rate": 3.909493264913406e-06, "loss": 0.9044, "step": 21105 }, { "epoch": 0.25727273835204073, "grad_norm": 1.7582513093948364, "learning_rate": 3.90917254650417e-06, "loss": 0.923, "step": 21110 }, { "epoch": 0.25733367457618855, "grad_norm": 2.1615355014801025, "learning_rate": 3.908851828094933e-06, "loss": 0.8745, "step": 21115 }, { "epoch": 0.2573946108003364, "grad_norm": 1.867397427558899, "learning_rate": 3.908531109685697e-06, "loss": 0.9268, "step": 21120 }, { "epoch": 0.2574555470244842, "grad_norm": 2.1276307106018066, "learning_rate": 3.90821039127646e-06, "loss": 0.9137, "step": 21125 }, { "epoch": 0.25751648324863197, "grad_norm": 2.063835859298706, "learning_rate": 3.907889672867223e-06, "loss": 0.9256, "step": 21130 }, { "epoch": 0.2575774194727798, "grad_norm": 2.084491014480591, "learning_rate": 3.907568954457987e-06, "loss": 0.9003, "step": 21135 }, { "epoch": 0.2576383556969276, "grad_norm": 1.9558720588684082, "learning_rate": 3.90724823604875e-06, "loss": 0.9033, "step": 21140 }, { "epoch": 0.2576992919210754, "grad_norm": 2.0785417556762695, "learning_rate": 3.906927517639513e-06, "loss": 0.9111, "step": 21145 }, { "epoch": 0.2577602281452232, "grad_norm": 1.9871392250061035, "learning_rate": 3.906606799230277e-06, "loss": 0.9359, "step": 21150 }, { "epoch": 0.257821164369371, "grad_norm": 2.2449254989624023, "learning_rate": 3.90628608082104e-06, "loss": 0.9562, "step": 21155 }, { "epoch": 0.25788210059351885, "grad_norm": 1.9294898509979248, "learning_rate": 3.905965362411803e-06, "loss": 0.8893, "step": 21160 }, { "epoch": 0.2579430368176666, "grad_norm": 1.9583182334899902, "learning_rate": 3.905644644002566e-06, "loss": 0.9395, "step": 21165 }, { "epoch": 0.25800397304181444, "grad_norm": 2.251049041748047, "learning_rate": 3.90532392559333e-06, "loss": 0.88, "step": 21170 }, { "epoch": 0.25806490926596226, "grad_norm": 1.8965610265731812, "learning_rate": 3.905003207184093e-06, "loss": 0.7851, "step": 21175 }, { "epoch": 0.25812584549011003, "grad_norm": 1.657281756401062, "learning_rate": 3.904682488774856e-06, "loss": 0.9091, "step": 21180 }, { "epoch": 0.25818678171425785, "grad_norm": 1.9233561754226685, "learning_rate": 3.9043617703656195e-06, "loss": 0.9064, "step": 21185 }, { "epoch": 0.2582477179384057, "grad_norm": 2.012723445892334, "learning_rate": 3.9040410519563825e-06, "loss": 0.8813, "step": 21190 }, { "epoch": 0.2583086541625535, "grad_norm": 2.209912061691284, "learning_rate": 3.9037203335471455e-06, "loss": 0.8699, "step": 21195 }, { "epoch": 0.25836959038670126, "grad_norm": 2.40022873878479, "learning_rate": 3.903399615137909e-06, "loss": 0.8742, "step": 21200 }, { "epoch": 0.2584305266108491, "grad_norm": 1.704058051109314, "learning_rate": 3.9030788967286724e-06, "loss": 0.8978, "step": 21205 }, { "epoch": 0.2584914628349969, "grad_norm": 2.2753517627716064, "learning_rate": 3.9027581783194354e-06, "loss": 0.8745, "step": 21210 }, { "epoch": 0.2585523990591447, "grad_norm": 2.1532886028289795, "learning_rate": 3.902437459910199e-06, "loss": 0.8307, "step": 21215 }, { "epoch": 0.2586133352832925, "grad_norm": 2.033512592315674, "learning_rate": 3.902116741500962e-06, "loss": 0.8674, "step": 21220 }, { "epoch": 0.2586742715074403, "grad_norm": 1.8915815353393555, "learning_rate": 3.901796023091725e-06, "loss": 0.8645, "step": 21225 }, { "epoch": 0.25873520773158815, "grad_norm": 2.2739837169647217, "learning_rate": 3.901475304682489e-06, "loss": 0.8444, "step": 21230 }, { "epoch": 0.2587961439557359, "grad_norm": 2.33573579788208, "learning_rate": 3.901154586273252e-06, "loss": 0.8862, "step": 21235 }, { "epoch": 0.25885708017988374, "grad_norm": 1.7396321296691895, "learning_rate": 3.900833867864015e-06, "loss": 0.9027, "step": 21240 }, { "epoch": 0.25891801640403156, "grad_norm": 2.2790679931640625, "learning_rate": 3.900513149454779e-06, "loss": 0.8079, "step": 21245 }, { "epoch": 0.2589789526281793, "grad_norm": 2.02512264251709, "learning_rate": 3.900192431045542e-06, "loss": 0.8449, "step": 21250 }, { "epoch": 0.25903988885232715, "grad_norm": 1.8098044395446777, "learning_rate": 3.899871712636306e-06, "loss": 0.8533, "step": 21255 }, { "epoch": 0.25910082507647497, "grad_norm": 1.923590064048767, "learning_rate": 3.899550994227069e-06, "loss": 0.7984, "step": 21260 }, { "epoch": 0.2591617613006228, "grad_norm": 1.9061917066574097, "learning_rate": 3.899230275817832e-06, "loss": 0.9507, "step": 21265 }, { "epoch": 0.25922269752477056, "grad_norm": 2.0347824096679688, "learning_rate": 3.898909557408596e-06, "loss": 0.8771, "step": 21270 }, { "epoch": 0.2592836337489184, "grad_norm": 1.6888498067855835, "learning_rate": 3.898588838999359e-06, "loss": 0.8142, "step": 21275 }, { "epoch": 0.2593445699730662, "grad_norm": 1.7078198194503784, "learning_rate": 3.898268120590123e-06, "loss": 0.8563, "step": 21280 }, { "epoch": 0.259405506197214, "grad_norm": 2.2017157077789307, "learning_rate": 3.897947402180886e-06, "loss": 0.8397, "step": 21285 }, { "epoch": 0.2594664424213618, "grad_norm": 1.8491880893707275, "learning_rate": 3.897626683771649e-06, "loss": 0.9471, "step": 21290 }, { "epoch": 0.2595273786455096, "grad_norm": 1.8883403539657593, "learning_rate": 3.897305965362413e-06, "loss": 0.885, "step": 21295 }, { "epoch": 0.25958831486965744, "grad_norm": 2.4467740058898926, "learning_rate": 3.896985246953176e-06, "loss": 0.8876, "step": 21300 }, { "epoch": 0.2596492510938052, "grad_norm": 2.135070323944092, "learning_rate": 3.896664528543939e-06, "loss": 0.8118, "step": 21305 }, { "epoch": 0.25971018731795303, "grad_norm": 1.7123193740844727, "learning_rate": 3.8963438101347026e-06, "loss": 0.8704, "step": 21310 }, { "epoch": 0.25977112354210086, "grad_norm": 1.8945038318634033, "learning_rate": 3.8960230917254656e-06, "loss": 0.9156, "step": 21315 }, { "epoch": 0.2598320597662486, "grad_norm": 2.249152660369873, "learning_rate": 3.895702373316229e-06, "loss": 0.8375, "step": 21320 }, { "epoch": 0.25989299599039645, "grad_norm": 1.6873289346694946, "learning_rate": 3.895381654906992e-06, "loss": 0.8982, "step": 21325 }, { "epoch": 0.25995393221454427, "grad_norm": 2.211146116256714, "learning_rate": 3.8950609364977555e-06, "loss": 0.9011, "step": 21330 }, { "epoch": 0.26001486843869204, "grad_norm": 1.9952328205108643, "learning_rate": 3.8947402180885185e-06, "loss": 0.8858, "step": 21335 }, { "epoch": 0.26007580466283986, "grad_norm": 2.1196844577789307, "learning_rate": 3.8944194996792815e-06, "loss": 0.8889, "step": 21340 }, { "epoch": 0.2601367408869877, "grad_norm": 1.9229682683944702, "learning_rate": 3.894098781270045e-06, "loss": 0.7741, "step": 21345 }, { "epoch": 0.2601976771111355, "grad_norm": 1.8987566232681274, "learning_rate": 3.893778062860808e-06, "loss": 0.8488, "step": 21350 }, { "epoch": 0.2602586133352833, "grad_norm": 2.2233774662017822, "learning_rate": 3.893457344451571e-06, "loss": 0.7954, "step": 21355 }, { "epoch": 0.2603195495594311, "grad_norm": 2.1130385398864746, "learning_rate": 3.893136626042335e-06, "loss": 0.8828, "step": 21360 }, { "epoch": 0.2603804857835789, "grad_norm": 2.0512545108795166, "learning_rate": 3.892815907633098e-06, "loss": 0.9189, "step": 21365 }, { "epoch": 0.2604414220077267, "grad_norm": 1.8615678548812866, "learning_rate": 3.892495189223861e-06, "loss": 0.9237, "step": 21370 }, { "epoch": 0.2605023582318745, "grad_norm": 1.9149776697158813, "learning_rate": 3.892174470814625e-06, "loss": 0.8131, "step": 21375 }, { "epoch": 0.26056329445602233, "grad_norm": 1.9904519319534302, "learning_rate": 3.891853752405388e-06, "loss": 0.8631, "step": 21380 }, { "epoch": 0.26062423068017015, "grad_norm": 2.026465654373169, "learning_rate": 3.891533033996151e-06, "loss": 0.9162, "step": 21385 }, { "epoch": 0.2606851669043179, "grad_norm": 1.9900860786437988, "learning_rate": 3.891212315586915e-06, "loss": 0.856, "step": 21390 }, { "epoch": 0.26074610312846574, "grad_norm": 1.8200310468673706, "learning_rate": 3.890891597177678e-06, "loss": 0.8411, "step": 21395 }, { "epoch": 0.26080703935261357, "grad_norm": 1.877094030380249, "learning_rate": 3.890570878768442e-06, "loss": 0.836, "step": 21400 }, { "epoch": 0.26086797557676134, "grad_norm": 2.587644338607788, "learning_rate": 3.890250160359205e-06, "loss": 0.81, "step": 21405 }, { "epoch": 0.26092891180090916, "grad_norm": 1.8364558219909668, "learning_rate": 3.889929441949968e-06, "loss": 0.9136, "step": 21410 }, { "epoch": 0.260989848025057, "grad_norm": 1.8338322639465332, "learning_rate": 3.889608723540732e-06, "loss": 0.7562, "step": 21415 }, { "epoch": 0.2610507842492048, "grad_norm": 2.0349557399749756, "learning_rate": 3.889288005131495e-06, "loss": 0.8341, "step": 21420 }, { "epoch": 0.26111172047335257, "grad_norm": 1.7578171491622925, "learning_rate": 3.888967286722259e-06, "loss": 0.8209, "step": 21425 }, { "epoch": 0.2611726566975004, "grad_norm": 1.9969027042388916, "learning_rate": 3.888646568313022e-06, "loss": 0.9216, "step": 21430 }, { "epoch": 0.2612335929216482, "grad_norm": 1.669916033744812, "learning_rate": 3.888325849903785e-06, "loss": 0.8838, "step": 21435 }, { "epoch": 0.261294529145796, "grad_norm": 2.1526758670806885, "learning_rate": 3.888005131494549e-06, "loss": 0.8387, "step": 21440 }, { "epoch": 0.2613554653699438, "grad_norm": 1.6900007724761963, "learning_rate": 3.887684413085312e-06, "loss": 0.8515, "step": 21445 }, { "epoch": 0.26141640159409163, "grad_norm": 2.155487060546875, "learning_rate": 3.887363694676075e-06, "loss": 0.8877, "step": 21450 }, { "epoch": 0.26147733781823945, "grad_norm": 2.0793685913085938, "learning_rate": 3.8870429762668385e-06, "loss": 0.8896, "step": 21455 }, { "epoch": 0.2615382740423872, "grad_norm": 2.0380747318267822, "learning_rate": 3.8867222578576015e-06, "loss": 0.8567, "step": 21460 }, { "epoch": 0.26159921026653504, "grad_norm": 1.8893247842788696, "learning_rate": 3.8864015394483645e-06, "loss": 0.8784, "step": 21465 }, { "epoch": 0.26166014649068287, "grad_norm": 1.9306071996688843, "learning_rate": 3.886080821039128e-06, "loss": 0.8636, "step": 21470 }, { "epoch": 0.26172108271483063, "grad_norm": 1.9935123920440674, "learning_rate": 3.8857601026298914e-06, "loss": 0.8976, "step": 21475 }, { "epoch": 0.26178201893897846, "grad_norm": 1.6507927179336548, "learning_rate": 3.8854393842206544e-06, "loss": 0.8568, "step": 21480 }, { "epoch": 0.2618429551631263, "grad_norm": 2.0356454849243164, "learning_rate": 3.885118665811418e-06, "loss": 0.8994, "step": 21485 }, { "epoch": 0.2619038913872741, "grad_norm": 1.7870389223098755, "learning_rate": 3.884797947402181e-06, "loss": 0.8112, "step": 21490 }, { "epoch": 0.26196482761142187, "grad_norm": 1.762774109840393, "learning_rate": 3.884477228992944e-06, "loss": 0.8729, "step": 21495 }, { "epoch": 0.2620257638355697, "grad_norm": 1.9446309804916382, "learning_rate": 3.884156510583707e-06, "loss": 0.8004, "step": 21500 }, { "epoch": 0.2620867000597175, "grad_norm": 2.325838565826416, "learning_rate": 3.883835792174471e-06, "loss": 0.8693, "step": 21505 }, { "epoch": 0.2621476362838653, "grad_norm": 2.4621715545654297, "learning_rate": 3.883515073765234e-06, "loss": 0.9133, "step": 21510 }, { "epoch": 0.2622085725080131, "grad_norm": 2.205626964569092, "learning_rate": 3.883194355355997e-06, "loss": 0.9738, "step": 21515 }, { "epoch": 0.2622695087321609, "grad_norm": 1.9814026355743408, "learning_rate": 3.882873636946761e-06, "loss": 0.8802, "step": 21520 }, { "epoch": 0.26233044495630875, "grad_norm": 2.052194356918335, "learning_rate": 3.882552918537524e-06, "loss": 0.8749, "step": 21525 }, { "epoch": 0.2623913811804565, "grad_norm": 1.8822218179702759, "learning_rate": 3.882232200128287e-06, "loss": 0.9183, "step": 21530 }, { "epoch": 0.26245231740460434, "grad_norm": 2.0328242778778076, "learning_rate": 3.881911481719051e-06, "loss": 0.8098, "step": 21535 }, { "epoch": 0.26251325362875216, "grad_norm": 1.8320322036743164, "learning_rate": 3.881590763309814e-06, "loss": 0.919, "step": 21540 }, { "epoch": 0.26257418985289993, "grad_norm": 1.7933158874511719, "learning_rate": 3.881270044900577e-06, "loss": 0.8993, "step": 21545 }, { "epoch": 0.26263512607704775, "grad_norm": 2.079599380493164, "learning_rate": 3.880949326491341e-06, "loss": 0.853, "step": 21550 }, { "epoch": 0.2626960623011956, "grad_norm": 1.8922560214996338, "learning_rate": 3.880628608082104e-06, "loss": 0.8293, "step": 21555 }, { "epoch": 0.2627569985253434, "grad_norm": 1.8624335527420044, "learning_rate": 3.880307889672868e-06, "loss": 0.9026, "step": 21560 }, { "epoch": 0.26281793474949117, "grad_norm": 2.0446252822875977, "learning_rate": 3.879987171263631e-06, "loss": 0.9157, "step": 21565 }, { "epoch": 0.262878870973639, "grad_norm": 1.9129186868667603, "learning_rate": 3.879666452854394e-06, "loss": 0.8633, "step": 21570 }, { "epoch": 0.2629398071977868, "grad_norm": 2.009758710861206, "learning_rate": 3.879345734445158e-06, "loss": 0.8381, "step": 21575 }, { "epoch": 0.2630007434219346, "grad_norm": 2.1279232501983643, "learning_rate": 3.879025016035921e-06, "loss": 0.8946, "step": 21580 }, { "epoch": 0.2630616796460824, "grad_norm": 2.2210917472839355, "learning_rate": 3.8787042976266846e-06, "loss": 0.9247, "step": 21585 }, { "epoch": 0.2631226158702302, "grad_norm": 1.9176843166351318, "learning_rate": 3.878383579217448e-06, "loss": 0.8609, "step": 21590 }, { "epoch": 0.26318355209437805, "grad_norm": 1.7854703664779663, "learning_rate": 3.878062860808211e-06, "loss": 0.8687, "step": 21595 }, { "epoch": 0.2632444883185258, "grad_norm": 1.8914529085159302, "learning_rate": 3.8777421423989745e-06, "loss": 0.9203, "step": 21600 }, { "epoch": 0.26330542454267364, "grad_norm": 1.9399681091308594, "learning_rate": 3.8774214239897375e-06, "loss": 0.8805, "step": 21605 }, { "epoch": 0.26336636076682146, "grad_norm": 1.757857084274292, "learning_rate": 3.8771007055805005e-06, "loss": 0.8444, "step": 21610 }, { "epoch": 0.26342729699096923, "grad_norm": 2.1531760692596436, "learning_rate": 3.876779987171264e-06, "loss": 0.8516, "step": 21615 }, { "epoch": 0.26348823321511705, "grad_norm": 2.5987026691436768, "learning_rate": 3.876459268762027e-06, "loss": 0.8349, "step": 21620 }, { "epoch": 0.2635491694392649, "grad_norm": 1.8957566022872925, "learning_rate": 3.87613855035279e-06, "loss": 0.8347, "step": 21625 }, { "epoch": 0.2636101056634127, "grad_norm": 2.4689605236053467, "learning_rate": 3.875817831943554e-06, "loss": 0.9412, "step": 21630 }, { "epoch": 0.26367104188756046, "grad_norm": 1.8627738952636719, "learning_rate": 3.875497113534317e-06, "loss": 0.8578, "step": 21635 }, { "epoch": 0.2637319781117083, "grad_norm": 1.7937042713165283, "learning_rate": 3.87517639512508e-06, "loss": 0.8421, "step": 21640 }, { "epoch": 0.2637929143358561, "grad_norm": 1.9655030965805054, "learning_rate": 3.874855676715844e-06, "loss": 0.8385, "step": 21645 }, { "epoch": 0.2638538505600039, "grad_norm": 1.7849364280700684, "learning_rate": 3.874534958306607e-06, "loss": 0.8497, "step": 21650 }, { "epoch": 0.2639147867841517, "grad_norm": 1.9261302947998047, "learning_rate": 3.87421423989737e-06, "loss": 0.8018, "step": 21655 }, { "epoch": 0.2639757230082995, "grad_norm": 1.7229257822036743, "learning_rate": 3.873893521488133e-06, "loss": 0.8673, "step": 21660 }, { "epoch": 0.26403665923244735, "grad_norm": 2.1318559646606445, "learning_rate": 3.873572803078897e-06, "loss": 0.8756, "step": 21665 }, { "epoch": 0.2640975954565951, "grad_norm": 1.7405762672424316, "learning_rate": 3.87325208466966e-06, "loss": 0.8603, "step": 21670 }, { "epoch": 0.26415853168074294, "grad_norm": 1.7770599126815796, "learning_rate": 3.872931366260423e-06, "loss": 0.9007, "step": 21675 }, { "epoch": 0.26421946790489076, "grad_norm": 2.2142841815948486, "learning_rate": 3.872610647851187e-06, "loss": 0.8166, "step": 21680 }, { "epoch": 0.2642804041290385, "grad_norm": 1.7709887027740479, "learning_rate": 3.87228992944195e-06, "loss": 0.8606, "step": 21685 }, { "epoch": 0.26434134035318635, "grad_norm": 1.9215997457504272, "learning_rate": 3.871969211032713e-06, "loss": 0.9377, "step": 21690 }, { "epoch": 0.26440227657733417, "grad_norm": 1.8963285684585571, "learning_rate": 3.871648492623477e-06, "loss": 0.8916, "step": 21695 }, { "epoch": 0.264463212801482, "grad_norm": 1.9626768827438354, "learning_rate": 3.87132777421424e-06, "loss": 0.9165, "step": 21700 }, { "epoch": 0.26452414902562976, "grad_norm": 1.884667992591858, "learning_rate": 3.871007055805004e-06, "loss": 0.925, "step": 21705 }, { "epoch": 0.2645850852497776, "grad_norm": 1.9883867502212524, "learning_rate": 3.870686337395767e-06, "loss": 0.9103, "step": 21710 }, { "epoch": 0.2646460214739254, "grad_norm": 1.9019553661346436, "learning_rate": 3.87036561898653e-06, "loss": 0.8632, "step": 21715 }, { "epoch": 0.2647069576980732, "grad_norm": 1.9755383729934692, "learning_rate": 3.870044900577294e-06, "loss": 0.9032, "step": 21720 }, { "epoch": 0.264767893922221, "grad_norm": 1.9735978841781616, "learning_rate": 3.869724182168057e-06, "loss": 0.8679, "step": 21725 }, { "epoch": 0.2648288301463688, "grad_norm": 1.810849666595459, "learning_rate": 3.8694034637588205e-06, "loss": 0.9228, "step": 21730 }, { "epoch": 0.26488976637051664, "grad_norm": 2.0103766918182373, "learning_rate": 3.8690827453495835e-06, "loss": 0.8676, "step": 21735 }, { "epoch": 0.2649507025946644, "grad_norm": 2.145312547683716, "learning_rate": 3.8687620269403466e-06, "loss": 0.8353, "step": 21740 }, { "epoch": 0.26501163881881223, "grad_norm": 2.3339991569519043, "learning_rate": 3.8684413085311104e-06, "loss": 0.845, "step": 21745 }, { "epoch": 0.26507257504296006, "grad_norm": 2.1388368606567383, "learning_rate": 3.8681205901218734e-06, "loss": 0.8652, "step": 21750 }, { "epoch": 0.2651335112671078, "grad_norm": 1.7822812795639038, "learning_rate": 3.8677998717126365e-06, "loss": 0.8334, "step": 21755 }, { "epoch": 0.26519444749125565, "grad_norm": 1.8513801097869873, "learning_rate": 3.8674791533034e-06, "loss": 0.8846, "step": 21760 }, { "epoch": 0.26525538371540347, "grad_norm": 2.002500295639038, "learning_rate": 3.867158434894163e-06, "loss": 0.8605, "step": 21765 }, { "epoch": 0.2653163199395513, "grad_norm": 1.7014317512512207, "learning_rate": 3.866837716484926e-06, "loss": 0.8692, "step": 21770 }, { "epoch": 0.26537725616369906, "grad_norm": 1.858289122581482, "learning_rate": 3.86651699807569e-06, "loss": 0.9499, "step": 21775 }, { "epoch": 0.2654381923878469, "grad_norm": 1.9883606433868408, "learning_rate": 3.866196279666453e-06, "loss": 0.9139, "step": 21780 }, { "epoch": 0.2654991286119947, "grad_norm": 2.083705186843872, "learning_rate": 3.865875561257216e-06, "loss": 0.7804, "step": 21785 }, { "epoch": 0.2655600648361425, "grad_norm": 1.8344310522079468, "learning_rate": 3.86555484284798e-06, "loss": 0.9209, "step": 21790 }, { "epoch": 0.2656210010602903, "grad_norm": 2.150146245956421, "learning_rate": 3.865234124438743e-06, "loss": 0.9159, "step": 21795 }, { "epoch": 0.2656819372844381, "grad_norm": 1.695888876914978, "learning_rate": 3.864913406029506e-06, "loss": 0.8805, "step": 21800 }, { "epoch": 0.26574287350858594, "grad_norm": 1.9617631435394287, "learning_rate": 3.86459268762027e-06, "loss": 0.7914, "step": 21805 }, { "epoch": 0.2658038097327337, "grad_norm": 2.3735289573669434, "learning_rate": 3.864271969211033e-06, "loss": 0.9114, "step": 21810 }, { "epoch": 0.26586474595688153, "grad_norm": 1.6767334938049316, "learning_rate": 3.863951250801796e-06, "loss": 0.8403, "step": 21815 }, { "epoch": 0.26592568218102935, "grad_norm": 1.775179147720337, "learning_rate": 3.86363053239256e-06, "loss": 0.8446, "step": 21820 }, { "epoch": 0.2659866184051771, "grad_norm": 1.8998689651489258, "learning_rate": 3.863309813983323e-06, "loss": 0.9319, "step": 21825 }, { "epoch": 0.26604755462932494, "grad_norm": 2.0205698013305664, "learning_rate": 3.862989095574086e-06, "loss": 0.9037, "step": 21830 }, { "epoch": 0.26610849085347277, "grad_norm": 1.9879165887832642, "learning_rate": 3.862668377164849e-06, "loss": 0.798, "step": 21835 }, { "epoch": 0.26616942707762054, "grad_norm": 2.067525625228882, "learning_rate": 3.862347658755613e-06, "loss": 0.8812, "step": 21840 }, { "epoch": 0.26623036330176836, "grad_norm": 1.8637776374816895, "learning_rate": 3.862026940346376e-06, "loss": 0.8863, "step": 21845 }, { "epoch": 0.2662912995259162, "grad_norm": 2.094219207763672, "learning_rate": 3.86170622193714e-06, "loss": 0.8606, "step": 21850 }, { "epoch": 0.266352235750064, "grad_norm": 2.049889326095581, "learning_rate": 3.861385503527903e-06, "loss": 0.8122, "step": 21855 }, { "epoch": 0.26641317197421177, "grad_norm": 2.0363948345184326, "learning_rate": 3.861064785118666e-06, "loss": 0.8175, "step": 21860 }, { "epoch": 0.2664741081983596, "grad_norm": 1.9291001558303833, "learning_rate": 3.86074406670943e-06, "loss": 0.8498, "step": 21865 }, { "epoch": 0.2665350444225074, "grad_norm": 2.0498034954071045, "learning_rate": 3.860423348300193e-06, "loss": 0.881, "step": 21870 }, { "epoch": 0.2665959806466552, "grad_norm": 1.7941179275512695, "learning_rate": 3.8601026298909565e-06, "loss": 0.8567, "step": 21875 }, { "epoch": 0.266656916870803, "grad_norm": 2.0864505767822266, "learning_rate": 3.8597819114817195e-06, "loss": 0.8757, "step": 21880 }, { "epoch": 0.26671785309495083, "grad_norm": 1.879392147064209, "learning_rate": 3.8594611930724825e-06, "loss": 0.8305, "step": 21885 }, { "epoch": 0.26677878931909865, "grad_norm": 2.093634843826294, "learning_rate": 3.859140474663246e-06, "loss": 0.8326, "step": 21890 }, { "epoch": 0.2668397255432464, "grad_norm": 2.0486702919006348, "learning_rate": 3.858819756254009e-06, "loss": 0.838, "step": 21895 }, { "epoch": 0.26690066176739424, "grad_norm": 1.7325743436813354, "learning_rate": 3.858499037844773e-06, "loss": 0.8894, "step": 21900 }, { "epoch": 0.26696159799154207, "grad_norm": 1.9270912408828735, "learning_rate": 3.858178319435536e-06, "loss": 0.9542, "step": 21905 }, { "epoch": 0.26702253421568983, "grad_norm": 1.712429165840149, "learning_rate": 3.857857601026299e-06, "loss": 0.8519, "step": 21910 }, { "epoch": 0.26708347043983766, "grad_norm": 1.9162746667861938, "learning_rate": 3.857536882617062e-06, "loss": 0.8517, "step": 21915 }, { "epoch": 0.2671444066639855, "grad_norm": 1.9251925945281982, "learning_rate": 3.857216164207826e-06, "loss": 0.8915, "step": 21920 }, { "epoch": 0.2672053428881333, "grad_norm": 2.118469715118408, "learning_rate": 3.856895445798589e-06, "loss": 0.8772, "step": 21925 }, { "epoch": 0.26726627911228107, "grad_norm": 2.3717586994171143, "learning_rate": 3.856574727389352e-06, "loss": 0.9134, "step": 21930 }, { "epoch": 0.2673272153364289, "grad_norm": 2.289356231689453, "learning_rate": 3.856254008980116e-06, "loss": 0.9126, "step": 21935 }, { "epoch": 0.2673881515605767, "grad_norm": 1.85389244556427, "learning_rate": 3.855933290570879e-06, "loss": 0.9016, "step": 21940 }, { "epoch": 0.2674490877847245, "grad_norm": 1.87034273147583, "learning_rate": 3.855612572161642e-06, "loss": 0.9011, "step": 21945 }, { "epoch": 0.2675100240088723, "grad_norm": 2.1781005859375, "learning_rate": 3.855291853752406e-06, "loss": 0.8422, "step": 21950 }, { "epoch": 0.2675709602330201, "grad_norm": 2.024500846862793, "learning_rate": 3.854971135343169e-06, "loss": 0.8403, "step": 21955 }, { "epoch": 0.26763189645716795, "grad_norm": 2.357922077178955, "learning_rate": 3.854650416933932e-06, "loss": 0.9124, "step": 21960 }, { "epoch": 0.2676928326813157, "grad_norm": 1.7265815734863281, "learning_rate": 3.854329698524696e-06, "loss": 0.8881, "step": 21965 }, { "epoch": 0.26775376890546354, "grad_norm": 2.0321168899536133, "learning_rate": 3.854008980115459e-06, "loss": 0.8535, "step": 21970 }, { "epoch": 0.26781470512961136, "grad_norm": 2.25636887550354, "learning_rate": 3.853688261706222e-06, "loss": 0.9463, "step": 21975 }, { "epoch": 0.26787564135375913, "grad_norm": 2.1062920093536377, "learning_rate": 3.853367543296986e-06, "loss": 0.9094, "step": 21980 }, { "epoch": 0.26793657757790695, "grad_norm": 2.031242609024048, "learning_rate": 3.853046824887749e-06, "loss": 0.8559, "step": 21985 }, { "epoch": 0.2679975138020548, "grad_norm": 1.9634546041488647, "learning_rate": 3.852726106478512e-06, "loss": 0.8288, "step": 21990 }, { "epoch": 0.2680584500262026, "grad_norm": 2.0393619537353516, "learning_rate": 3.852405388069275e-06, "loss": 0.9137, "step": 21995 }, { "epoch": 0.26811938625035037, "grad_norm": 2.1189939975738525, "learning_rate": 3.852084669660039e-06, "loss": 0.8206, "step": 22000 }, { "epoch": 0.2681803224744982, "grad_norm": 1.6934716701507568, "learning_rate": 3.851763951250802e-06, "loss": 0.7948, "step": 22005 }, { "epoch": 0.268241258698646, "grad_norm": 2.106862783432007, "learning_rate": 3.8514432328415656e-06, "loss": 0.8795, "step": 22010 }, { "epoch": 0.2683021949227938, "grad_norm": 1.9650733470916748, "learning_rate": 3.851122514432329e-06, "loss": 0.7959, "step": 22015 }, { "epoch": 0.2683631311469416, "grad_norm": 1.907854676246643, "learning_rate": 3.850801796023092e-06, "loss": 0.8827, "step": 22020 }, { "epoch": 0.2684240673710894, "grad_norm": 1.915981411933899, "learning_rate": 3.8504810776138555e-06, "loss": 0.924, "step": 22025 }, { "epoch": 0.26848500359523725, "grad_norm": 2.2862448692321777, "learning_rate": 3.8501603592046185e-06, "loss": 0.9027, "step": 22030 }, { "epoch": 0.268545939819385, "grad_norm": 1.9609942436218262, "learning_rate": 3.849839640795382e-06, "loss": 0.8183, "step": 22035 }, { "epoch": 0.26860687604353284, "grad_norm": 1.8218393325805664, "learning_rate": 3.849518922386145e-06, "loss": 0.8702, "step": 22040 }, { "epoch": 0.26866781226768066, "grad_norm": 2.11124587059021, "learning_rate": 3.849198203976909e-06, "loss": 0.8041, "step": 22045 }, { "epoch": 0.26872874849182843, "grad_norm": 2.0060484409332275, "learning_rate": 3.848877485567672e-06, "loss": 0.876, "step": 22050 }, { "epoch": 0.26878968471597625, "grad_norm": 1.6668604612350464, "learning_rate": 3.848556767158435e-06, "loss": 0.8207, "step": 22055 }, { "epoch": 0.2688506209401241, "grad_norm": 1.8242253065109253, "learning_rate": 3.848236048749199e-06, "loss": 0.8699, "step": 22060 }, { "epoch": 0.2689115571642719, "grad_norm": 1.8681355714797974, "learning_rate": 3.847915330339962e-06, "loss": 0.8639, "step": 22065 }, { "epoch": 0.26897249338841966, "grad_norm": 1.668951153755188, "learning_rate": 3.847594611930725e-06, "loss": 0.7904, "step": 22070 }, { "epoch": 0.2690334296125675, "grad_norm": 1.698498010635376, "learning_rate": 3.847273893521489e-06, "loss": 0.8429, "step": 22075 }, { "epoch": 0.2690943658367153, "grad_norm": 2.1251001358032227, "learning_rate": 3.846953175112252e-06, "loss": 0.8598, "step": 22080 }, { "epoch": 0.2691553020608631, "grad_norm": 1.5960345268249512, "learning_rate": 3.846632456703015e-06, "loss": 0.8104, "step": 22085 }, { "epoch": 0.2692162382850109, "grad_norm": 1.7291756868362427, "learning_rate": 3.846311738293778e-06, "loss": 0.9044, "step": 22090 }, { "epoch": 0.2692771745091587, "grad_norm": 1.8196959495544434, "learning_rate": 3.845991019884542e-06, "loss": 0.8618, "step": 22095 }, { "epoch": 0.26933811073330655, "grad_norm": 1.7712409496307373, "learning_rate": 3.845670301475305e-06, "loss": 0.8063, "step": 22100 }, { "epoch": 0.2693990469574543, "grad_norm": 1.9264039993286133, "learning_rate": 3.845349583066068e-06, "loss": 0.8375, "step": 22105 }, { "epoch": 0.26945998318160214, "grad_norm": 2.2063450813293457, "learning_rate": 3.845028864656832e-06, "loss": 0.8827, "step": 22110 }, { "epoch": 0.26952091940574996, "grad_norm": 1.9611932039260864, "learning_rate": 3.844708146247595e-06, "loss": 0.8476, "step": 22115 }, { "epoch": 0.2695818556298977, "grad_norm": 1.9787449836730957, "learning_rate": 3.844387427838358e-06, "loss": 0.8093, "step": 22120 }, { "epoch": 0.26964279185404555, "grad_norm": 1.9659961462020874, "learning_rate": 3.844066709429122e-06, "loss": 0.9122, "step": 22125 }, { "epoch": 0.26970372807819337, "grad_norm": 2.017467737197876, "learning_rate": 3.843745991019885e-06, "loss": 0.8514, "step": 22130 }, { "epoch": 0.2697646643023412, "grad_norm": 2.0849859714508057, "learning_rate": 3.843425272610648e-06, "loss": 0.8289, "step": 22135 }, { "epoch": 0.26982560052648896, "grad_norm": 2.2485475540161133, "learning_rate": 3.843104554201412e-06, "loss": 0.8796, "step": 22140 }, { "epoch": 0.2698865367506368, "grad_norm": 2.116715908050537, "learning_rate": 3.842783835792175e-06, "loss": 0.8744, "step": 22145 }, { "epoch": 0.2699474729747846, "grad_norm": 1.817821979522705, "learning_rate": 3.842463117382938e-06, "loss": 0.848, "step": 22150 }, { "epoch": 0.2700084091989324, "grad_norm": 1.8497344255447388, "learning_rate": 3.8421423989737015e-06, "loss": 0.803, "step": 22155 }, { "epoch": 0.2700693454230802, "grad_norm": 2.0015482902526855, "learning_rate": 3.8418216805644645e-06, "loss": 0.8716, "step": 22160 }, { "epoch": 0.270130281647228, "grad_norm": 1.7527387142181396, "learning_rate": 3.8415009621552276e-06, "loss": 0.8604, "step": 22165 }, { "epoch": 0.27019121787137584, "grad_norm": 1.8279545307159424, "learning_rate": 3.841180243745991e-06, "loss": 0.8779, "step": 22170 }, { "epoch": 0.2702521540955236, "grad_norm": 1.9027413129806519, "learning_rate": 3.8408595253367544e-06, "loss": 0.8608, "step": 22175 }, { "epoch": 0.27031309031967143, "grad_norm": 2.4193613529205322, "learning_rate": 3.840538806927518e-06, "loss": 0.8668, "step": 22180 }, { "epoch": 0.27037402654381926, "grad_norm": 2.2146174907684326, "learning_rate": 3.840218088518281e-06, "loss": 0.8782, "step": 22185 }, { "epoch": 0.270434962767967, "grad_norm": 1.9527218341827393, "learning_rate": 3.839897370109044e-06, "loss": 0.8989, "step": 22190 }, { "epoch": 0.27049589899211485, "grad_norm": 2.0400609970092773, "learning_rate": 3.839576651699808e-06, "loss": 0.788, "step": 22195 }, { "epoch": 0.27055683521626267, "grad_norm": 2.1560232639312744, "learning_rate": 3.839255933290571e-06, "loss": 0.8689, "step": 22200 }, { "epoch": 0.2706177714404105, "grad_norm": 1.8191126585006714, "learning_rate": 3.838935214881335e-06, "loss": 0.8048, "step": 22205 }, { "epoch": 0.27067870766455826, "grad_norm": 1.9670120477676392, "learning_rate": 3.838614496472098e-06, "loss": 0.8567, "step": 22210 }, { "epoch": 0.2707396438887061, "grad_norm": 1.6993162631988525, "learning_rate": 3.838293778062861e-06, "loss": 0.7997, "step": 22215 }, { "epoch": 0.2708005801128539, "grad_norm": 1.8454608917236328, "learning_rate": 3.837973059653625e-06, "loss": 0.8992, "step": 22220 }, { "epoch": 0.2708615163370017, "grad_norm": 2.4205636978149414, "learning_rate": 3.837652341244388e-06, "loss": 1.0104, "step": 22225 }, { "epoch": 0.2709224525611495, "grad_norm": 2.1932857036590576, "learning_rate": 3.837331622835151e-06, "loss": 0.9258, "step": 22230 }, { "epoch": 0.2709833887852973, "grad_norm": 1.7426222562789917, "learning_rate": 3.837010904425915e-06, "loss": 0.8836, "step": 22235 }, { "epoch": 0.27104432500944514, "grad_norm": 2.161935567855835, "learning_rate": 3.836690186016678e-06, "loss": 0.942, "step": 22240 }, { "epoch": 0.2711052612335929, "grad_norm": 1.8241795301437378, "learning_rate": 3.836369467607441e-06, "loss": 0.8206, "step": 22245 }, { "epoch": 0.27116619745774073, "grad_norm": 2.019594669342041, "learning_rate": 3.836048749198204e-06, "loss": 0.8755, "step": 22250 }, { "epoch": 0.27122713368188855, "grad_norm": 1.991451621055603, "learning_rate": 3.835728030788968e-06, "loss": 0.884, "step": 22255 }, { "epoch": 0.2712880699060363, "grad_norm": 1.7427852153778076, "learning_rate": 3.835407312379731e-06, "loss": 0.8388, "step": 22260 }, { "epoch": 0.27134900613018414, "grad_norm": 1.8612496852874756, "learning_rate": 3.835086593970494e-06, "loss": 0.8572, "step": 22265 }, { "epoch": 0.27140994235433197, "grad_norm": 1.8782193660736084, "learning_rate": 3.834765875561258e-06, "loss": 0.8529, "step": 22270 }, { "epoch": 0.2714708785784798, "grad_norm": 2.170367479324341, "learning_rate": 3.834445157152021e-06, "loss": 0.8635, "step": 22275 }, { "epoch": 0.27153181480262756, "grad_norm": 2.0207319259643555, "learning_rate": 3.834124438742784e-06, "loss": 0.864, "step": 22280 }, { "epoch": 0.2715927510267754, "grad_norm": 2.219172477722168, "learning_rate": 3.833803720333548e-06, "loss": 0.8913, "step": 22285 }, { "epoch": 0.2716536872509232, "grad_norm": 1.966477870941162, "learning_rate": 3.833483001924311e-06, "loss": 0.8394, "step": 22290 }, { "epoch": 0.27171462347507097, "grad_norm": 2.1902105808258057, "learning_rate": 3.833162283515074e-06, "loss": 0.8381, "step": 22295 }, { "epoch": 0.2717755596992188, "grad_norm": 1.8412851095199585, "learning_rate": 3.8328415651058375e-06, "loss": 0.8495, "step": 22300 }, { "epoch": 0.2718364959233666, "grad_norm": 1.8321341276168823, "learning_rate": 3.8325208466966005e-06, "loss": 0.8116, "step": 22305 }, { "epoch": 0.2718974321475144, "grad_norm": 2.3801839351654053, "learning_rate": 3.8322001282873635e-06, "loss": 0.8278, "step": 22310 }, { "epoch": 0.2719583683716622, "grad_norm": 1.7191678285598755, "learning_rate": 3.831879409878127e-06, "loss": 0.915, "step": 22315 }, { "epoch": 0.27201930459581003, "grad_norm": 2.157909631729126, "learning_rate": 3.83155869146889e-06, "loss": 0.8901, "step": 22320 }, { "epoch": 0.27208024081995785, "grad_norm": 1.7030202150344849, "learning_rate": 3.831237973059654e-06, "loss": 0.8931, "step": 22325 }, { "epoch": 0.2721411770441056, "grad_norm": 2.018017530441284, "learning_rate": 3.830917254650417e-06, "loss": 0.9012, "step": 22330 }, { "epoch": 0.27220211326825344, "grad_norm": 1.922491431236267, "learning_rate": 3.83059653624118e-06, "loss": 0.8212, "step": 22335 }, { "epoch": 0.27226304949240127, "grad_norm": 2.189913272857666, "learning_rate": 3.830275817831944e-06, "loss": 0.9032, "step": 22340 }, { "epoch": 0.27232398571654903, "grad_norm": 2.082996368408203, "learning_rate": 3.829955099422707e-06, "loss": 0.8537, "step": 22345 }, { "epoch": 0.27238492194069686, "grad_norm": 2.3750171661376953, "learning_rate": 3.829634381013471e-06, "loss": 0.8536, "step": 22350 }, { "epoch": 0.2724458581648447, "grad_norm": 2.299992799758911, "learning_rate": 3.829313662604234e-06, "loss": 0.8492, "step": 22355 }, { "epoch": 0.2725067943889925, "grad_norm": 2.157451629638672, "learning_rate": 3.828992944194997e-06, "loss": 0.891, "step": 22360 }, { "epoch": 0.27256773061314027, "grad_norm": 1.9470046758651733, "learning_rate": 3.828672225785761e-06, "loss": 0.8292, "step": 22365 }, { "epoch": 0.2726286668372881, "grad_norm": 1.8840008974075317, "learning_rate": 3.828351507376524e-06, "loss": 0.8434, "step": 22370 }, { "epoch": 0.2726896030614359, "grad_norm": 2.073254108428955, "learning_rate": 3.828030788967287e-06, "loss": 0.8836, "step": 22375 }, { "epoch": 0.2727505392855837, "grad_norm": 1.9197006225585938, "learning_rate": 3.827710070558051e-06, "loss": 0.8243, "step": 22380 }, { "epoch": 0.2728114755097315, "grad_norm": 1.9855045080184937, "learning_rate": 3.827389352148814e-06, "loss": 0.8335, "step": 22385 }, { "epoch": 0.2728724117338793, "grad_norm": 1.702256441116333, "learning_rate": 3.827068633739577e-06, "loss": 0.9125, "step": 22390 }, { "epoch": 0.27293334795802715, "grad_norm": 2.1235649585723877, "learning_rate": 3.826747915330341e-06, "loss": 0.92, "step": 22395 }, { "epoch": 0.2729942841821749, "grad_norm": 2.125946283340454, "learning_rate": 3.826427196921104e-06, "loss": 0.8113, "step": 22400 }, { "epoch": 0.27305522040632274, "grad_norm": 1.8341752290725708, "learning_rate": 3.826106478511867e-06, "loss": 0.8883, "step": 22405 }, { "epoch": 0.27311615663047056, "grad_norm": 1.7587261199951172, "learning_rate": 3.825785760102631e-06, "loss": 0.8611, "step": 22410 }, { "epoch": 0.27317709285461833, "grad_norm": 2.023360252380371, "learning_rate": 3.825465041693394e-06, "loss": 0.8462, "step": 22415 }, { "epoch": 0.27323802907876615, "grad_norm": 2.1534054279327393, "learning_rate": 3.825144323284157e-06, "loss": 0.9164, "step": 22420 }, { "epoch": 0.273298965302914, "grad_norm": 2.035433053970337, "learning_rate": 3.82482360487492e-06, "loss": 0.9145, "step": 22425 }, { "epoch": 0.2733599015270618, "grad_norm": 2.0391623973846436, "learning_rate": 3.8245028864656835e-06, "loss": 0.891, "step": 22430 }, { "epoch": 0.27342083775120957, "grad_norm": 2.514958620071411, "learning_rate": 3.8241821680564466e-06, "loss": 0.8519, "step": 22435 }, { "epoch": 0.2734817739753574, "grad_norm": 1.5139590501785278, "learning_rate": 3.8238614496472096e-06, "loss": 0.8181, "step": 22440 }, { "epoch": 0.2735427101995052, "grad_norm": 2.2091822624206543, "learning_rate": 3.8235407312379734e-06, "loss": 0.9244, "step": 22445 }, { "epoch": 0.273603646423653, "grad_norm": 1.6764461994171143, "learning_rate": 3.8232200128287364e-06, "loss": 0.8634, "step": 22450 }, { "epoch": 0.2736645826478008, "grad_norm": 2.2989137172698975, "learning_rate": 3.8228992944194995e-06, "loss": 0.8387, "step": 22455 }, { "epoch": 0.2737255188719486, "grad_norm": 2.0599753856658936, "learning_rate": 3.822578576010263e-06, "loss": 0.8293, "step": 22460 }, { "epoch": 0.27378645509609645, "grad_norm": 2.1259140968322754, "learning_rate": 3.822257857601026e-06, "loss": 0.8702, "step": 22465 }, { "epoch": 0.2738473913202442, "grad_norm": 1.8811187744140625, "learning_rate": 3.821937139191789e-06, "loss": 0.8598, "step": 22470 }, { "epoch": 0.27390832754439204, "grad_norm": 1.711856484413147, "learning_rate": 3.821616420782553e-06, "loss": 0.799, "step": 22475 }, { "epoch": 0.27396926376853986, "grad_norm": 1.9120697975158691, "learning_rate": 3.821295702373316e-06, "loss": 0.8606, "step": 22480 }, { "epoch": 0.27403019999268763, "grad_norm": 2.2875564098358154, "learning_rate": 3.82097498396408e-06, "loss": 0.9059, "step": 22485 }, { "epoch": 0.27409113621683545, "grad_norm": 1.9200563430786133, "learning_rate": 3.820654265554843e-06, "loss": 0.9182, "step": 22490 }, { "epoch": 0.2741520724409833, "grad_norm": 2.389296770095825, "learning_rate": 3.820333547145606e-06, "loss": 0.91, "step": 22495 }, { "epoch": 0.2742130086651311, "grad_norm": 2.0712664127349854, "learning_rate": 3.82001282873637e-06, "loss": 0.8102, "step": 22500 }, { "epoch": 0.27427394488927886, "grad_norm": 2.016406297683716, "learning_rate": 3.819692110327133e-06, "loss": 0.9667, "step": 22505 }, { "epoch": 0.2743348811134267, "grad_norm": 1.846349835395813, "learning_rate": 3.819371391917897e-06, "loss": 0.7692, "step": 22510 }, { "epoch": 0.2743958173375745, "grad_norm": 1.9271788597106934, "learning_rate": 3.81905067350866e-06, "loss": 0.8893, "step": 22515 }, { "epoch": 0.2744567535617223, "grad_norm": 1.9488567113876343, "learning_rate": 3.818729955099423e-06, "loss": 0.8548, "step": 22520 }, { "epoch": 0.2745176897858701, "grad_norm": 1.9191817045211792, "learning_rate": 3.818409236690187e-06, "loss": 0.8179, "step": 22525 }, { "epoch": 0.2745786260100179, "grad_norm": 2.428400993347168, "learning_rate": 3.81808851828095e-06, "loss": 0.858, "step": 22530 }, { "epoch": 0.27463956223416575, "grad_norm": 2.078334093093872, "learning_rate": 3.817767799871713e-06, "loss": 0.8333, "step": 22535 }, { "epoch": 0.2747004984583135, "grad_norm": 1.9204628467559814, "learning_rate": 3.817447081462477e-06, "loss": 0.8606, "step": 22540 }, { "epoch": 0.27476143468246134, "grad_norm": 1.988571047782898, "learning_rate": 3.81712636305324e-06, "loss": 0.8734, "step": 22545 }, { "epoch": 0.27482237090660916, "grad_norm": 2.1730129718780518, "learning_rate": 3.816805644644003e-06, "loss": 0.8546, "step": 22550 }, { "epoch": 0.2748833071307569, "grad_norm": 1.9828726053237915, "learning_rate": 3.816484926234767e-06, "loss": 0.8686, "step": 22555 }, { "epoch": 0.27494424335490475, "grad_norm": 1.8709806203842163, "learning_rate": 3.81616420782553e-06, "loss": 0.9068, "step": 22560 }, { "epoch": 0.27500517957905257, "grad_norm": 2.0397729873657227, "learning_rate": 3.815843489416293e-06, "loss": 0.8087, "step": 22565 }, { "epoch": 0.2750661158032004, "grad_norm": 2.007274866104126, "learning_rate": 3.8155227710070565e-06, "loss": 0.8473, "step": 22570 }, { "epoch": 0.27512705202734816, "grad_norm": 1.892098069190979, "learning_rate": 3.8152020525978195e-06, "loss": 0.8257, "step": 22575 }, { "epoch": 0.275187988251496, "grad_norm": 1.915652871131897, "learning_rate": 3.8148813341885825e-06, "loss": 0.9433, "step": 22580 }, { "epoch": 0.2752489244756438, "grad_norm": 1.8738268613815308, "learning_rate": 3.814560615779346e-06, "loss": 0.9029, "step": 22585 }, { "epoch": 0.2753098606997916, "grad_norm": 1.8529868125915527, "learning_rate": 3.8142398973701094e-06, "loss": 0.8495, "step": 22590 }, { "epoch": 0.2753707969239394, "grad_norm": 1.8485792875289917, "learning_rate": 3.8139191789608724e-06, "loss": 0.8579, "step": 22595 }, { "epoch": 0.2754317331480872, "grad_norm": 2.1794729232788086, "learning_rate": 3.813598460551636e-06, "loss": 0.8619, "step": 22600 }, { "epoch": 0.27549266937223504, "grad_norm": 1.9962046146392822, "learning_rate": 3.8132777421423993e-06, "loss": 0.8541, "step": 22605 }, { "epoch": 0.2755536055963828, "grad_norm": 2.1795661449432373, "learning_rate": 3.8129570237331627e-06, "loss": 0.8891, "step": 22610 }, { "epoch": 0.27561454182053063, "grad_norm": 1.779665231704712, "learning_rate": 3.8126363053239257e-06, "loss": 0.827, "step": 22615 }, { "epoch": 0.27567547804467846, "grad_norm": 1.8089038133621216, "learning_rate": 3.812315586914689e-06, "loss": 0.8739, "step": 22620 }, { "epoch": 0.2757364142688262, "grad_norm": 1.9976117610931396, "learning_rate": 3.8119948685054526e-06, "loss": 0.8523, "step": 22625 }, { "epoch": 0.27579735049297405, "grad_norm": 2.725276470184326, "learning_rate": 3.8116741500962156e-06, "loss": 0.8664, "step": 22630 }, { "epoch": 0.27585828671712187, "grad_norm": 2.031498670578003, "learning_rate": 3.8113534316869795e-06, "loss": 0.8898, "step": 22635 }, { "epoch": 0.2759192229412697, "grad_norm": 1.6846877336502075, "learning_rate": 3.8110327132777425e-06, "loss": 0.8203, "step": 22640 }, { "epoch": 0.27598015916541746, "grad_norm": 2.115262508392334, "learning_rate": 3.8107119948685055e-06, "loss": 0.8999, "step": 22645 }, { "epoch": 0.2760410953895653, "grad_norm": 1.929963231086731, "learning_rate": 3.8103912764592694e-06, "loss": 0.8975, "step": 22650 }, { "epoch": 0.2761020316137131, "grad_norm": 2.1010067462921143, "learning_rate": 3.8100705580500324e-06, "loss": 0.8902, "step": 22655 }, { "epoch": 0.2761629678378609, "grad_norm": 1.9973763227462769, "learning_rate": 3.8097498396407954e-06, "loss": 0.8424, "step": 22660 }, { "epoch": 0.2762239040620087, "grad_norm": 1.721801996231079, "learning_rate": 3.8094291212315593e-06, "loss": 0.8621, "step": 22665 }, { "epoch": 0.2762848402861565, "grad_norm": 2.6158363819122314, "learning_rate": 3.8091084028223223e-06, "loss": 0.9167, "step": 22670 }, { "epoch": 0.27634577651030434, "grad_norm": 2.1075005531311035, "learning_rate": 3.8087876844130858e-06, "loss": 0.8475, "step": 22675 }, { "epoch": 0.2764067127344521, "grad_norm": 2.2590386867523193, "learning_rate": 3.8084669660038488e-06, "loss": 0.8556, "step": 22680 }, { "epoch": 0.27646764895859993, "grad_norm": 1.9760767221450806, "learning_rate": 3.8081462475946122e-06, "loss": 0.8998, "step": 22685 }, { "epoch": 0.27652858518274775, "grad_norm": 2.035741090774536, "learning_rate": 3.8078255291853757e-06, "loss": 0.8207, "step": 22690 }, { "epoch": 0.2765895214068955, "grad_norm": 2.4882214069366455, "learning_rate": 3.8075048107761387e-06, "loss": 0.8599, "step": 22695 }, { "epoch": 0.27665045763104334, "grad_norm": 2.104860544204712, "learning_rate": 3.8071840923669025e-06, "loss": 0.8859, "step": 22700 }, { "epoch": 0.27671139385519117, "grad_norm": 2.011425018310547, "learning_rate": 3.8068633739576656e-06, "loss": 0.9408, "step": 22705 }, { "epoch": 0.276772330079339, "grad_norm": 2.1163737773895264, "learning_rate": 3.8065426555484286e-06, "loss": 0.8522, "step": 22710 }, { "epoch": 0.27683326630348676, "grad_norm": 2.313462495803833, "learning_rate": 3.8062219371391924e-06, "loss": 0.8222, "step": 22715 }, { "epoch": 0.2768942025276346, "grad_norm": 2.127039670944214, "learning_rate": 3.8059012187299554e-06, "loss": 0.8879, "step": 22720 }, { "epoch": 0.2769551387517824, "grad_norm": 2.2146246433258057, "learning_rate": 3.8055805003207185e-06, "loss": 0.8544, "step": 22725 }, { "epoch": 0.27701607497593017, "grad_norm": 1.8715405464172363, "learning_rate": 3.8052597819114823e-06, "loss": 0.8479, "step": 22730 }, { "epoch": 0.277077011200078, "grad_norm": 1.9849025011062622, "learning_rate": 3.8049390635022453e-06, "loss": 0.8735, "step": 22735 }, { "epoch": 0.2771379474242258, "grad_norm": 1.9268019199371338, "learning_rate": 3.8046183450930084e-06, "loss": 0.8795, "step": 22740 }, { "epoch": 0.27719888364837364, "grad_norm": 1.771347165107727, "learning_rate": 3.8042976266837722e-06, "loss": 0.8544, "step": 22745 }, { "epoch": 0.2772598198725214, "grad_norm": 1.9671109914779663, "learning_rate": 3.8039769082745352e-06, "loss": 0.8644, "step": 22750 }, { "epoch": 0.27732075609666923, "grad_norm": 1.9917277097702026, "learning_rate": 3.8036561898652987e-06, "loss": 0.8604, "step": 22755 }, { "epoch": 0.27738169232081705, "grad_norm": 2.257694721221924, "learning_rate": 3.8033354714560617e-06, "loss": 0.874, "step": 22760 }, { "epoch": 0.2774426285449648, "grad_norm": 1.5826435089111328, "learning_rate": 3.803014753046825e-06, "loss": 0.8387, "step": 22765 }, { "epoch": 0.27750356476911264, "grad_norm": 1.7022424936294556, "learning_rate": 3.8026940346375886e-06, "loss": 0.8733, "step": 22770 }, { "epoch": 0.27756450099326047, "grad_norm": 1.9109421968460083, "learning_rate": 3.8023733162283516e-06, "loss": 0.845, "step": 22775 }, { "epoch": 0.27762543721740823, "grad_norm": 1.6650326251983643, "learning_rate": 3.8020525978191155e-06, "loss": 0.8684, "step": 22780 }, { "epoch": 0.27768637344155606, "grad_norm": 2.0829410552978516, "learning_rate": 3.8017318794098785e-06, "loss": 0.8811, "step": 22785 }, { "epoch": 0.2777473096657039, "grad_norm": 2.3027069568634033, "learning_rate": 3.8014111610006415e-06, "loss": 0.9136, "step": 22790 }, { "epoch": 0.2778082458898517, "grad_norm": 2.0554847717285156, "learning_rate": 3.8010904425914054e-06, "loss": 0.8688, "step": 22795 }, { "epoch": 0.27786918211399947, "grad_norm": 2.1508095264434814, "learning_rate": 3.8007697241821684e-06, "loss": 0.9188, "step": 22800 }, { "epoch": 0.2779301183381473, "grad_norm": 2.1848671436309814, "learning_rate": 3.8004490057729314e-06, "loss": 0.8686, "step": 22805 }, { "epoch": 0.2779910545622951, "grad_norm": 1.9739978313446045, "learning_rate": 3.8001282873636953e-06, "loss": 0.8451, "step": 22810 }, { "epoch": 0.2780519907864429, "grad_norm": 1.8220057487487793, "learning_rate": 3.7998075689544583e-06, "loss": 0.9329, "step": 22815 }, { "epoch": 0.2781129270105907, "grad_norm": 2.0548408031463623, "learning_rate": 3.7994868505452213e-06, "loss": 0.8763, "step": 22820 }, { "epoch": 0.2781738632347385, "grad_norm": 1.98201322555542, "learning_rate": 3.799166132135985e-06, "loss": 0.9054, "step": 22825 }, { "epoch": 0.27823479945888635, "grad_norm": 1.5750348567962646, "learning_rate": 3.798845413726748e-06, "loss": 0.8952, "step": 22830 }, { "epoch": 0.2782957356830341, "grad_norm": 2.192603349685669, "learning_rate": 3.7985246953175116e-06, "loss": 0.87, "step": 22835 }, { "epoch": 0.27835667190718194, "grad_norm": 1.872659683227539, "learning_rate": 3.7982039769082746e-06, "loss": 0.9326, "step": 22840 }, { "epoch": 0.27841760813132976, "grad_norm": 1.9420932531356812, "learning_rate": 3.797883258499038e-06, "loss": 0.8647, "step": 22845 }, { "epoch": 0.27847854435547753, "grad_norm": 1.8401962518692017, "learning_rate": 3.7975625400898015e-06, "loss": 0.9201, "step": 22850 }, { "epoch": 0.27853948057962535, "grad_norm": 1.892549991607666, "learning_rate": 3.7972418216805645e-06, "loss": 0.8409, "step": 22855 }, { "epoch": 0.2786004168037732, "grad_norm": 1.9144971370697021, "learning_rate": 3.7969211032713284e-06, "loss": 0.8939, "step": 22860 }, { "epoch": 0.278661353027921, "grad_norm": 1.956487774848938, "learning_rate": 3.7966003848620914e-06, "loss": 0.9293, "step": 22865 }, { "epoch": 0.27872228925206877, "grad_norm": 1.8461954593658447, "learning_rate": 3.7962796664528544e-06, "loss": 0.8787, "step": 22870 }, { "epoch": 0.2787832254762166, "grad_norm": 1.8351554870605469, "learning_rate": 3.7959589480436183e-06, "loss": 0.8689, "step": 22875 }, { "epoch": 0.2788441617003644, "grad_norm": 1.8742903470993042, "learning_rate": 3.7956382296343813e-06, "loss": 0.9123, "step": 22880 }, { "epoch": 0.2789050979245122, "grad_norm": 1.9252893924713135, "learning_rate": 3.7953175112251443e-06, "loss": 0.8034, "step": 22885 }, { "epoch": 0.27896603414866, "grad_norm": 1.833045482635498, "learning_rate": 3.794996792815908e-06, "loss": 0.8621, "step": 22890 }, { "epoch": 0.2790269703728078, "grad_norm": 2.4729230403900146, "learning_rate": 3.794676074406671e-06, "loss": 0.9567, "step": 22895 }, { "epoch": 0.27908790659695565, "grad_norm": 1.8971291780471802, "learning_rate": 3.7943553559974346e-06, "loss": 0.915, "step": 22900 }, { "epoch": 0.2791488428211034, "grad_norm": 1.9101300239562988, "learning_rate": 3.794034637588198e-06, "loss": 0.9437, "step": 22905 }, { "epoch": 0.27920977904525124, "grad_norm": 2.2091503143310547, "learning_rate": 3.793713919178961e-06, "loss": 0.8873, "step": 22910 }, { "epoch": 0.27927071526939906, "grad_norm": 1.8818013668060303, "learning_rate": 3.7933932007697245e-06, "loss": 0.8946, "step": 22915 }, { "epoch": 0.27933165149354683, "grad_norm": 1.907110571861267, "learning_rate": 3.7930724823604876e-06, "loss": 0.9347, "step": 22920 }, { "epoch": 0.27939258771769465, "grad_norm": 1.9672329425811768, "learning_rate": 3.7927517639512514e-06, "loss": 0.7704, "step": 22925 }, { "epoch": 0.2794535239418425, "grad_norm": 1.9254553318023682, "learning_rate": 3.7924310455420144e-06, "loss": 0.9057, "step": 22930 }, { "epoch": 0.2795144601659903, "grad_norm": 2.1599700450897217, "learning_rate": 3.7921103271327775e-06, "loss": 0.8168, "step": 22935 }, { "epoch": 0.27957539639013806, "grad_norm": 2.084470272064209, "learning_rate": 3.7917896087235413e-06, "loss": 0.8895, "step": 22940 }, { "epoch": 0.2796363326142859, "grad_norm": 1.889076828956604, "learning_rate": 3.7914688903143043e-06, "loss": 0.8289, "step": 22945 }, { "epoch": 0.2796972688384337, "grad_norm": 1.816754698753357, "learning_rate": 3.7911481719050673e-06, "loss": 0.8196, "step": 22950 }, { "epoch": 0.2797582050625815, "grad_norm": 1.9492919445037842, "learning_rate": 3.7908274534958312e-06, "loss": 0.8498, "step": 22955 }, { "epoch": 0.2798191412867293, "grad_norm": 1.8783329725265503, "learning_rate": 3.7905067350865942e-06, "loss": 0.9458, "step": 22960 }, { "epoch": 0.2798800775108771, "grad_norm": 2.033005475997925, "learning_rate": 3.7901860166773572e-06, "loss": 0.8535, "step": 22965 }, { "epoch": 0.27994101373502495, "grad_norm": 1.75349760055542, "learning_rate": 3.789865298268121e-06, "loss": 0.8409, "step": 22970 }, { "epoch": 0.2800019499591727, "grad_norm": 1.8618090152740479, "learning_rate": 3.789544579858884e-06, "loss": 0.9743, "step": 22975 }, { "epoch": 0.28006288618332054, "grad_norm": 2.042553424835205, "learning_rate": 3.7892238614496476e-06, "loss": 0.8501, "step": 22980 }, { "epoch": 0.28012382240746836, "grad_norm": 1.9194798469543457, "learning_rate": 3.788903143040411e-06, "loss": 0.8751, "step": 22985 }, { "epoch": 0.2801847586316161, "grad_norm": 1.897804856300354, "learning_rate": 3.788582424631174e-06, "loss": 0.8682, "step": 22990 }, { "epoch": 0.28024569485576395, "grad_norm": 2.000704288482666, "learning_rate": 3.7882617062219375e-06, "loss": 0.8518, "step": 22995 }, { "epoch": 0.28030663107991177, "grad_norm": 2.050161361694336, "learning_rate": 3.787940987812701e-06, "loss": 0.8342, "step": 23000 }, { "epoch": 0.2803675673040596, "grad_norm": 1.9698163270950317, "learning_rate": 3.7876202694034643e-06, "loss": 0.8674, "step": 23005 }, { "epoch": 0.28042850352820736, "grad_norm": 1.8853365182876587, "learning_rate": 3.7872995509942274e-06, "loss": 0.8554, "step": 23010 }, { "epoch": 0.2804894397523552, "grad_norm": 2.0898172855377197, "learning_rate": 3.7869788325849904e-06, "loss": 0.8586, "step": 23015 }, { "epoch": 0.280550375976503, "grad_norm": 2.14235782623291, "learning_rate": 3.7866581141757542e-06, "loss": 0.9393, "step": 23020 }, { "epoch": 0.2806113122006508, "grad_norm": 1.9710241556167603, "learning_rate": 3.7863373957665173e-06, "loss": 0.9465, "step": 23025 }, { "epoch": 0.2806722484247986, "grad_norm": 1.8604085445404053, "learning_rate": 3.7860166773572803e-06, "loss": 0.8796, "step": 23030 }, { "epoch": 0.2807331846489464, "grad_norm": 1.8993147611618042, "learning_rate": 3.785695958948044e-06, "loss": 0.8175, "step": 23035 }, { "epoch": 0.28079412087309424, "grad_norm": 2.012310266494751, "learning_rate": 3.785375240538807e-06, "loss": 0.8765, "step": 23040 }, { "epoch": 0.280855057097242, "grad_norm": 1.9510433673858643, "learning_rate": 3.78505452212957e-06, "loss": 0.838, "step": 23045 }, { "epoch": 0.28091599332138983, "grad_norm": 2.0065908432006836, "learning_rate": 3.784733803720334e-06, "loss": 0.889, "step": 23050 }, { "epoch": 0.28097692954553766, "grad_norm": 2.2397446632385254, "learning_rate": 3.784413085311097e-06, "loss": 0.8449, "step": 23055 }, { "epoch": 0.2810378657696854, "grad_norm": 2.0379393100738525, "learning_rate": 3.7840923669018605e-06, "loss": 0.87, "step": 23060 }, { "epoch": 0.28109880199383325, "grad_norm": 2.0964856147766113, "learning_rate": 3.783771648492624e-06, "loss": 0.8784, "step": 23065 }, { "epoch": 0.28115973821798107, "grad_norm": 2.1476738452911377, "learning_rate": 3.783450930083387e-06, "loss": 0.9033, "step": 23070 }, { "epoch": 0.2812206744421289, "grad_norm": 1.8492894172668457, "learning_rate": 3.7831302116741504e-06, "loss": 0.8701, "step": 23075 }, { "epoch": 0.28128161066627666, "grad_norm": 1.6531908512115479, "learning_rate": 3.782809493264914e-06, "loss": 0.8105, "step": 23080 }, { "epoch": 0.2813425468904245, "grad_norm": 2.28547739982605, "learning_rate": 3.7824887748556773e-06, "loss": 0.8944, "step": 23085 }, { "epoch": 0.2814034831145723, "grad_norm": 2.1198620796203613, "learning_rate": 3.7821680564464403e-06, "loss": 0.82, "step": 23090 }, { "epoch": 0.2814644193387201, "grad_norm": 1.9087542295455933, "learning_rate": 3.7818473380372033e-06, "loss": 0.8783, "step": 23095 }, { "epoch": 0.2815253555628679, "grad_norm": 1.8664703369140625, "learning_rate": 3.781526619627967e-06, "loss": 0.8763, "step": 23100 }, { "epoch": 0.2815862917870157, "grad_norm": 1.9919601678848267, "learning_rate": 3.78120590121873e-06, "loss": 0.814, "step": 23105 }, { "epoch": 0.28164722801116354, "grad_norm": 1.726194143295288, "learning_rate": 3.780885182809493e-06, "loss": 0.8526, "step": 23110 }, { "epoch": 0.2817081642353113, "grad_norm": 1.8216501474380493, "learning_rate": 3.780564464400257e-06, "loss": 0.9376, "step": 23115 }, { "epoch": 0.28176910045945913, "grad_norm": 1.744720697402954, "learning_rate": 3.78024374599102e-06, "loss": 0.9523, "step": 23120 }, { "epoch": 0.28183003668360695, "grad_norm": 2.025949716567993, "learning_rate": 3.779923027581783e-06, "loss": 0.9209, "step": 23125 }, { "epoch": 0.2818909729077547, "grad_norm": 2.0688910484313965, "learning_rate": 3.779602309172547e-06, "loss": 0.8571, "step": 23130 }, { "epoch": 0.28195190913190255, "grad_norm": 1.8208625316619873, "learning_rate": 3.77928159076331e-06, "loss": 0.8569, "step": 23135 }, { "epoch": 0.28201284535605037, "grad_norm": 1.9750635623931885, "learning_rate": 3.7789608723540734e-06, "loss": 0.8421, "step": 23140 }, { "epoch": 0.2820737815801982, "grad_norm": 2.118840217590332, "learning_rate": 3.778640153944837e-06, "loss": 0.8593, "step": 23145 }, { "epoch": 0.28213471780434596, "grad_norm": 1.6584994792938232, "learning_rate": 3.7783194355356003e-06, "loss": 0.9108, "step": 23150 }, { "epoch": 0.2821956540284938, "grad_norm": 1.8276453018188477, "learning_rate": 3.7779987171263633e-06, "loss": 0.9178, "step": 23155 }, { "epoch": 0.2822565902526416, "grad_norm": 2.2049853801727295, "learning_rate": 3.7776779987171268e-06, "loss": 0.8935, "step": 23160 }, { "epoch": 0.28231752647678937, "grad_norm": 1.5741808414459229, "learning_rate": 3.77735728030789e-06, "loss": 0.8053, "step": 23165 }, { "epoch": 0.2823784627009372, "grad_norm": 2.181276559829712, "learning_rate": 3.7770365618986532e-06, "loss": 0.8433, "step": 23170 }, { "epoch": 0.282439398925085, "grad_norm": 1.9291129112243652, "learning_rate": 3.7767158434894162e-06, "loss": 0.8869, "step": 23175 }, { "epoch": 0.28250033514923284, "grad_norm": 1.8922278881072998, "learning_rate": 3.77639512508018e-06, "loss": 0.813, "step": 23180 }, { "epoch": 0.2825612713733806, "grad_norm": 1.7403233051300049, "learning_rate": 3.776074406670943e-06, "loss": 0.9013, "step": 23185 }, { "epoch": 0.28262220759752843, "grad_norm": 2.3542747497558594, "learning_rate": 3.775753688261706e-06, "loss": 0.9654, "step": 23190 }, { "epoch": 0.28268314382167625, "grad_norm": 1.924184799194336, "learning_rate": 3.77543296985247e-06, "loss": 0.8464, "step": 23195 }, { "epoch": 0.282744080045824, "grad_norm": 2.2848575115203857, "learning_rate": 3.775112251443233e-06, "loss": 0.8794, "step": 23200 }, { "epoch": 0.28280501626997184, "grad_norm": 1.874695897102356, "learning_rate": 3.7747915330339965e-06, "loss": 0.8479, "step": 23205 }, { "epoch": 0.28286595249411967, "grad_norm": 2.1053035259246826, "learning_rate": 3.77447081462476e-06, "loss": 0.9142, "step": 23210 }, { "epoch": 0.2829268887182675, "grad_norm": 2.2483139038085938, "learning_rate": 3.774150096215523e-06, "loss": 0.8662, "step": 23215 }, { "epoch": 0.28298782494241526, "grad_norm": 2.0198028087615967, "learning_rate": 3.7738293778062863e-06, "loss": 0.9429, "step": 23220 }, { "epoch": 0.2830487611665631, "grad_norm": 2.201751232147217, "learning_rate": 3.77350865939705e-06, "loss": 0.8725, "step": 23225 }, { "epoch": 0.2831096973907109, "grad_norm": 1.9513312578201294, "learning_rate": 3.7731879409878132e-06, "loss": 0.8213, "step": 23230 }, { "epoch": 0.28317063361485867, "grad_norm": 2.053966522216797, "learning_rate": 3.7728672225785762e-06, "loss": 0.859, "step": 23235 }, { "epoch": 0.2832315698390065, "grad_norm": 1.9378013610839844, "learning_rate": 3.7725465041693397e-06, "loss": 0.8656, "step": 23240 }, { "epoch": 0.2832925060631543, "grad_norm": 2.0788257122039795, "learning_rate": 3.772225785760103e-06, "loss": 0.89, "step": 23245 }, { "epoch": 0.2833534422873021, "grad_norm": 2.019059181213379, "learning_rate": 3.771905067350866e-06, "loss": 0.8374, "step": 23250 }, { "epoch": 0.2834143785114499, "grad_norm": 2.382143497467041, "learning_rate": 3.771584348941629e-06, "loss": 0.8542, "step": 23255 }, { "epoch": 0.2834753147355977, "grad_norm": 1.8787882328033447, "learning_rate": 3.771263630532393e-06, "loss": 0.867, "step": 23260 }, { "epoch": 0.28353625095974555, "grad_norm": 1.9591273069381714, "learning_rate": 3.770942912123156e-06, "loss": 0.8396, "step": 23265 }, { "epoch": 0.2835971871838933, "grad_norm": 2.256009578704834, "learning_rate": 3.770622193713919e-06, "loss": 0.832, "step": 23270 }, { "epoch": 0.28365812340804114, "grad_norm": 1.9713386297225952, "learning_rate": 3.770301475304683e-06, "loss": 0.8525, "step": 23275 }, { "epoch": 0.28371905963218896, "grad_norm": 2.1359012126922607, "learning_rate": 3.769980756895446e-06, "loss": 0.9133, "step": 23280 }, { "epoch": 0.28377999585633673, "grad_norm": 1.7505760192871094, "learning_rate": 3.7696600384862094e-06, "loss": 0.9193, "step": 23285 }, { "epoch": 0.28384093208048455, "grad_norm": 1.5907306671142578, "learning_rate": 3.769339320076973e-06, "loss": 0.8673, "step": 23290 }, { "epoch": 0.2839018683046324, "grad_norm": 2.0037612915039062, "learning_rate": 3.769018601667736e-06, "loss": 0.8914, "step": 23295 }, { "epoch": 0.2839628045287802, "grad_norm": 1.7677795886993408, "learning_rate": 3.7686978832584993e-06, "loss": 0.9024, "step": 23300 }, { "epoch": 0.28402374075292797, "grad_norm": 2.1371610164642334, "learning_rate": 3.7683771648492627e-06, "loss": 0.9374, "step": 23305 }, { "epoch": 0.2840846769770758, "grad_norm": 2.468388557434082, "learning_rate": 3.768056446440026e-06, "loss": 0.9068, "step": 23310 }, { "epoch": 0.2841456132012236, "grad_norm": 1.7910375595092773, "learning_rate": 3.767735728030789e-06, "loss": 0.8345, "step": 23315 }, { "epoch": 0.2842065494253714, "grad_norm": 1.8388408422470093, "learning_rate": 3.7674150096215526e-06, "loss": 0.8498, "step": 23320 }, { "epoch": 0.2842674856495192, "grad_norm": 1.9465842247009277, "learning_rate": 3.767094291212316e-06, "loss": 0.866, "step": 23325 }, { "epoch": 0.284328421873667, "grad_norm": 2.055631160736084, "learning_rate": 3.766773572803079e-06, "loss": 0.8223, "step": 23330 }, { "epoch": 0.28438935809781485, "grad_norm": 1.9887672662734985, "learning_rate": 3.766452854393843e-06, "loss": 0.7958, "step": 23335 }, { "epoch": 0.2844502943219626, "grad_norm": 2.1536200046539307, "learning_rate": 3.766132135984606e-06, "loss": 0.9108, "step": 23340 }, { "epoch": 0.28451123054611044, "grad_norm": 1.6841883659362793, "learning_rate": 3.765811417575369e-06, "loss": 0.8431, "step": 23345 }, { "epoch": 0.28457216677025826, "grad_norm": 1.913331389427185, "learning_rate": 3.765490699166132e-06, "loss": 0.8709, "step": 23350 }, { "epoch": 0.28463310299440603, "grad_norm": 2.766122579574585, "learning_rate": 3.765169980756896e-06, "loss": 0.8634, "step": 23355 }, { "epoch": 0.28469403921855385, "grad_norm": 1.9162023067474365, "learning_rate": 3.764849262347659e-06, "loss": 0.8847, "step": 23360 }, { "epoch": 0.2847549754427017, "grad_norm": 1.808201551437378, "learning_rate": 3.7645285439384223e-06, "loss": 0.8205, "step": 23365 }, { "epoch": 0.2848159116668495, "grad_norm": 1.7403879165649414, "learning_rate": 3.7642078255291857e-06, "loss": 0.8751, "step": 23370 }, { "epoch": 0.28487684789099726, "grad_norm": 2.0374605655670166, "learning_rate": 3.763887107119949e-06, "loss": 0.892, "step": 23375 }, { "epoch": 0.2849377841151451, "grad_norm": 2.4039227962493896, "learning_rate": 3.763566388710712e-06, "loss": 0.8756, "step": 23380 }, { "epoch": 0.2849987203392929, "grad_norm": 2.035155773162842, "learning_rate": 3.7632456703014756e-06, "loss": 0.8507, "step": 23385 }, { "epoch": 0.2850596565634407, "grad_norm": 1.9624791145324707, "learning_rate": 3.762924951892239e-06, "loss": 0.9044, "step": 23390 }, { "epoch": 0.2851205927875885, "grad_norm": 1.9160631895065308, "learning_rate": 3.762604233483002e-06, "loss": 0.8668, "step": 23395 }, { "epoch": 0.2851815290117363, "grad_norm": 1.95735502243042, "learning_rate": 3.762283515073766e-06, "loss": 0.8597, "step": 23400 }, { "epoch": 0.28524246523588415, "grad_norm": 1.9114078283309937, "learning_rate": 3.761962796664529e-06, "loss": 0.8518, "step": 23405 }, { "epoch": 0.2853034014600319, "grad_norm": 2.149879217147827, "learning_rate": 3.761642078255292e-06, "loss": 0.9247, "step": 23410 }, { "epoch": 0.28536433768417974, "grad_norm": 2.00907301902771, "learning_rate": 3.761321359846056e-06, "loss": 0.8283, "step": 23415 }, { "epoch": 0.28542527390832756, "grad_norm": 1.9485081434249878, "learning_rate": 3.761000641436819e-06, "loss": 0.8939, "step": 23420 }, { "epoch": 0.2854862101324753, "grad_norm": 1.9282751083374023, "learning_rate": 3.760679923027582e-06, "loss": 0.8358, "step": 23425 }, { "epoch": 0.28554714635662315, "grad_norm": 1.9598675966262817, "learning_rate": 3.7603592046183453e-06, "loss": 0.8458, "step": 23430 }, { "epoch": 0.28560808258077097, "grad_norm": 1.9444676637649536, "learning_rate": 3.7600384862091088e-06, "loss": 0.8976, "step": 23435 }, { "epoch": 0.2856690188049188, "grad_norm": 1.8325920104980469, "learning_rate": 3.759717767799872e-06, "loss": 0.8591, "step": 23440 }, { "epoch": 0.28572995502906656, "grad_norm": 2.047238826751709, "learning_rate": 3.7593970493906352e-06, "loss": 0.9161, "step": 23445 }, { "epoch": 0.2857908912532144, "grad_norm": 1.936870813369751, "learning_rate": 3.7590763309813987e-06, "loss": 0.8247, "step": 23450 }, { "epoch": 0.2858518274773622, "grad_norm": 1.6962382793426514, "learning_rate": 3.758755612572162e-06, "loss": 0.8374, "step": 23455 }, { "epoch": 0.28591276370151, "grad_norm": 2.005201578140259, "learning_rate": 3.758434894162925e-06, "loss": 0.954, "step": 23460 }, { "epoch": 0.2859736999256578, "grad_norm": 2.1777005195617676, "learning_rate": 3.7581141757536886e-06, "loss": 0.833, "step": 23465 }, { "epoch": 0.2860346361498056, "grad_norm": 2.473156690597534, "learning_rate": 3.757793457344452e-06, "loss": 0.9204, "step": 23470 }, { "epoch": 0.28609557237395344, "grad_norm": 2.192018508911133, "learning_rate": 3.757472738935215e-06, "loss": 0.8828, "step": 23475 }, { "epoch": 0.2861565085981012, "grad_norm": 2.016826868057251, "learning_rate": 3.757152020525979e-06, "loss": 0.9338, "step": 23480 }, { "epoch": 0.28621744482224903, "grad_norm": 1.6974824666976929, "learning_rate": 3.756831302116742e-06, "loss": 0.8447, "step": 23485 }, { "epoch": 0.28627838104639686, "grad_norm": 2.0360145568847656, "learning_rate": 3.756510583707505e-06, "loss": 0.8648, "step": 23490 }, { "epoch": 0.2863393172705446, "grad_norm": 1.9498701095581055, "learning_rate": 3.756189865298269e-06, "loss": 0.8127, "step": 23495 }, { "epoch": 0.28640025349469245, "grad_norm": 1.8595412969589233, "learning_rate": 3.755869146889032e-06, "loss": 0.871, "step": 23500 }, { "epoch": 0.28646118971884027, "grad_norm": 2.457078695297241, "learning_rate": 3.755548428479795e-06, "loss": 0.8663, "step": 23505 }, { "epoch": 0.2865221259429881, "grad_norm": 1.902402639389038, "learning_rate": 3.7552277100705583e-06, "loss": 0.8873, "step": 23510 }, { "epoch": 0.28658306216713586, "grad_norm": 2.2040796279907227, "learning_rate": 3.7549069916613217e-06, "loss": 0.8745, "step": 23515 }, { "epoch": 0.2866439983912837, "grad_norm": 1.8681485652923584, "learning_rate": 3.7545862732520847e-06, "loss": 0.8642, "step": 23520 }, { "epoch": 0.2867049346154315, "grad_norm": 2.1102163791656494, "learning_rate": 3.754265554842848e-06, "loss": 0.9049, "step": 23525 }, { "epoch": 0.2867658708395793, "grad_norm": 2.009981155395508, "learning_rate": 3.7539448364336116e-06, "loss": 0.8501, "step": 23530 }, { "epoch": 0.2868268070637271, "grad_norm": 2.1868817806243896, "learning_rate": 3.753624118024375e-06, "loss": 0.8038, "step": 23535 }, { "epoch": 0.2868877432878749, "grad_norm": 2.446516275405884, "learning_rate": 3.753303399615138e-06, "loss": 0.8627, "step": 23540 }, { "epoch": 0.28694867951202274, "grad_norm": 1.8780946731567383, "learning_rate": 3.7529826812059015e-06, "loss": 0.8645, "step": 23545 }, { "epoch": 0.2870096157361705, "grad_norm": 2.139763355255127, "learning_rate": 3.752661962796665e-06, "loss": 0.8351, "step": 23550 }, { "epoch": 0.28707055196031833, "grad_norm": 1.948384165763855, "learning_rate": 3.752341244387428e-06, "loss": 0.8878, "step": 23555 }, { "epoch": 0.28713148818446615, "grad_norm": 2.0401556491851807, "learning_rate": 3.752020525978192e-06, "loss": 0.8724, "step": 23560 }, { "epoch": 0.2871924244086139, "grad_norm": 2.0700063705444336, "learning_rate": 3.751699807568955e-06, "loss": 0.8869, "step": 23565 }, { "epoch": 0.28725336063276175, "grad_norm": 2.4248030185699463, "learning_rate": 3.751379089159718e-06, "loss": 0.8701, "step": 23570 }, { "epoch": 0.28731429685690957, "grad_norm": 2.073927164077759, "learning_rate": 3.7510583707504817e-06, "loss": 0.8747, "step": 23575 }, { "epoch": 0.2873752330810574, "grad_norm": 1.8835762739181519, "learning_rate": 3.7507376523412447e-06, "loss": 0.8715, "step": 23580 }, { "epoch": 0.28743616930520516, "grad_norm": 2.081885576248169, "learning_rate": 3.7504169339320077e-06, "loss": 0.8389, "step": 23585 }, { "epoch": 0.287497105529353, "grad_norm": 1.9703760147094727, "learning_rate": 3.7500962155227716e-06, "loss": 0.8722, "step": 23590 }, { "epoch": 0.2875580417535008, "grad_norm": 2.220104455947876, "learning_rate": 3.7497754971135346e-06, "loss": 0.8533, "step": 23595 }, { "epoch": 0.28761897797764857, "grad_norm": 1.9168068170547485, "learning_rate": 3.749454778704298e-06, "loss": 0.8299, "step": 23600 }, { "epoch": 0.2876799142017964, "grad_norm": 2.125345230102539, "learning_rate": 3.749134060295061e-06, "loss": 0.8393, "step": 23605 }, { "epoch": 0.2877408504259442, "grad_norm": 1.625327706336975, "learning_rate": 3.7488133418858245e-06, "loss": 0.864, "step": 23610 }, { "epoch": 0.28780178665009204, "grad_norm": 1.8181302547454834, "learning_rate": 3.748492623476588e-06, "loss": 0.8385, "step": 23615 }, { "epoch": 0.2878627228742398, "grad_norm": 2.4631667137145996, "learning_rate": 3.748171905067351e-06, "loss": 0.8699, "step": 23620 }, { "epoch": 0.28792365909838763, "grad_norm": 2.1824288368225098, "learning_rate": 3.747851186658115e-06, "loss": 0.9104, "step": 23625 }, { "epoch": 0.28798459532253545, "grad_norm": 2.3859729766845703, "learning_rate": 3.747530468248878e-06, "loss": 0.8864, "step": 23630 }, { "epoch": 0.2880455315466832, "grad_norm": 1.916803002357483, "learning_rate": 3.747209749839641e-06, "loss": 0.9037, "step": 23635 }, { "epoch": 0.28810646777083104, "grad_norm": 2.0308773517608643, "learning_rate": 3.7468890314304047e-06, "loss": 0.865, "step": 23640 }, { "epoch": 0.28816740399497887, "grad_norm": 1.9795763492584229, "learning_rate": 3.7465683130211678e-06, "loss": 0.8289, "step": 23645 }, { "epoch": 0.2882283402191267, "grad_norm": 1.828992247581482, "learning_rate": 3.7462475946119308e-06, "loss": 0.8545, "step": 23650 }, { "epoch": 0.28828927644327446, "grad_norm": 2.019688129425049, "learning_rate": 3.7459268762026946e-06, "loss": 0.8922, "step": 23655 }, { "epoch": 0.2883502126674223, "grad_norm": 2.2501423358917236, "learning_rate": 3.7456061577934577e-06, "loss": 0.8383, "step": 23660 }, { "epoch": 0.2884111488915701, "grad_norm": 2.146401882171631, "learning_rate": 3.7452854393842207e-06, "loss": 0.9415, "step": 23665 }, { "epoch": 0.28847208511571787, "grad_norm": 2.126718282699585, "learning_rate": 3.7449647209749845e-06, "loss": 0.8793, "step": 23670 }, { "epoch": 0.2885330213398657, "grad_norm": 2.3775904178619385, "learning_rate": 3.7446440025657476e-06, "loss": 0.8931, "step": 23675 }, { "epoch": 0.2885939575640135, "grad_norm": 1.9930166006088257, "learning_rate": 3.744323284156511e-06, "loss": 0.8592, "step": 23680 }, { "epoch": 0.28865489378816134, "grad_norm": 1.7474565505981445, "learning_rate": 3.744002565747274e-06, "loss": 0.9048, "step": 23685 }, { "epoch": 0.2887158300123091, "grad_norm": 1.6567353010177612, "learning_rate": 3.7436818473380375e-06, "loss": 0.8918, "step": 23690 }, { "epoch": 0.2887767662364569, "grad_norm": 2.4410622119903564, "learning_rate": 3.743361128928801e-06, "loss": 0.945, "step": 23695 }, { "epoch": 0.28883770246060475, "grad_norm": 2.218698501586914, "learning_rate": 3.743040410519564e-06, "loss": 0.8653, "step": 23700 }, { "epoch": 0.2888986386847525, "grad_norm": 2.104372024536133, "learning_rate": 3.7427196921103278e-06, "loss": 0.9597, "step": 23705 }, { "epoch": 0.28895957490890034, "grad_norm": 1.9266670942306519, "learning_rate": 3.742398973701091e-06, "loss": 0.8605, "step": 23710 }, { "epoch": 0.28902051113304816, "grad_norm": 2.1618285179138184, "learning_rate": 3.742078255291854e-06, "loss": 0.791, "step": 23715 }, { "epoch": 0.289081447357196, "grad_norm": 2.2784230709075928, "learning_rate": 3.7417575368826177e-06, "loss": 0.8478, "step": 23720 }, { "epoch": 0.28914238358134375, "grad_norm": 2.2596774101257324, "learning_rate": 3.7414368184733807e-06, "loss": 0.8816, "step": 23725 }, { "epoch": 0.2892033198054916, "grad_norm": 2.0695900917053223, "learning_rate": 3.7411161000641437e-06, "loss": 0.8856, "step": 23730 }, { "epoch": 0.2892642560296394, "grad_norm": 2.0614843368530273, "learning_rate": 3.7407953816549076e-06, "loss": 0.8529, "step": 23735 }, { "epoch": 0.28932519225378717, "grad_norm": 1.9684251546859741, "learning_rate": 3.7404746632456706e-06, "loss": 0.8812, "step": 23740 }, { "epoch": 0.289386128477935, "grad_norm": 1.7082743644714355, "learning_rate": 3.7401539448364336e-06, "loss": 0.8524, "step": 23745 }, { "epoch": 0.2894470647020828, "grad_norm": 1.855210542678833, "learning_rate": 3.7398332264271975e-06, "loss": 0.8523, "step": 23750 }, { "epoch": 0.2895080009262306, "grad_norm": 1.8660006523132324, "learning_rate": 3.7395125080179605e-06, "loss": 0.8521, "step": 23755 }, { "epoch": 0.2895689371503784, "grad_norm": 1.9718254804611206, "learning_rate": 3.739191789608724e-06, "loss": 0.8801, "step": 23760 }, { "epoch": 0.2896298733745262, "grad_norm": 1.7824040651321411, "learning_rate": 3.738871071199487e-06, "loss": 0.8373, "step": 23765 }, { "epoch": 0.28969080959867405, "grad_norm": 1.9376258850097656, "learning_rate": 3.7385503527902504e-06, "loss": 0.8543, "step": 23770 }, { "epoch": 0.2897517458228218, "grad_norm": 1.9918434619903564, "learning_rate": 3.738229634381014e-06, "loss": 0.8207, "step": 23775 }, { "epoch": 0.28981268204696964, "grad_norm": 1.934059739112854, "learning_rate": 3.737908915971777e-06, "loss": 0.8763, "step": 23780 }, { "epoch": 0.28987361827111746, "grad_norm": 1.7473158836364746, "learning_rate": 3.7375881975625407e-06, "loss": 0.867, "step": 23785 }, { "epoch": 0.28993455449526523, "grad_norm": 2.2491204738616943, "learning_rate": 3.7372674791533037e-06, "loss": 0.9257, "step": 23790 }, { "epoch": 0.28999549071941305, "grad_norm": 1.8570411205291748, "learning_rate": 3.7369467607440667e-06, "loss": 0.822, "step": 23795 }, { "epoch": 0.2900564269435609, "grad_norm": 2.0818614959716797, "learning_rate": 3.7366260423348306e-06, "loss": 0.8222, "step": 23800 }, { "epoch": 0.2901173631677087, "grad_norm": 2.0012686252593994, "learning_rate": 3.7363053239255936e-06, "loss": 0.919, "step": 23805 }, { "epoch": 0.29017829939185646, "grad_norm": 2.0984859466552734, "learning_rate": 3.7359846055163566e-06, "loss": 0.8407, "step": 23810 }, { "epoch": 0.2902392356160043, "grad_norm": 1.876032829284668, "learning_rate": 3.7356638871071205e-06, "loss": 0.9039, "step": 23815 }, { "epoch": 0.2903001718401521, "grad_norm": 2.325638771057129, "learning_rate": 3.7353431686978835e-06, "loss": 0.918, "step": 23820 }, { "epoch": 0.2903611080642999, "grad_norm": 1.8710505962371826, "learning_rate": 3.7350224502886465e-06, "loss": 0.8641, "step": 23825 }, { "epoch": 0.2904220442884477, "grad_norm": 2.1278555393218994, "learning_rate": 3.7347017318794104e-06, "loss": 0.8637, "step": 23830 }, { "epoch": 0.2904829805125955, "grad_norm": 1.8374791145324707, "learning_rate": 3.7343810134701734e-06, "loss": 0.9222, "step": 23835 }, { "epoch": 0.29054391673674335, "grad_norm": 1.7015365362167358, "learning_rate": 3.734060295060937e-06, "loss": 0.8775, "step": 23840 }, { "epoch": 0.2906048529608911, "grad_norm": 1.9237737655639648, "learning_rate": 3.7337395766517e-06, "loss": 0.9126, "step": 23845 }, { "epoch": 0.29066578918503894, "grad_norm": 2.070554256439209, "learning_rate": 3.7334188582424637e-06, "loss": 0.8508, "step": 23850 }, { "epoch": 0.29072672540918676, "grad_norm": 2.2015304565429688, "learning_rate": 3.7330981398332267e-06, "loss": 0.8707, "step": 23855 }, { "epoch": 0.2907876616333345, "grad_norm": 2.166825771331787, "learning_rate": 3.7327774214239898e-06, "loss": 0.8849, "step": 23860 }, { "epoch": 0.29084859785748235, "grad_norm": 2.067211389541626, "learning_rate": 3.7324567030147536e-06, "loss": 0.9475, "step": 23865 }, { "epoch": 0.29090953408163017, "grad_norm": 2.0150463581085205, "learning_rate": 3.7321359846055166e-06, "loss": 0.8421, "step": 23870 }, { "epoch": 0.290970470305778, "grad_norm": 1.8534820079803467, "learning_rate": 3.7318152661962797e-06, "loss": 0.8802, "step": 23875 }, { "epoch": 0.29103140652992576, "grad_norm": 1.8397718667984009, "learning_rate": 3.7314945477870435e-06, "loss": 0.8668, "step": 23880 }, { "epoch": 0.2910923427540736, "grad_norm": 2.3111681938171387, "learning_rate": 3.7311738293778065e-06, "loss": 0.8722, "step": 23885 }, { "epoch": 0.2911532789782214, "grad_norm": 2.527733325958252, "learning_rate": 3.7308531109685696e-06, "loss": 0.7747, "step": 23890 }, { "epoch": 0.2912142152023692, "grad_norm": 1.7474861145019531, "learning_rate": 3.7305323925593334e-06, "loss": 0.8409, "step": 23895 }, { "epoch": 0.291275151426517, "grad_norm": 2.168958902359009, "learning_rate": 3.7302116741500964e-06, "loss": 0.8747, "step": 23900 }, { "epoch": 0.2913360876506648, "grad_norm": 1.9999816417694092, "learning_rate": 3.72989095574086e-06, "loss": 0.9141, "step": 23905 }, { "epoch": 0.29139702387481264, "grad_norm": 2.1512818336486816, "learning_rate": 3.7295702373316233e-06, "loss": 0.91, "step": 23910 }, { "epoch": 0.2914579600989604, "grad_norm": 2.0658650398254395, "learning_rate": 3.7292495189223863e-06, "loss": 0.8761, "step": 23915 }, { "epoch": 0.29151889632310823, "grad_norm": 1.8570740222930908, "learning_rate": 3.7289288005131498e-06, "loss": 0.8663, "step": 23920 }, { "epoch": 0.29157983254725606, "grad_norm": 1.9264750480651855, "learning_rate": 3.7286080821039132e-06, "loss": 0.8231, "step": 23925 }, { "epoch": 0.2916407687714038, "grad_norm": 2.117905616760254, "learning_rate": 3.7282873636946767e-06, "loss": 0.904, "step": 23930 }, { "epoch": 0.29170170499555165, "grad_norm": 1.6644375324249268, "learning_rate": 3.7279666452854397e-06, "loss": 0.8282, "step": 23935 }, { "epoch": 0.29176264121969947, "grad_norm": 2.0089848041534424, "learning_rate": 3.7276459268762027e-06, "loss": 0.8565, "step": 23940 }, { "epoch": 0.2918235774438473, "grad_norm": 1.8934452533721924, "learning_rate": 3.7273252084669666e-06, "loss": 0.8773, "step": 23945 }, { "epoch": 0.29188451366799506, "grad_norm": 1.9225412607192993, "learning_rate": 3.7270044900577296e-06, "loss": 0.8873, "step": 23950 }, { "epoch": 0.2919454498921429, "grad_norm": 1.896965503692627, "learning_rate": 3.7266837716484926e-06, "loss": 0.8571, "step": 23955 }, { "epoch": 0.2920063861162907, "grad_norm": 2.3116109371185303, "learning_rate": 3.7263630532392565e-06, "loss": 0.8344, "step": 23960 }, { "epoch": 0.2920673223404385, "grad_norm": 1.7078009843826294, "learning_rate": 3.7260423348300195e-06, "loss": 0.8731, "step": 23965 }, { "epoch": 0.2921282585645863, "grad_norm": 2.013066053390503, "learning_rate": 3.7257216164207825e-06, "loss": 0.7787, "step": 23970 }, { "epoch": 0.2921891947887341, "grad_norm": 1.8018665313720703, "learning_rate": 3.7254008980115464e-06, "loss": 0.8949, "step": 23975 }, { "epoch": 0.29225013101288194, "grad_norm": 1.8954274654388428, "learning_rate": 3.7250801796023094e-06, "loss": 0.8555, "step": 23980 }, { "epoch": 0.2923110672370297, "grad_norm": 1.6772619485855103, "learning_rate": 3.724759461193073e-06, "loss": 0.902, "step": 23985 }, { "epoch": 0.29237200346117753, "grad_norm": 1.7871105670928955, "learning_rate": 3.7244387427838362e-06, "loss": 0.855, "step": 23990 }, { "epoch": 0.29243293968532535, "grad_norm": 1.8023205995559692, "learning_rate": 3.7241180243745993e-06, "loss": 0.7799, "step": 23995 }, { "epoch": 0.2924938759094731, "grad_norm": 1.880893588066101, "learning_rate": 3.7237973059653627e-06, "loss": 0.8578, "step": 24000 }, { "epoch": 0.29255481213362095, "grad_norm": 1.9476872682571411, "learning_rate": 3.723476587556126e-06, "loss": 0.8582, "step": 24005 }, { "epoch": 0.29261574835776877, "grad_norm": 1.65859055519104, "learning_rate": 3.7231558691468896e-06, "loss": 0.9133, "step": 24010 }, { "epoch": 0.2926766845819166, "grad_norm": 2.054225206375122, "learning_rate": 3.7228351507376526e-06, "loss": 0.8568, "step": 24015 }, { "epoch": 0.29273762080606436, "grad_norm": 1.7122788429260254, "learning_rate": 3.7225144323284156e-06, "loss": 0.8609, "step": 24020 }, { "epoch": 0.2927985570302122, "grad_norm": 1.9465415477752686, "learning_rate": 3.7221937139191795e-06, "loss": 0.9036, "step": 24025 }, { "epoch": 0.29285949325436, "grad_norm": 1.9633280038833618, "learning_rate": 3.7218729955099425e-06, "loss": 0.8094, "step": 24030 }, { "epoch": 0.29292042947850777, "grad_norm": 1.6760057210922241, "learning_rate": 3.7215522771007055e-06, "loss": 0.8939, "step": 24035 }, { "epoch": 0.2929813657026556, "grad_norm": 1.7604315280914307, "learning_rate": 3.7212315586914694e-06, "loss": 0.8725, "step": 24040 }, { "epoch": 0.2930423019268034, "grad_norm": 1.863826036453247, "learning_rate": 3.7209108402822324e-06, "loss": 0.9203, "step": 24045 }, { "epoch": 0.29310323815095124, "grad_norm": 2.265458583831787, "learning_rate": 3.7205901218729954e-06, "loss": 0.8693, "step": 24050 }, { "epoch": 0.293164174375099, "grad_norm": 1.8719816207885742, "learning_rate": 3.7202694034637593e-06, "loss": 0.8409, "step": 24055 }, { "epoch": 0.29322511059924683, "grad_norm": 2.2230982780456543, "learning_rate": 3.7199486850545223e-06, "loss": 0.8541, "step": 24060 }, { "epoch": 0.29328604682339465, "grad_norm": 1.9116404056549072, "learning_rate": 3.7196279666452857e-06, "loss": 0.9238, "step": 24065 }, { "epoch": 0.2933469830475424, "grad_norm": 2.144479751586914, "learning_rate": 3.719307248236049e-06, "loss": 0.7812, "step": 24070 }, { "epoch": 0.29340791927169024, "grad_norm": 1.8700426816940308, "learning_rate": 3.7189865298268126e-06, "loss": 0.8931, "step": 24075 }, { "epoch": 0.29346885549583807, "grad_norm": 1.7231690883636475, "learning_rate": 3.7186658114175756e-06, "loss": 0.8631, "step": 24080 }, { "epoch": 0.2935297917199859, "grad_norm": 1.9282313585281372, "learning_rate": 3.718345093008339e-06, "loss": 0.866, "step": 24085 }, { "epoch": 0.29359072794413366, "grad_norm": 1.7569780349731445, "learning_rate": 3.7180243745991025e-06, "loss": 0.8284, "step": 24090 }, { "epoch": 0.2936516641682815, "grad_norm": 1.9601221084594727, "learning_rate": 3.7177036561898655e-06, "loss": 0.8588, "step": 24095 }, { "epoch": 0.2937126003924293, "grad_norm": 2.183370351791382, "learning_rate": 3.7173829377806285e-06, "loss": 0.8682, "step": 24100 }, { "epoch": 0.29377353661657707, "grad_norm": 2.1571764945983887, "learning_rate": 3.7170622193713924e-06, "loss": 0.884, "step": 24105 }, { "epoch": 0.2938344728407249, "grad_norm": 2.1523401737213135, "learning_rate": 3.7167415009621554e-06, "loss": 0.9027, "step": 24110 }, { "epoch": 0.2938954090648727, "grad_norm": 1.6809881925582886, "learning_rate": 3.7164207825529184e-06, "loss": 0.8411, "step": 24115 }, { "epoch": 0.29395634528902054, "grad_norm": 1.8757914304733276, "learning_rate": 3.7161000641436823e-06, "loss": 0.9235, "step": 24120 }, { "epoch": 0.2940172815131683, "grad_norm": 1.8579641580581665, "learning_rate": 3.7157793457344453e-06, "loss": 0.8915, "step": 24125 }, { "epoch": 0.2940782177373161, "grad_norm": 2.1738693714141846, "learning_rate": 3.7154586273252088e-06, "loss": 0.8566, "step": 24130 }, { "epoch": 0.29413915396146395, "grad_norm": 1.8548208475112915, "learning_rate": 3.715137908915972e-06, "loss": 0.8678, "step": 24135 }, { "epoch": 0.2942000901856117, "grad_norm": 2.3003432750701904, "learning_rate": 3.7148171905067352e-06, "loss": 0.8956, "step": 24140 }, { "epoch": 0.29426102640975954, "grad_norm": 1.754027247428894, "learning_rate": 3.7144964720974987e-06, "loss": 0.9108, "step": 24145 }, { "epoch": 0.29432196263390736, "grad_norm": 1.8736344575881958, "learning_rate": 3.714175753688262e-06, "loss": 0.8624, "step": 24150 }, { "epoch": 0.2943828988580552, "grad_norm": 2.082115411758423, "learning_rate": 3.7138550352790255e-06, "loss": 0.8418, "step": 24155 }, { "epoch": 0.29444383508220295, "grad_norm": 2.5757272243499756, "learning_rate": 3.7135343168697886e-06, "loss": 0.8345, "step": 24160 }, { "epoch": 0.2945047713063508, "grad_norm": 1.7870655059814453, "learning_rate": 3.713213598460552e-06, "loss": 0.8983, "step": 24165 }, { "epoch": 0.2945657075304986, "grad_norm": 1.7590017318725586, "learning_rate": 3.7128928800513154e-06, "loss": 0.8533, "step": 24170 }, { "epoch": 0.29462664375464637, "grad_norm": 1.8597685098648071, "learning_rate": 3.7125721616420785e-06, "loss": 0.8675, "step": 24175 }, { "epoch": 0.2946875799787942, "grad_norm": 2.1297690868377686, "learning_rate": 3.7122514432328423e-06, "loss": 0.8626, "step": 24180 }, { "epoch": 0.294748516202942, "grad_norm": 2.092034339904785, "learning_rate": 3.7119307248236053e-06, "loss": 0.8967, "step": 24185 }, { "epoch": 0.29480945242708984, "grad_norm": 2.043248414993286, "learning_rate": 3.7116100064143684e-06, "loss": 0.917, "step": 24190 }, { "epoch": 0.2948703886512376, "grad_norm": 1.9034415483474731, "learning_rate": 3.7112892880051314e-06, "loss": 0.8256, "step": 24195 }, { "epoch": 0.2949313248753854, "grad_norm": 2.321317434310913, "learning_rate": 3.7109685695958952e-06, "loss": 0.8224, "step": 24200 }, { "epoch": 0.29499226109953325, "grad_norm": 2.0574569702148438, "learning_rate": 3.7106478511866583e-06, "loss": 0.9074, "step": 24205 }, { "epoch": 0.295053197323681, "grad_norm": 2.072449207305908, "learning_rate": 3.7103271327774217e-06, "loss": 0.847, "step": 24210 }, { "epoch": 0.29511413354782884, "grad_norm": 1.994537353515625, "learning_rate": 3.710006414368185e-06, "loss": 0.859, "step": 24215 }, { "epoch": 0.29517506977197666, "grad_norm": 1.9094470739364624, "learning_rate": 3.709685695958948e-06, "loss": 0.8558, "step": 24220 }, { "epoch": 0.29523600599612443, "grad_norm": 1.8432282209396362, "learning_rate": 3.7093649775497116e-06, "loss": 0.8928, "step": 24225 }, { "epoch": 0.29529694222027225, "grad_norm": 2.0963144302368164, "learning_rate": 3.709044259140475e-06, "loss": 0.9238, "step": 24230 }, { "epoch": 0.2953578784444201, "grad_norm": 2.058858633041382, "learning_rate": 3.7087235407312385e-06, "loss": 0.7987, "step": 24235 }, { "epoch": 0.2954188146685679, "grad_norm": 1.8666462898254395, "learning_rate": 3.7084028223220015e-06, "loss": 0.8241, "step": 24240 }, { "epoch": 0.29547975089271566, "grad_norm": 1.8269484043121338, "learning_rate": 3.708082103912765e-06, "loss": 0.8876, "step": 24245 }, { "epoch": 0.2955406871168635, "grad_norm": 1.6599571704864502, "learning_rate": 3.7077613855035284e-06, "loss": 0.8884, "step": 24250 }, { "epoch": 0.2956016233410113, "grad_norm": 1.6476143598556519, "learning_rate": 3.7074406670942914e-06, "loss": 0.8751, "step": 24255 }, { "epoch": 0.2956625595651591, "grad_norm": 2.0314531326293945, "learning_rate": 3.7071199486850552e-06, "loss": 0.9134, "step": 24260 }, { "epoch": 0.2957234957893069, "grad_norm": 1.9437768459320068, "learning_rate": 3.7067992302758183e-06, "loss": 0.8387, "step": 24265 }, { "epoch": 0.2957844320134547, "grad_norm": 1.8195427656173706, "learning_rate": 3.7064785118665813e-06, "loss": 0.851, "step": 24270 }, { "epoch": 0.29584536823760255, "grad_norm": 1.914096713066101, "learning_rate": 3.7061577934573443e-06, "loss": 0.8614, "step": 24275 }, { "epoch": 0.2959063044617503, "grad_norm": 1.9643120765686035, "learning_rate": 3.705837075048108e-06, "loss": 0.8538, "step": 24280 }, { "epoch": 0.29596724068589814, "grad_norm": 2.2056164741516113, "learning_rate": 3.705516356638871e-06, "loss": 0.8235, "step": 24285 }, { "epoch": 0.29602817691004596, "grad_norm": 2.131824254989624, "learning_rate": 3.7051956382296346e-06, "loss": 0.8932, "step": 24290 }, { "epoch": 0.2960891131341937, "grad_norm": 1.9101715087890625, "learning_rate": 3.704874919820398e-06, "loss": 0.8828, "step": 24295 }, { "epoch": 0.29615004935834155, "grad_norm": 1.8495426177978516, "learning_rate": 3.704554201411161e-06, "loss": 0.8536, "step": 24300 }, { "epoch": 0.2962109855824894, "grad_norm": 2.0523953437805176, "learning_rate": 3.7042334830019245e-06, "loss": 0.8704, "step": 24305 }, { "epoch": 0.2962719218066372, "grad_norm": 1.8387346267700195, "learning_rate": 3.703912764592688e-06, "loss": 0.8234, "step": 24310 }, { "epoch": 0.29633285803078496, "grad_norm": 1.9822072982788086, "learning_rate": 3.7035920461834514e-06, "loss": 0.868, "step": 24315 }, { "epoch": 0.2963937942549328, "grad_norm": 1.5616908073425293, "learning_rate": 3.7032713277742144e-06, "loss": 0.8062, "step": 24320 }, { "epoch": 0.2964547304790806, "grad_norm": 2.056706190109253, "learning_rate": 3.7029506093649783e-06, "loss": 0.9107, "step": 24325 }, { "epoch": 0.2965156667032284, "grad_norm": 2.1851136684417725, "learning_rate": 3.7026298909557413e-06, "loss": 0.9064, "step": 24330 }, { "epoch": 0.2965766029273762, "grad_norm": 1.937940001487732, "learning_rate": 3.7023091725465043e-06, "loss": 0.8387, "step": 24335 }, { "epoch": 0.296637539151524, "grad_norm": 1.752456545829773, "learning_rate": 3.701988454137268e-06, "loss": 0.8196, "step": 24340 }, { "epoch": 0.29669847537567184, "grad_norm": 2.3470280170440674, "learning_rate": 3.701667735728031e-06, "loss": 0.9225, "step": 24345 }, { "epoch": 0.2967594115998196, "grad_norm": 2.2497124671936035, "learning_rate": 3.701347017318794e-06, "loss": 0.8187, "step": 24350 }, { "epoch": 0.29682034782396743, "grad_norm": 1.9013999700546265, "learning_rate": 3.7010262989095576e-06, "loss": 0.849, "step": 24355 }, { "epoch": 0.29688128404811526, "grad_norm": 2.5956525802612305, "learning_rate": 3.700705580500321e-06, "loss": 0.8501, "step": 24360 }, { "epoch": 0.296942220272263, "grad_norm": 1.7808492183685303, "learning_rate": 3.700384862091084e-06, "loss": 0.9294, "step": 24365 }, { "epoch": 0.29700315649641085, "grad_norm": 1.859835147857666, "learning_rate": 3.7000641436818475e-06, "loss": 0.8664, "step": 24370 }, { "epoch": 0.29706409272055867, "grad_norm": 2.030008316040039, "learning_rate": 3.699743425272611e-06, "loss": 0.8879, "step": 24375 }, { "epoch": 0.2971250289447065, "grad_norm": 1.8770956993103027, "learning_rate": 3.6994227068633744e-06, "loss": 0.8838, "step": 24380 }, { "epoch": 0.29718596516885426, "grad_norm": 2.141306161880493, "learning_rate": 3.6991019884541374e-06, "loss": 0.9166, "step": 24385 }, { "epoch": 0.2972469013930021, "grad_norm": 2.338160991668701, "learning_rate": 3.698781270044901e-06, "loss": 0.8385, "step": 24390 }, { "epoch": 0.2973078376171499, "grad_norm": 1.9494446516036987, "learning_rate": 3.6984605516356643e-06, "loss": 0.8191, "step": 24395 }, { "epoch": 0.2973687738412977, "grad_norm": 2.358555555343628, "learning_rate": 3.6981398332264273e-06, "loss": 0.7566, "step": 24400 }, { "epoch": 0.2974297100654455, "grad_norm": 1.9095799922943115, "learning_rate": 3.697819114817191e-06, "loss": 0.8783, "step": 24405 }, { "epoch": 0.2974906462895933, "grad_norm": 1.9103833436965942, "learning_rate": 3.6974983964079542e-06, "loss": 0.8425, "step": 24410 }, { "epoch": 0.29755158251374114, "grad_norm": 1.9432560205459595, "learning_rate": 3.6971776779987172e-06, "loss": 0.8462, "step": 24415 }, { "epoch": 0.2976125187378889, "grad_norm": 1.8650847673416138, "learning_rate": 3.696856959589481e-06, "loss": 0.8575, "step": 24420 }, { "epoch": 0.29767345496203673, "grad_norm": 2.0250518321990967, "learning_rate": 3.696536241180244e-06, "loss": 0.8385, "step": 24425 }, { "epoch": 0.29773439118618455, "grad_norm": 2.203570604324341, "learning_rate": 3.696215522771007e-06, "loss": 0.7888, "step": 24430 }, { "epoch": 0.2977953274103323, "grad_norm": 2.424405574798584, "learning_rate": 3.6958948043617706e-06, "loss": 0.8191, "step": 24435 }, { "epoch": 0.29785626363448015, "grad_norm": 1.7653359174728394, "learning_rate": 3.695574085952534e-06, "loss": 0.9171, "step": 24440 }, { "epoch": 0.29791719985862797, "grad_norm": 2.2046310901641846, "learning_rate": 3.695253367543297e-06, "loss": 0.9154, "step": 24445 }, { "epoch": 0.2979781360827758, "grad_norm": 2.12003231048584, "learning_rate": 3.6949326491340605e-06, "loss": 0.9204, "step": 24450 }, { "epoch": 0.29803907230692356, "grad_norm": 1.7972007989883423, "learning_rate": 3.694611930724824e-06, "loss": 0.7893, "step": 24455 }, { "epoch": 0.2981000085310714, "grad_norm": 2.0598490238189697, "learning_rate": 3.6942912123155874e-06, "loss": 0.8832, "step": 24460 }, { "epoch": 0.2981609447552192, "grad_norm": 2.167206287384033, "learning_rate": 3.6939704939063504e-06, "loss": 0.8493, "step": 24465 }, { "epoch": 0.29822188097936697, "grad_norm": 2.023533821105957, "learning_rate": 3.693649775497114e-06, "loss": 0.8504, "step": 24470 }, { "epoch": 0.2982828172035148, "grad_norm": 1.9600433111190796, "learning_rate": 3.6933290570878773e-06, "loss": 0.7904, "step": 24475 }, { "epoch": 0.2983437534276626, "grad_norm": 2.0533416271209717, "learning_rate": 3.6930083386786403e-06, "loss": 0.8973, "step": 24480 }, { "epoch": 0.29840468965181044, "grad_norm": 1.9068235158920288, "learning_rate": 3.692687620269404e-06, "loss": 0.9213, "step": 24485 }, { "epoch": 0.2984656258759582, "grad_norm": 2.2082998752593994, "learning_rate": 3.692366901860167e-06, "loss": 0.9735, "step": 24490 }, { "epoch": 0.29852656210010603, "grad_norm": 1.9890713691711426, "learning_rate": 3.69204618345093e-06, "loss": 0.8116, "step": 24495 }, { "epoch": 0.29858749832425385, "grad_norm": 2.33475923538208, "learning_rate": 3.691725465041694e-06, "loss": 0.8396, "step": 24500 }, { "epoch": 0.2986484345484016, "grad_norm": 1.9590353965759277, "learning_rate": 3.691404746632457e-06, "loss": 0.8921, "step": 24505 }, { "epoch": 0.29870937077254944, "grad_norm": 1.8195658922195435, "learning_rate": 3.69108402822322e-06, "loss": 0.8721, "step": 24510 }, { "epoch": 0.29877030699669727, "grad_norm": 1.986795425415039, "learning_rate": 3.690763309813984e-06, "loss": 0.886, "step": 24515 }, { "epoch": 0.2988312432208451, "grad_norm": 1.9900003671646118, "learning_rate": 3.690442591404747e-06, "loss": 0.8871, "step": 24520 }, { "epoch": 0.29889217944499286, "grad_norm": 2.3048999309539795, "learning_rate": 3.69012187299551e-06, "loss": 0.8941, "step": 24525 }, { "epoch": 0.2989531156691407, "grad_norm": 1.8541091680526733, "learning_rate": 3.6898011545862734e-06, "loss": 0.7504, "step": 24530 }, { "epoch": 0.2990140518932885, "grad_norm": 1.9210789203643799, "learning_rate": 3.689480436177037e-06, "loss": 0.8239, "step": 24535 }, { "epoch": 0.29907498811743627, "grad_norm": 2.4009737968444824, "learning_rate": 3.6891597177678003e-06, "loss": 0.8965, "step": 24540 }, { "epoch": 0.2991359243415841, "grad_norm": 1.8016350269317627, "learning_rate": 3.6888389993585633e-06, "loss": 0.8531, "step": 24545 }, { "epoch": 0.2991968605657319, "grad_norm": 2.1812219619750977, "learning_rate": 3.688518280949327e-06, "loss": 0.9426, "step": 24550 }, { "epoch": 0.29925779678987974, "grad_norm": 2.126905918121338, "learning_rate": 3.68819756254009e-06, "loss": 0.8497, "step": 24555 }, { "epoch": 0.2993187330140275, "grad_norm": 1.6792725324630737, "learning_rate": 3.687876844130853e-06, "loss": 0.8183, "step": 24560 }, { "epoch": 0.2993796692381753, "grad_norm": 1.9945412874221802, "learning_rate": 3.687556125721617e-06, "loss": 0.8204, "step": 24565 }, { "epoch": 0.29944060546232315, "grad_norm": 1.7950278520584106, "learning_rate": 3.68723540731238e-06, "loss": 0.9039, "step": 24570 }, { "epoch": 0.2995015416864709, "grad_norm": 2.1860337257385254, "learning_rate": 3.686914688903143e-06, "loss": 0.8747, "step": 24575 }, { "epoch": 0.29956247791061874, "grad_norm": 2.0802865028381348, "learning_rate": 3.686593970493907e-06, "loss": 0.9184, "step": 24580 }, { "epoch": 0.29962341413476656, "grad_norm": 1.866463303565979, "learning_rate": 3.68627325208467e-06, "loss": 0.9176, "step": 24585 }, { "epoch": 0.2996843503589144, "grad_norm": 1.7080161571502686, "learning_rate": 3.685952533675433e-06, "loss": 0.8598, "step": 24590 }, { "epoch": 0.29974528658306215, "grad_norm": 2.0086545944213867, "learning_rate": 3.685631815266197e-06, "loss": 0.8858, "step": 24595 }, { "epoch": 0.29980622280721, "grad_norm": 2.0600297451019287, "learning_rate": 3.68531109685696e-06, "loss": 0.9578, "step": 24600 }, { "epoch": 0.2998671590313578, "grad_norm": 2.219480276107788, "learning_rate": 3.6849903784477233e-06, "loss": 0.8685, "step": 24605 }, { "epoch": 0.29992809525550557, "grad_norm": 1.8708842992782593, "learning_rate": 3.6846696600384863e-06, "loss": 0.7988, "step": 24610 }, { "epoch": 0.2999890314796534, "grad_norm": 1.8608559370040894, "learning_rate": 3.6843489416292498e-06, "loss": 0.8591, "step": 24615 }, { "epoch": 0.3000499677038012, "grad_norm": 2.084683656692505, "learning_rate": 3.684028223220013e-06, "loss": 0.926, "step": 24620 }, { "epoch": 0.30011090392794904, "grad_norm": 1.813871145248413, "learning_rate": 3.6837075048107762e-06, "loss": 0.8618, "step": 24625 }, { "epoch": 0.3001718401520968, "grad_norm": 1.7922157049179077, "learning_rate": 3.68338678640154e-06, "loss": 0.8157, "step": 24630 }, { "epoch": 0.3002327763762446, "grad_norm": 2.1480517387390137, "learning_rate": 3.683066067992303e-06, "loss": 0.8655, "step": 24635 }, { "epoch": 0.30029371260039245, "grad_norm": 1.8416235446929932, "learning_rate": 3.682745349583066e-06, "loss": 0.8958, "step": 24640 }, { "epoch": 0.3003546488245402, "grad_norm": 1.952636480331421, "learning_rate": 3.68242463117383e-06, "loss": 0.9233, "step": 24645 }, { "epoch": 0.30041558504868804, "grad_norm": 2.03051495552063, "learning_rate": 3.682103912764593e-06, "loss": 0.822, "step": 24650 }, { "epoch": 0.30047652127283586, "grad_norm": 1.8594300746917725, "learning_rate": 3.681783194355356e-06, "loss": 0.8951, "step": 24655 }, { "epoch": 0.3005374574969837, "grad_norm": 1.9348692893981934, "learning_rate": 3.68146247594612e-06, "loss": 0.8722, "step": 24660 }, { "epoch": 0.30059839372113145, "grad_norm": 1.960455060005188, "learning_rate": 3.681141757536883e-06, "loss": 0.8542, "step": 24665 }, { "epoch": 0.3006593299452793, "grad_norm": 1.892232894897461, "learning_rate": 3.680821039127646e-06, "loss": 0.9057, "step": 24670 }, { "epoch": 0.3007202661694271, "grad_norm": 2.0310261249542236, "learning_rate": 3.6805003207184098e-06, "loss": 0.8984, "step": 24675 }, { "epoch": 0.30078120239357486, "grad_norm": 1.6869663000106812, "learning_rate": 3.680179602309173e-06, "loss": 0.9088, "step": 24680 }, { "epoch": 0.3008421386177227, "grad_norm": 1.6356170177459717, "learning_rate": 3.6798588838999362e-06, "loss": 0.8347, "step": 24685 }, { "epoch": 0.3009030748418705, "grad_norm": 2.1280171871185303, "learning_rate": 3.6795381654906993e-06, "loss": 0.9572, "step": 24690 }, { "epoch": 0.3009640110660183, "grad_norm": 1.9774856567382812, "learning_rate": 3.6792174470814627e-06, "loss": 0.8009, "step": 24695 }, { "epoch": 0.3010249472901661, "grad_norm": 1.9021674394607544, "learning_rate": 3.678896728672226e-06, "loss": 0.868, "step": 24700 }, { "epoch": 0.3010858835143139, "grad_norm": 1.9544881582260132, "learning_rate": 3.678576010262989e-06, "loss": 0.858, "step": 24705 }, { "epoch": 0.30114681973846175, "grad_norm": 1.8325611352920532, "learning_rate": 3.678255291853753e-06, "loss": 0.8706, "step": 24710 }, { "epoch": 0.3012077559626095, "grad_norm": 2.084503173828125, "learning_rate": 3.677934573444516e-06, "loss": 0.935, "step": 24715 }, { "epoch": 0.30126869218675734, "grad_norm": 2.2691550254821777, "learning_rate": 3.677613855035279e-06, "loss": 0.9173, "step": 24720 }, { "epoch": 0.30132962841090516, "grad_norm": 2.0435304641723633, "learning_rate": 3.677293136626043e-06, "loss": 0.8554, "step": 24725 }, { "epoch": 0.3013905646350529, "grad_norm": 1.677511215209961, "learning_rate": 3.676972418216806e-06, "loss": 0.9, "step": 24730 }, { "epoch": 0.30145150085920075, "grad_norm": 2.053370237350464, "learning_rate": 3.676651699807569e-06, "loss": 0.8509, "step": 24735 }, { "epoch": 0.3015124370833486, "grad_norm": 1.8400570154190063, "learning_rate": 3.676330981398333e-06, "loss": 0.8765, "step": 24740 }, { "epoch": 0.3015733733074964, "grad_norm": 1.9332208633422852, "learning_rate": 3.676010262989096e-06, "loss": 0.9064, "step": 24745 }, { "epoch": 0.30163430953164416, "grad_norm": 2.147526979446411, "learning_rate": 3.675689544579859e-06, "loss": 0.866, "step": 24750 }, { "epoch": 0.301695245755792, "grad_norm": 2.1697325706481934, "learning_rate": 3.6753688261706227e-06, "loss": 0.8561, "step": 24755 }, { "epoch": 0.3017561819799398, "grad_norm": 2.483710289001465, "learning_rate": 3.6750481077613857e-06, "loss": 0.8504, "step": 24760 }, { "epoch": 0.3018171182040876, "grad_norm": 1.8515691757202148, "learning_rate": 3.674727389352149e-06, "loss": 0.8758, "step": 24765 }, { "epoch": 0.3018780544282354, "grad_norm": 1.9246113300323486, "learning_rate": 3.6744066709429126e-06, "loss": 0.8705, "step": 24770 }, { "epoch": 0.3019389906523832, "grad_norm": 1.8778002262115479, "learning_rate": 3.674085952533676e-06, "loss": 0.8401, "step": 24775 }, { "epoch": 0.30199992687653104, "grad_norm": 1.8372076749801636, "learning_rate": 3.673765234124439e-06, "loss": 0.848, "step": 24780 }, { "epoch": 0.3020608631006788, "grad_norm": 2.1031241416931152, "learning_rate": 3.673444515715202e-06, "loss": 0.8891, "step": 24785 }, { "epoch": 0.30212179932482663, "grad_norm": 1.975243091583252, "learning_rate": 3.673123797305966e-06, "loss": 0.8113, "step": 24790 }, { "epoch": 0.30218273554897446, "grad_norm": 1.9380314350128174, "learning_rate": 3.672803078896729e-06, "loss": 0.96, "step": 24795 }, { "epoch": 0.3022436717731222, "grad_norm": 1.997414231300354, "learning_rate": 3.672482360487492e-06, "loss": 0.8508, "step": 24800 }, { "epoch": 0.30230460799727005, "grad_norm": 2.188924789428711, "learning_rate": 3.672161642078256e-06, "loss": 0.8628, "step": 24805 }, { "epoch": 0.30236554422141787, "grad_norm": 2.364666700363159, "learning_rate": 3.671840923669019e-06, "loss": 0.9325, "step": 24810 }, { "epoch": 0.3024264804455657, "grad_norm": 1.7797799110412598, "learning_rate": 3.671520205259782e-06, "loss": 0.866, "step": 24815 }, { "epoch": 0.30248741666971346, "grad_norm": 2.2466518878936768, "learning_rate": 3.6711994868505457e-06, "loss": 0.8195, "step": 24820 }, { "epoch": 0.3025483528938613, "grad_norm": 1.9814244508743286, "learning_rate": 3.6708787684413088e-06, "loss": 0.824, "step": 24825 }, { "epoch": 0.3026092891180091, "grad_norm": 1.728085994720459, "learning_rate": 3.670558050032072e-06, "loss": 0.8121, "step": 24830 }, { "epoch": 0.3026702253421569, "grad_norm": 1.6568536758422852, "learning_rate": 3.6702373316228356e-06, "loss": 0.9005, "step": 24835 }, { "epoch": 0.3027311615663047, "grad_norm": 2.4039647579193115, "learning_rate": 3.6699166132135987e-06, "loss": 0.8733, "step": 24840 }, { "epoch": 0.3027920977904525, "grad_norm": 2.153364419937134, "learning_rate": 3.669595894804362e-06, "loss": 0.8516, "step": 24845 }, { "epoch": 0.30285303401460034, "grad_norm": 1.934098243713379, "learning_rate": 3.6692751763951255e-06, "loss": 0.94, "step": 24850 }, { "epoch": 0.3029139702387481, "grad_norm": 1.6672676801681519, "learning_rate": 3.668954457985889e-06, "loss": 0.855, "step": 24855 }, { "epoch": 0.30297490646289593, "grad_norm": 1.9161192178726196, "learning_rate": 3.668633739576652e-06, "loss": 0.8101, "step": 24860 }, { "epoch": 0.30303584268704375, "grad_norm": 1.7032859325408936, "learning_rate": 3.668313021167415e-06, "loss": 0.7855, "step": 24865 }, { "epoch": 0.3030967789111915, "grad_norm": 1.9140539169311523, "learning_rate": 3.667992302758179e-06, "loss": 0.91, "step": 24870 }, { "epoch": 0.30315771513533935, "grad_norm": 1.810839295387268, "learning_rate": 3.667671584348942e-06, "loss": 0.8432, "step": 24875 }, { "epoch": 0.30321865135948717, "grad_norm": 2.055367946624756, "learning_rate": 3.667350865939705e-06, "loss": 0.8716, "step": 24880 }, { "epoch": 0.303279587583635, "grad_norm": 1.9085537195205688, "learning_rate": 3.6670301475304688e-06, "loss": 0.9066, "step": 24885 }, { "epoch": 0.30334052380778276, "grad_norm": 1.7920372486114502, "learning_rate": 3.6667094291212318e-06, "loss": 0.8123, "step": 24890 }, { "epoch": 0.3034014600319306, "grad_norm": 3.02215576171875, "learning_rate": 3.666388710711995e-06, "loss": 0.875, "step": 24895 }, { "epoch": 0.3034623962560784, "grad_norm": 1.8673001527786255, "learning_rate": 3.6660679923027587e-06, "loss": 0.8734, "step": 24900 }, { "epoch": 0.30352333248022617, "grad_norm": 2.0035290718078613, "learning_rate": 3.6657472738935217e-06, "loss": 0.8415, "step": 24905 }, { "epoch": 0.303584268704374, "grad_norm": 2.101656675338745, "learning_rate": 3.665426555484285e-06, "loss": 0.8953, "step": 24910 }, { "epoch": 0.3036452049285218, "grad_norm": 2.022209405899048, "learning_rate": 3.6651058370750486e-06, "loss": 0.8396, "step": 24915 }, { "epoch": 0.30370614115266964, "grad_norm": 1.980158805847168, "learning_rate": 3.6647851186658116e-06, "loss": 0.8297, "step": 24920 }, { "epoch": 0.3037670773768174, "grad_norm": 2.173672914505005, "learning_rate": 3.664464400256575e-06, "loss": 0.873, "step": 24925 }, { "epoch": 0.30382801360096523, "grad_norm": 2.259282112121582, "learning_rate": 3.6641436818473385e-06, "loss": 0.8059, "step": 24930 }, { "epoch": 0.30388894982511305, "grad_norm": 1.9072809219360352, "learning_rate": 3.663822963438102e-06, "loss": 0.9133, "step": 24935 }, { "epoch": 0.3039498860492608, "grad_norm": 1.7728019952774048, "learning_rate": 3.663502245028865e-06, "loss": 0.8445, "step": 24940 }, { "epoch": 0.30401082227340864, "grad_norm": 2.060488224029541, "learning_rate": 3.663181526619628e-06, "loss": 0.8774, "step": 24945 }, { "epoch": 0.30407175849755647, "grad_norm": 2.039393186569214, "learning_rate": 3.662860808210392e-06, "loss": 0.9213, "step": 24950 }, { "epoch": 0.3041326947217043, "grad_norm": 1.7570133209228516, "learning_rate": 3.662540089801155e-06, "loss": 0.8742, "step": 24955 }, { "epoch": 0.30419363094585206, "grad_norm": 1.80900239944458, "learning_rate": 3.662219371391918e-06, "loss": 0.7871, "step": 24960 }, { "epoch": 0.3042545671699999, "grad_norm": 1.8715285062789917, "learning_rate": 3.6618986529826817e-06, "loss": 0.8662, "step": 24965 }, { "epoch": 0.3043155033941477, "grad_norm": 2.0206239223480225, "learning_rate": 3.6615779345734447e-06, "loss": 0.9237, "step": 24970 }, { "epoch": 0.30437643961829547, "grad_norm": 2.108306884765625, "learning_rate": 3.6612572161642077e-06, "loss": 0.9281, "step": 24975 }, { "epoch": 0.3044373758424433, "grad_norm": 1.9200788736343384, "learning_rate": 3.6609364977549716e-06, "loss": 0.9414, "step": 24980 }, { "epoch": 0.3044983120665911, "grad_norm": 1.8108810186386108, "learning_rate": 3.6606157793457346e-06, "loss": 0.8169, "step": 24985 }, { "epoch": 0.30455924829073894, "grad_norm": 2.073345184326172, "learning_rate": 3.660295060936498e-06, "loss": 0.9437, "step": 24990 }, { "epoch": 0.3046201845148867, "grad_norm": 2.317498207092285, "learning_rate": 3.6599743425272615e-06, "loss": 0.8761, "step": 24995 }, { "epoch": 0.3046811207390345, "grad_norm": 1.7471439838409424, "learning_rate": 3.6596536241180245e-06, "loss": 0.8016, "step": 25000 }, { "epoch": 0.30474205696318235, "grad_norm": 1.9498569965362549, "learning_rate": 3.659332905708788e-06, "loss": 0.8787, "step": 25005 }, { "epoch": 0.3048029931873301, "grad_norm": 1.9270694255828857, "learning_rate": 3.6590121872995514e-06, "loss": 0.8641, "step": 25010 }, { "epoch": 0.30486392941147794, "grad_norm": 2.6195075511932373, "learning_rate": 3.658691468890315e-06, "loss": 0.8772, "step": 25015 }, { "epoch": 0.30492486563562576, "grad_norm": 1.9504798650741577, "learning_rate": 3.658370750481078e-06, "loss": 0.8726, "step": 25020 }, { "epoch": 0.3049858018597736, "grad_norm": 1.896255612373352, "learning_rate": 3.658050032071841e-06, "loss": 0.8624, "step": 25025 }, { "epoch": 0.30504673808392135, "grad_norm": 1.6974509954452515, "learning_rate": 3.6577293136626047e-06, "loss": 0.8893, "step": 25030 }, { "epoch": 0.3051076743080692, "grad_norm": 1.921694278717041, "learning_rate": 3.6574085952533677e-06, "loss": 0.8364, "step": 25035 }, { "epoch": 0.305168610532217, "grad_norm": 1.9709309339523315, "learning_rate": 3.6570878768441308e-06, "loss": 0.8271, "step": 25040 }, { "epoch": 0.30522954675636477, "grad_norm": 2.0802624225616455, "learning_rate": 3.6567671584348946e-06, "loss": 0.895, "step": 25045 }, { "epoch": 0.3052904829805126, "grad_norm": 1.8558024168014526, "learning_rate": 3.6564464400256576e-06, "loss": 0.8766, "step": 25050 }, { "epoch": 0.3053514192046604, "grad_norm": 14.384215354919434, "learning_rate": 3.656125721616421e-06, "loss": 0.8752, "step": 25055 }, { "epoch": 0.30541235542880824, "grad_norm": 6.215162754058838, "learning_rate": 3.6558050032071845e-06, "loss": 0.8856, "step": 25060 }, { "epoch": 0.305473291652956, "grad_norm": 1.7719573974609375, "learning_rate": 3.6554842847979475e-06, "loss": 0.8361, "step": 25065 }, { "epoch": 0.3055342278771038, "grad_norm": 2.042631149291992, "learning_rate": 3.655163566388711e-06, "loss": 0.8674, "step": 25070 }, { "epoch": 0.30559516410125165, "grad_norm": 1.9620323181152344, "learning_rate": 3.6548428479794744e-06, "loss": 0.862, "step": 25075 }, { "epoch": 0.3056561003253994, "grad_norm": 2.1065971851348877, "learning_rate": 3.654522129570238e-06, "loss": 0.8567, "step": 25080 }, { "epoch": 0.30571703654954724, "grad_norm": 2.4260687828063965, "learning_rate": 3.654201411161001e-06, "loss": 0.8774, "step": 25085 }, { "epoch": 0.30577797277369506, "grad_norm": 1.9024910926818848, "learning_rate": 3.6538806927517643e-06, "loss": 0.8668, "step": 25090 }, { "epoch": 0.3058389089978429, "grad_norm": 2.046088218688965, "learning_rate": 3.6535599743425278e-06, "loss": 0.9114, "step": 25095 }, { "epoch": 0.30589984522199065, "grad_norm": 2.2866156101226807, "learning_rate": 3.6532392559332908e-06, "loss": 0.8325, "step": 25100 }, { "epoch": 0.3059607814461385, "grad_norm": 1.9060673713684082, "learning_rate": 3.6529185375240546e-06, "loss": 0.9114, "step": 25105 }, { "epoch": 0.3060217176702863, "grad_norm": 2.1783576011657715, "learning_rate": 3.6525978191148177e-06, "loss": 0.8516, "step": 25110 }, { "epoch": 0.30608265389443406, "grad_norm": 1.9841864109039307, "learning_rate": 3.6522771007055807e-06, "loss": 0.892, "step": 25115 }, { "epoch": 0.3061435901185819, "grad_norm": 1.9607917070388794, "learning_rate": 3.6519563822963437e-06, "loss": 0.8969, "step": 25120 }, { "epoch": 0.3062045263427297, "grad_norm": 1.860132098197937, "learning_rate": 3.6516356638871075e-06, "loss": 0.8759, "step": 25125 }, { "epoch": 0.30626546256687753, "grad_norm": 1.596680998802185, "learning_rate": 3.6513149454778706e-06, "loss": 0.8263, "step": 25130 }, { "epoch": 0.3063263987910253, "grad_norm": 1.9522285461425781, "learning_rate": 3.650994227068634e-06, "loss": 0.8665, "step": 25135 }, { "epoch": 0.3063873350151731, "grad_norm": 2.5222420692443848, "learning_rate": 3.6506735086593974e-06, "loss": 0.8327, "step": 25140 }, { "epoch": 0.30644827123932095, "grad_norm": 1.8504290580749512, "learning_rate": 3.6503527902501605e-06, "loss": 0.7903, "step": 25145 }, { "epoch": 0.3065092074634687, "grad_norm": 1.709066390991211, "learning_rate": 3.650032071840924e-06, "loss": 0.9215, "step": 25150 }, { "epoch": 0.30657014368761654, "grad_norm": 1.8488945960998535, "learning_rate": 3.6497113534316873e-06, "loss": 0.8841, "step": 25155 }, { "epoch": 0.30663107991176436, "grad_norm": 1.9210032224655151, "learning_rate": 3.6493906350224508e-06, "loss": 0.8166, "step": 25160 }, { "epoch": 0.3066920161359121, "grad_norm": 1.829688549041748, "learning_rate": 3.649069916613214e-06, "loss": 0.8385, "step": 25165 }, { "epoch": 0.30675295236005995, "grad_norm": 2.0516433715820312, "learning_rate": 3.6487491982039772e-06, "loss": 0.8785, "step": 25170 }, { "epoch": 0.3068138885842078, "grad_norm": 1.864325761795044, "learning_rate": 3.6484284797947407e-06, "loss": 0.8956, "step": 25175 }, { "epoch": 0.3068748248083556, "grad_norm": 2.2896924018859863, "learning_rate": 3.6481077613855037e-06, "loss": 0.8723, "step": 25180 }, { "epoch": 0.30693576103250336, "grad_norm": 2.1995151042938232, "learning_rate": 3.6477870429762676e-06, "loss": 0.8441, "step": 25185 }, { "epoch": 0.3069966972566512, "grad_norm": 2.1419081687927246, "learning_rate": 3.6474663245670306e-06, "loss": 0.867, "step": 25190 }, { "epoch": 0.307057633480799, "grad_norm": 1.9020140171051025, "learning_rate": 3.6471456061577936e-06, "loss": 0.8722, "step": 25195 }, { "epoch": 0.3071185697049468, "grad_norm": 1.66928231716156, "learning_rate": 3.6468248877485566e-06, "loss": 0.8683, "step": 25200 }, { "epoch": 0.3071795059290946, "grad_norm": 1.7422627210617065, "learning_rate": 3.6465041693393205e-06, "loss": 0.8456, "step": 25205 }, { "epoch": 0.3072404421532424, "grad_norm": 2.0559444427490234, "learning_rate": 3.6461834509300835e-06, "loss": 0.8615, "step": 25210 }, { "epoch": 0.30730137837739024, "grad_norm": 1.7229351997375488, "learning_rate": 3.645862732520847e-06, "loss": 0.8391, "step": 25215 }, { "epoch": 0.307362314601538, "grad_norm": 2.0694735050201416, "learning_rate": 3.6455420141116104e-06, "loss": 0.8317, "step": 25220 }, { "epoch": 0.30742325082568583, "grad_norm": 2.12626051902771, "learning_rate": 3.6452212957023734e-06, "loss": 0.8552, "step": 25225 }, { "epoch": 0.30748418704983366, "grad_norm": 2.410922050476074, "learning_rate": 3.644900577293137e-06, "loss": 0.8379, "step": 25230 }, { "epoch": 0.3075451232739814, "grad_norm": 1.9303338527679443, "learning_rate": 3.6445798588839003e-06, "loss": 0.9121, "step": 25235 }, { "epoch": 0.30760605949812925, "grad_norm": 1.8925873041152954, "learning_rate": 3.6442591404746637e-06, "loss": 0.8067, "step": 25240 }, { "epoch": 0.30766699572227707, "grad_norm": 2.0311520099639893, "learning_rate": 3.6439384220654267e-06, "loss": 0.8837, "step": 25245 }, { "epoch": 0.3077279319464249, "grad_norm": 1.890990138053894, "learning_rate": 3.6436177036561906e-06, "loss": 0.8399, "step": 25250 }, { "epoch": 0.30778886817057266, "grad_norm": 1.8311809301376343, "learning_rate": 3.6432969852469536e-06, "loss": 0.8816, "step": 25255 }, { "epoch": 0.3078498043947205, "grad_norm": 1.8908841609954834, "learning_rate": 3.6429762668377166e-06, "loss": 0.8585, "step": 25260 }, { "epoch": 0.3079107406188683, "grad_norm": 2.0679750442504883, "learning_rate": 3.6426555484284805e-06, "loss": 0.7829, "step": 25265 }, { "epoch": 0.3079716768430161, "grad_norm": 1.9769290685653687, "learning_rate": 3.6423348300192435e-06, "loss": 0.8847, "step": 25270 }, { "epoch": 0.3080326130671639, "grad_norm": 1.8952717781066895, "learning_rate": 3.6420141116100065e-06, "loss": 0.9505, "step": 25275 }, { "epoch": 0.3080935492913117, "grad_norm": 1.8137359619140625, "learning_rate": 3.64169339320077e-06, "loss": 0.8518, "step": 25280 }, { "epoch": 0.30815448551545954, "grad_norm": 2.669140338897705, "learning_rate": 3.6413726747915334e-06, "loss": 0.8654, "step": 25285 }, { "epoch": 0.3082154217396073, "grad_norm": 1.9526435136795044, "learning_rate": 3.6410519563822964e-06, "loss": 0.9003, "step": 25290 }, { "epoch": 0.30827635796375513, "grad_norm": 1.911744236946106, "learning_rate": 3.64073123797306e-06, "loss": 0.8325, "step": 25295 }, { "epoch": 0.30833729418790295, "grad_norm": 1.7029746770858765, "learning_rate": 3.6404105195638233e-06, "loss": 0.8631, "step": 25300 }, { "epoch": 0.3083982304120507, "grad_norm": 1.8786232471466064, "learning_rate": 3.6400898011545867e-06, "loss": 0.8564, "step": 25305 }, { "epoch": 0.30845916663619855, "grad_norm": 2.015846014022827, "learning_rate": 3.6397690827453498e-06, "loss": 0.8889, "step": 25310 }, { "epoch": 0.30852010286034637, "grad_norm": 1.8728102445602417, "learning_rate": 3.639448364336113e-06, "loss": 0.8327, "step": 25315 }, { "epoch": 0.3085810390844942, "grad_norm": 1.9084008932113647, "learning_rate": 3.6391276459268766e-06, "loss": 0.9139, "step": 25320 }, { "epoch": 0.30864197530864196, "grad_norm": 2.5012714862823486, "learning_rate": 3.6388069275176397e-06, "loss": 0.8854, "step": 25325 }, { "epoch": 0.3087029115327898, "grad_norm": 2.582392692565918, "learning_rate": 3.6384862091084035e-06, "loss": 0.9155, "step": 25330 }, { "epoch": 0.3087638477569376, "grad_norm": 2.024050235748291, "learning_rate": 3.6381654906991665e-06, "loss": 0.8206, "step": 25335 }, { "epoch": 0.30882478398108537, "grad_norm": 1.9064695835113525, "learning_rate": 3.6378447722899296e-06, "loss": 0.9561, "step": 25340 }, { "epoch": 0.3088857202052332, "grad_norm": 2.348275899887085, "learning_rate": 3.6375240538806934e-06, "loss": 0.806, "step": 25345 }, { "epoch": 0.308946656429381, "grad_norm": 2.0117311477661133, "learning_rate": 3.6372033354714564e-06, "loss": 0.8253, "step": 25350 }, { "epoch": 0.30900759265352884, "grad_norm": 1.9177279472351074, "learning_rate": 3.6368826170622195e-06, "loss": 0.9275, "step": 25355 }, { "epoch": 0.3090685288776766, "grad_norm": 1.7616961002349854, "learning_rate": 3.636561898652983e-06, "loss": 0.8673, "step": 25360 }, { "epoch": 0.30912946510182443, "grad_norm": 1.7663097381591797, "learning_rate": 3.6362411802437463e-06, "loss": 0.8229, "step": 25365 }, { "epoch": 0.30919040132597225, "grad_norm": 2.7574241161346436, "learning_rate": 3.6359204618345093e-06, "loss": 0.8781, "step": 25370 }, { "epoch": 0.30925133755012, "grad_norm": 1.940828800201416, "learning_rate": 3.6355997434252728e-06, "loss": 0.9742, "step": 25375 }, { "epoch": 0.30931227377426784, "grad_norm": 1.8323854207992554, "learning_rate": 3.6352790250160362e-06, "loss": 0.905, "step": 25380 }, { "epoch": 0.30937320999841567, "grad_norm": 1.7942514419555664, "learning_rate": 3.6349583066067997e-06, "loss": 0.7913, "step": 25385 }, { "epoch": 0.3094341462225635, "grad_norm": 1.9265570640563965, "learning_rate": 3.6346375881975627e-06, "loss": 0.8412, "step": 25390 }, { "epoch": 0.30949508244671126, "grad_norm": 1.8442877531051636, "learning_rate": 3.634316869788326e-06, "loss": 0.8565, "step": 25395 }, { "epoch": 0.3095560186708591, "grad_norm": 2.4583771228790283, "learning_rate": 3.6339961513790896e-06, "loss": 0.8936, "step": 25400 }, { "epoch": 0.3096169548950069, "grad_norm": 2.0579397678375244, "learning_rate": 3.6336754329698526e-06, "loss": 0.9464, "step": 25405 }, { "epoch": 0.30967789111915467, "grad_norm": 2.038320541381836, "learning_rate": 3.6333547145606164e-06, "loss": 0.9047, "step": 25410 }, { "epoch": 0.3097388273433025, "grad_norm": 1.7114115953445435, "learning_rate": 3.6330339961513795e-06, "loss": 0.883, "step": 25415 }, { "epoch": 0.3097997635674503, "grad_norm": 1.9951192140579224, "learning_rate": 3.6327132777421425e-06, "loss": 0.8651, "step": 25420 }, { "epoch": 0.30986069979159814, "grad_norm": 2.365809440612793, "learning_rate": 3.6323925593329063e-06, "loss": 0.9467, "step": 25425 }, { "epoch": 0.3099216360157459, "grad_norm": 1.7156394720077515, "learning_rate": 3.6320718409236694e-06, "loss": 0.888, "step": 25430 }, { "epoch": 0.30998257223989373, "grad_norm": 2.463848352432251, "learning_rate": 3.6317511225144324e-06, "loss": 0.9329, "step": 25435 }, { "epoch": 0.31004350846404155, "grad_norm": 1.9188191890716553, "learning_rate": 3.6314304041051962e-06, "loss": 0.8741, "step": 25440 }, { "epoch": 0.3101044446881893, "grad_norm": 2.3074774742126465, "learning_rate": 3.6311096856959593e-06, "loss": 0.7798, "step": 25445 }, { "epoch": 0.31016538091233714, "grad_norm": 1.863385796546936, "learning_rate": 3.6307889672867223e-06, "loss": 0.8547, "step": 25450 }, { "epoch": 0.31022631713648496, "grad_norm": 1.8188751935958862, "learning_rate": 3.6304682488774857e-06, "loss": 0.9104, "step": 25455 }, { "epoch": 0.3102872533606328, "grad_norm": 1.918837070465088, "learning_rate": 3.630147530468249e-06, "loss": 0.8718, "step": 25460 }, { "epoch": 0.31034818958478055, "grad_norm": 1.8869391679763794, "learning_rate": 3.6298268120590126e-06, "loss": 0.8607, "step": 25465 }, { "epoch": 0.3104091258089284, "grad_norm": 1.80754554271698, "learning_rate": 3.6295060936497756e-06, "loss": 0.8122, "step": 25470 }, { "epoch": 0.3104700620330762, "grad_norm": 2.0630226135253906, "learning_rate": 3.6291853752405395e-06, "loss": 0.9218, "step": 25475 }, { "epoch": 0.31053099825722397, "grad_norm": 1.9482104778289795, "learning_rate": 3.6288646568313025e-06, "loss": 0.872, "step": 25480 }, { "epoch": 0.3105919344813718, "grad_norm": 1.9252835512161255, "learning_rate": 3.6285439384220655e-06, "loss": 0.9178, "step": 25485 }, { "epoch": 0.3106528707055196, "grad_norm": 2.162588119506836, "learning_rate": 3.6282232200128294e-06, "loss": 0.873, "step": 25490 }, { "epoch": 0.31071380692966744, "grad_norm": 1.8069000244140625, "learning_rate": 3.6279025016035924e-06, "loss": 0.902, "step": 25495 }, { "epoch": 0.3107747431538152, "grad_norm": 1.9093745946884155, "learning_rate": 3.6275817831943554e-06, "loss": 0.8533, "step": 25500 }, { "epoch": 0.310835679377963, "grad_norm": 2.016754388809204, "learning_rate": 3.6272610647851193e-06, "loss": 0.8732, "step": 25505 }, { "epoch": 0.31089661560211085, "grad_norm": 1.937570571899414, "learning_rate": 3.6269403463758823e-06, "loss": 0.8579, "step": 25510 }, { "epoch": 0.3109575518262586, "grad_norm": 2.53255295753479, "learning_rate": 3.6266196279666453e-06, "loss": 0.7969, "step": 25515 }, { "epoch": 0.31101848805040644, "grad_norm": 2.1081204414367676, "learning_rate": 3.626298909557409e-06, "loss": 0.8595, "step": 25520 }, { "epoch": 0.31107942427455426, "grad_norm": 2.5508475303649902, "learning_rate": 3.625978191148172e-06, "loss": 0.8858, "step": 25525 }, { "epoch": 0.3111403604987021, "grad_norm": 2.1490466594696045, "learning_rate": 3.6256574727389356e-06, "loss": 0.8257, "step": 25530 }, { "epoch": 0.31120129672284985, "grad_norm": 1.9421207904815674, "learning_rate": 3.6253367543296986e-06, "loss": 0.869, "step": 25535 }, { "epoch": 0.3112622329469977, "grad_norm": 1.7769887447357178, "learning_rate": 3.625016035920462e-06, "loss": 0.8444, "step": 25540 }, { "epoch": 0.3113231691711455, "grad_norm": 1.5996719598770142, "learning_rate": 3.6246953175112255e-06, "loss": 0.8088, "step": 25545 }, { "epoch": 0.31138410539529326, "grad_norm": 2.189657688140869, "learning_rate": 3.6243745991019885e-06, "loss": 0.8524, "step": 25550 }, { "epoch": 0.3114450416194411, "grad_norm": 2.0390279293060303, "learning_rate": 3.6240538806927524e-06, "loss": 0.8651, "step": 25555 }, { "epoch": 0.3115059778435889, "grad_norm": 2.173963785171509, "learning_rate": 3.6237331622835154e-06, "loss": 0.8838, "step": 25560 }, { "epoch": 0.31156691406773673, "grad_norm": 2.3388679027557373, "learning_rate": 3.6234124438742784e-06, "loss": 0.8815, "step": 25565 }, { "epoch": 0.3116278502918845, "grad_norm": 2.8054494857788086, "learning_rate": 3.6230917254650423e-06, "loss": 0.9156, "step": 25570 }, { "epoch": 0.3116887865160323, "grad_norm": 1.8572676181793213, "learning_rate": 3.6227710070558053e-06, "loss": 0.8795, "step": 25575 }, { "epoch": 0.31174972274018015, "grad_norm": 2.0985281467437744, "learning_rate": 3.6224502886465683e-06, "loss": 0.8671, "step": 25580 }, { "epoch": 0.3118106589643279, "grad_norm": 2.1421566009521484, "learning_rate": 3.622129570237332e-06, "loss": 0.8509, "step": 25585 }, { "epoch": 0.31187159518847574, "grad_norm": 2.1310007572174072, "learning_rate": 3.6218088518280952e-06, "loss": 0.9277, "step": 25590 }, { "epoch": 0.31193253141262356, "grad_norm": 2.534125328063965, "learning_rate": 3.6214881334188582e-06, "loss": 0.7956, "step": 25595 }, { "epoch": 0.3119934676367714, "grad_norm": 1.9974979162216187, "learning_rate": 3.621167415009622e-06, "loss": 0.8736, "step": 25600 }, { "epoch": 0.31205440386091915, "grad_norm": 2.352778911590576, "learning_rate": 3.620846696600385e-06, "loss": 0.8401, "step": 25605 }, { "epoch": 0.312115340085067, "grad_norm": 2.466496467590332, "learning_rate": 3.6205259781911486e-06, "loss": 0.8593, "step": 25610 }, { "epoch": 0.3121762763092148, "grad_norm": 2.120718002319336, "learning_rate": 3.6202052597819116e-06, "loss": 0.9179, "step": 25615 }, { "epoch": 0.31223721253336256, "grad_norm": 1.8245238065719604, "learning_rate": 3.619884541372675e-06, "loss": 0.8534, "step": 25620 }, { "epoch": 0.3122981487575104, "grad_norm": 1.9109859466552734, "learning_rate": 3.6195638229634384e-06, "loss": 0.8656, "step": 25625 }, { "epoch": 0.3123590849816582, "grad_norm": 1.8075214624404907, "learning_rate": 3.6192431045542015e-06, "loss": 0.836, "step": 25630 }, { "epoch": 0.31242002120580603, "grad_norm": 2.1331450939178467, "learning_rate": 3.6189223861449653e-06, "loss": 0.8844, "step": 25635 }, { "epoch": 0.3124809574299538, "grad_norm": 1.7499589920043945, "learning_rate": 3.6186016677357283e-06, "loss": 0.845, "step": 25640 }, { "epoch": 0.3125418936541016, "grad_norm": 2.221095561981201, "learning_rate": 3.6182809493264914e-06, "loss": 0.8799, "step": 25645 }, { "epoch": 0.31260282987824944, "grad_norm": 2.0787298679351807, "learning_rate": 3.6179602309172552e-06, "loss": 0.8731, "step": 25650 }, { "epoch": 0.3126637661023972, "grad_norm": 2.371128559112549, "learning_rate": 3.6176395125080182e-06, "loss": 0.9114, "step": 25655 }, { "epoch": 0.31272470232654503, "grad_norm": 1.972755789756775, "learning_rate": 3.6173187940987813e-06, "loss": 0.8684, "step": 25660 }, { "epoch": 0.31278563855069286, "grad_norm": 1.8745853900909424, "learning_rate": 3.616998075689545e-06, "loss": 0.8956, "step": 25665 }, { "epoch": 0.3128465747748406, "grad_norm": 2.06416654586792, "learning_rate": 3.616677357280308e-06, "loss": 0.9228, "step": 25670 }, { "epoch": 0.31290751099898845, "grad_norm": 2.1968166828155518, "learning_rate": 3.616356638871071e-06, "loss": 0.7894, "step": 25675 }, { "epoch": 0.31296844722313627, "grad_norm": 2.2027459144592285, "learning_rate": 3.616035920461835e-06, "loss": 0.9016, "step": 25680 }, { "epoch": 0.3130293834472841, "grad_norm": 2.1454505920410156, "learning_rate": 3.615715202052598e-06, "loss": 0.8453, "step": 25685 }, { "epoch": 0.31309031967143186, "grad_norm": 2.000335216522217, "learning_rate": 3.6153944836433615e-06, "loss": 0.867, "step": 25690 }, { "epoch": 0.3131512558955797, "grad_norm": 2.158965826034546, "learning_rate": 3.615073765234125e-06, "loss": 0.8858, "step": 25695 }, { "epoch": 0.3132121921197275, "grad_norm": 2.354769706726074, "learning_rate": 3.614753046824888e-06, "loss": 0.8315, "step": 25700 }, { "epoch": 0.3132731283438753, "grad_norm": 1.8878971338272095, "learning_rate": 3.6144323284156514e-06, "loss": 0.8574, "step": 25705 }, { "epoch": 0.3133340645680231, "grad_norm": 2.3126842975616455, "learning_rate": 3.6141116100064144e-06, "loss": 0.8982, "step": 25710 }, { "epoch": 0.3133950007921709, "grad_norm": 2.0664753913879395, "learning_rate": 3.6137908915971783e-06, "loss": 0.852, "step": 25715 }, { "epoch": 0.31345593701631874, "grad_norm": 1.9289265871047974, "learning_rate": 3.6134701731879413e-06, "loss": 0.9015, "step": 25720 }, { "epoch": 0.3135168732404665, "grad_norm": 1.7808680534362793, "learning_rate": 3.6131494547787043e-06, "loss": 0.8205, "step": 25725 }, { "epoch": 0.31357780946461433, "grad_norm": 2.2441020011901855, "learning_rate": 3.612828736369468e-06, "loss": 0.8265, "step": 25730 }, { "epoch": 0.31363874568876215, "grad_norm": 1.9768964052200317, "learning_rate": 3.612508017960231e-06, "loss": 0.8577, "step": 25735 }, { "epoch": 0.3136996819129099, "grad_norm": 1.7572203874588013, "learning_rate": 3.612187299550994e-06, "loss": 0.9371, "step": 25740 }, { "epoch": 0.31376061813705775, "grad_norm": 2.6011428833007812, "learning_rate": 3.611866581141758e-06, "loss": 0.8911, "step": 25745 }, { "epoch": 0.31382155436120557, "grad_norm": 2.290999412536621, "learning_rate": 3.611545862732521e-06, "loss": 0.8934, "step": 25750 }, { "epoch": 0.3138824905853534, "grad_norm": 2.0220565795898438, "learning_rate": 3.6112251443232845e-06, "loss": 0.8106, "step": 25755 }, { "epoch": 0.31394342680950116, "grad_norm": 1.927415132522583, "learning_rate": 3.610904425914048e-06, "loss": 0.8845, "step": 25760 }, { "epoch": 0.314004363033649, "grad_norm": 1.9257680177688599, "learning_rate": 3.610583707504811e-06, "loss": 0.7979, "step": 25765 }, { "epoch": 0.3140652992577968, "grad_norm": 1.774075984954834, "learning_rate": 3.6102629890955744e-06, "loss": 0.9236, "step": 25770 }, { "epoch": 0.31412623548194457, "grad_norm": 1.9677988290786743, "learning_rate": 3.609942270686338e-06, "loss": 0.8825, "step": 25775 }, { "epoch": 0.3141871717060924, "grad_norm": 1.9851484298706055, "learning_rate": 3.6096215522771013e-06, "loss": 0.8544, "step": 25780 }, { "epoch": 0.3142481079302402, "grad_norm": 2.1265809535980225, "learning_rate": 3.6093008338678643e-06, "loss": 0.8966, "step": 25785 }, { "epoch": 0.31430904415438804, "grad_norm": 2.190095901489258, "learning_rate": 3.6089801154586273e-06, "loss": 0.8556, "step": 25790 }, { "epoch": 0.3143699803785358, "grad_norm": 1.8844337463378906, "learning_rate": 3.608659397049391e-06, "loss": 0.9186, "step": 25795 }, { "epoch": 0.31443091660268363, "grad_norm": 1.9853416681289673, "learning_rate": 3.608338678640154e-06, "loss": 0.8521, "step": 25800 }, { "epoch": 0.31449185282683145, "grad_norm": 2.0654141902923584, "learning_rate": 3.6080179602309172e-06, "loss": 0.8741, "step": 25805 }, { "epoch": 0.3145527890509792, "grad_norm": 2.1146938800811768, "learning_rate": 3.607697241821681e-06, "loss": 0.8054, "step": 25810 }, { "epoch": 0.31461372527512704, "grad_norm": 1.9247838258743286, "learning_rate": 3.607376523412444e-06, "loss": 0.8087, "step": 25815 }, { "epoch": 0.31467466149927487, "grad_norm": 1.8392672538757324, "learning_rate": 3.607055805003207e-06, "loss": 0.937, "step": 25820 }, { "epoch": 0.3147355977234227, "grad_norm": 2.5787644386291504, "learning_rate": 3.606735086593971e-06, "loss": 0.8721, "step": 25825 }, { "epoch": 0.31479653394757046, "grad_norm": 1.924079418182373, "learning_rate": 3.606414368184734e-06, "loss": 0.874, "step": 25830 }, { "epoch": 0.3148574701717183, "grad_norm": 2.0534260272979736, "learning_rate": 3.6060936497754974e-06, "loss": 0.8575, "step": 25835 }, { "epoch": 0.3149184063958661, "grad_norm": 1.8413537740707397, "learning_rate": 3.605772931366261e-06, "loss": 0.7976, "step": 25840 }, { "epoch": 0.31497934262001387, "grad_norm": 1.7681845426559448, "learning_rate": 3.605452212957024e-06, "loss": 0.8866, "step": 25845 }, { "epoch": 0.3150402788441617, "grad_norm": 1.8476264476776123, "learning_rate": 3.6051314945477873e-06, "loss": 0.8572, "step": 25850 }, { "epoch": 0.3151012150683095, "grad_norm": 1.9483451843261719, "learning_rate": 3.6048107761385508e-06, "loss": 0.8849, "step": 25855 }, { "epoch": 0.31516215129245734, "grad_norm": 2.1873810291290283, "learning_rate": 3.6044900577293142e-06, "loss": 0.8403, "step": 25860 }, { "epoch": 0.3152230875166051, "grad_norm": 2.2334847450256348, "learning_rate": 3.6041693393200772e-06, "loss": 0.9076, "step": 25865 }, { "epoch": 0.31528402374075293, "grad_norm": 1.923066258430481, "learning_rate": 3.6038486209108402e-06, "loss": 0.8667, "step": 25870 }, { "epoch": 0.31534495996490075, "grad_norm": 2.113985061645508, "learning_rate": 3.603527902501604e-06, "loss": 0.8615, "step": 25875 }, { "epoch": 0.3154058961890485, "grad_norm": 1.9035478830337524, "learning_rate": 3.603207184092367e-06, "loss": 0.8806, "step": 25880 }, { "epoch": 0.31546683241319634, "grad_norm": 1.9918839931488037, "learning_rate": 3.60288646568313e-06, "loss": 0.9079, "step": 25885 }, { "epoch": 0.31552776863734416, "grad_norm": 1.708422303199768, "learning_rate": 3.602565747273894e-06, "loss": 0.858, "step": 25890 }, { "epoch": 0.315588704861492, "grad_norm": 1.893551230430603, "learning_rate": 3.602245028864657e-06, "loss": 0.8664, "step": 25895 }, { "epoch": 0.31564964108563975, "grad_norm": 1.8786907196044922, "learning_rate": 3.60192431045542e-06, "loss": 0.8733, "step": 25900 }, { "epoch": 0.3157105773097876, "grad_norm": 2.186008930206299, "learning_rate": 3.601603592046184e-06, "loss": 0.8919, "step": 25905 }, { "epoch": 0.3157715135339354, "grad_norm": 1.9571565389633179, "learning_rate": 3.601282873636947e-06, "loss": 0.7889, "step": 25910 }, { "epoch": 0.31583244975808317, "grad_norm": 1.924911618232727, "learning_rate": 3.6009621552277104e-06, "loss": 0.8974, "step": 25915 }, { "epoch": 0.315893385982231, "grad_norm": 2.1817667484283447, "learning_rate": 3.600641436818474e-06, "loss": 0.9604, "step": 25920 }, { "epoch": 0.3159543222063788, "grad_norm": 2.972050428390503, "learning_rate": 3.600320718409237e-06, "loss": 0.9179, "step": 25925 }, { "epoch": 0.31601525843052664, "grad_norm": 1.847999930381775, "learning_rate": 3.6000000000000003e-06, "loss": 0.8629, "step": 25930 }, { "epoch": 0.3160761946546744, "grad_norm": 2.4814248085021973, "learning_rate": 3.5996792815907637e-06, "loss": 0.8929, "step": 25935 }, { "epoch": 0.3161371308788222, "grad_norm": 1.8310214281082153, "learning_rate": 3.599358563181527e-06, "loss": 0.8488, "step": 25940 }, { "epoch": 0.31619806710297005, "grad_norm": 2.422400712966919, "learning_rate": 3.59903784477229e-06, "loss": 0.9098, "step": 25945 }, { "epoch": 0.3162590033271178, "grad_norm": 2.001960277557373, "learning_rate": 3.598717126363053e-06, "loss": 0.8401, "step": 25950 }, { "epoch": 0.31631993955126564, "grad_norm": 1.9018983840942383, "learning_rate": 3.598396407953817e-06, "loss": 0.9102, "step": 25955 }, { "epoch": 0.31638087577541346, "grad_norm": 2.324906349182129, "learning_rate": 3.59807568954458e-06, "loss": 0.9226, "step": 25960 }, { "epoch": 0.3164418119995613, "grad_norm": 1.9774162769317627, "learning_rate": 3.597754971135343e-06, "loss": 0.9378, "step": 25965 }, { "epoch": 0.31650274822370905, "grad_norm": 2.1391546726226807, "learning_rate": 3.597434252726107e-06, "loss": 0.8945, "step": 25970 }, { "epoch": 0.3165636844478569, "grad_norm": 2.4705147743225098, "learning_rate": 3.59711353431687e-06, "loss": 0.9061, "step": 25975 }, { "epoch": 0.3166246206720047, "grad_norm": 1.8074471950531006, "learning_rate": 3.5967928159076334e-06, "loss": 0.9373, "step": 25980 }, { "epoch": 0.31668555689615246, "grad_norm": 2.1164770126342773, "learning_rate": 3.596472097498397e-06, "loss": 0.8683, "step": 25985 }, { "epoch": 0.3167464931203003, "grad_norm": 1.7788407802581787, "learning_rate": 3.59615137908916e-06, "loss": 0.8118, "step": 25990 }, { "epoch": 0.3168074293444481, "grad_norm": 2.4061973094940186, "learning_rate": 3.5958306606799233e-06, "loss": 0.9011, "step": 25995 }, { "epoch": 0.31686836556859593, "grad_norm": 2.136943817138672, "learning_rate": 3.5955099422706867e-06, "loss": 0.8655, "step": 26000 }, { "epoch": 0.3169293017927437, "grad_norm": 1.994857668876648, "learning_rate": 3.59518922386145e-06, "loss": 0.8407, "step": 26005 }, { "epoch": 0.3169902380168915, "grad_norm": 1.7798219919204712, "learning_rate": 3.594868505452213e-06, "loss": 0.8426, "step": 26010 }, { "epoch": 0.31705117424103935, "grad_norm": 1.9770886898040771, "learning_rate": 3.5945477870429766e-06, "loss": 0.8704, "step": 26015 }, { "epoch": 0.3171121104651871, "grad_norm": 2.189448118209839, "learning_rate": 3.59422706863374e-06, "loss": 0.7777, "step": 26020 }, { "epoch": 0.31717304668933494, "grad_norm": 2.2941269874572754, "learning_rate": 3.593906350224503e-06, "loss": 0.8704, "step": 26025 }, { "epoch": 0.31723398291348276, "grad_norm": 1.858082890510559, "learning_rate": 3.593585631815267e-06, "loss": 0.8427, "step": 26030 }, { "epoch": 0.3172949191376306, "grad_norm": 1.83406662940979, "learning_rate": 3.59326491340603e-06, "loss": 0.8827, "step": 26035 }, { "epoch": 0.31735585536177835, "grad_norm": 2.2564616203308105, "learning_rate": 3.592944194996793e-06, "loss": 0.8422, "step": 26040 }, { "epoch": 0.3174167915859262, "grad_norm": 1.9552656412124634, "learning_rate": 3.592623476587556e-06, "loss": 0.8846, "step": 26045 }, { "epoch": 0.317477727810074, "grad_norm": 1.875663161277771, "learning_rate": 3.59230275817832e-06, "loss": 0.8787, "step": 26050 }, { "epoch": 0.31753866403422176, "grad_norm": 2.398557186126709, "learning_rate": 3.591982039769083e-06, "loss": 0.8697, "step": 26055 }, { "epoch": 0.3175996002583696, "grad_norm": 2.2152693271636963, "learning_rate": 3.5916613213598463e-06, "loss": 0.8635, "step": 26060 }, { "epoch": 0.3176605364825174, "grad_norm": 2.512554168701172, "learning_rate": 3.5913406029506098e-06, "loss": 0.9203, "step": 26065 }, { "epoch": 0.31772147270666523, "grad_norm": 1.9031258821487427, "learning_rate": 3.5910198845413728e-06, "loss": 0.8595, "step": 26070 }, { "epoch": 0.317782408930813, "grad_norm": 1.848374366760254, "learning_rate": 3.5906991661321362e-06, "loss": 0.891, "step": 26075 }, { "epoch": 0.3178433451549608, "grad_norm": 1.8831422328948975, "learning_rate": 3.5903784477228997e-06, "loss": 0.8079, "step": 26080 }, { "epoch": 0.31790428137910864, "grad_norm": 2.0686986446380615, "learning_rate": 3.590057729313663e-06, "loss": 0.9116, "step": 26085 }, { "epoch": 0.3179652176032564, "grad_norm": 1.7545100450515747, "learning_rate": 3.589737010904426e-06, "loss": 0.9, "step": 26090 }, { "epoch": 0.31802615382740423, "grad_norm": 1.741958498954773, "learning_rate": 3.5894162924951896e-06, "loss": 0.998, "step": 26095 }, { "epoch": 0.31808709005155206, "grad_norm": 1.8638181686401367, "learning_rate": 3.589095574085953e-06, "loss": 0.8963, "step": 26100 }, { "epoch": 0.3181480262756999, "grad_norm": 1.9189732074737549, "learning_rate": 3.588774855676716e-06, "loss": 0.8853, "step": 26105 }, { "epoch": 0.31820896249984765, "grad_norm": 2.0789570808410645, "learning_rate": 3.58845413726748e-06, "loss": 0.8634, "step": 26110 }, { "epoch": 0.31826989872399547, "grad_norm": 2.4874324798583984, "learning_rate": 3.588133418858243e-06, "loss": 0.8772, "step": 26115 }, { "epoch": 0.3183308349481433, "grad_norm": 1.7395764589309692, "learning_rate": 3.587812700449006e-06, "loss": 0.8584, "step": 26120 }, { "epoch": 0.31839177117229106, "grad_norm": 2.038585901260376, "learning_rate": 3.587491982039769e-06, "loss": 0.8949, "step": 26125 }, { "epoch": 0.3184527073964389, "grad_norm": 2.2307636737823486, "learning_rate": 3.587171263630533e-06, "loss": 0.8402, "step": 26130 }, { "epoch": 0.3185136436205867, "grad_norm": 1.762105941772461, "learning_rate": 3.586850545221296e-06, "loss": 0.956, "step": 26135 }, { "epoch": 0.3185745798447345, "grad_norm": 2.7910821437835693, "learning_rate": 3.5865298268120592e-06, "loss": 0.878, "step": 26140 }, { "epoch": 0.3186355160688823, "grad_norm": 2.209256410598755, "learning_rate": 3.5862091084028227e-06, "loss": 0.8877, "step": 26145 }, { "epoch": 0.3186964522930301, "grad_norm": 1.7677265405654907, "learning_rate": 3.5858883899935857e-06, "loss": 0.9054, "step": 26150 }, { "epoch": 0.31875738851717794, "grad_norm": 1.9760974645614624, "learning_rate": 3.585567671584349e-06, "loss": 0.9294, "step": 26155 }, { "epoch": 0.3188183247413257, "grad_norm": 2.1754119396209717, "learning_rate": 3.5852469531751126e-06, "loss": 0.8649, "step": 26160 }, { "epoch": 0.31887926096547353, "grad_norm": 1.653619408607483, "learning_rate": 3.584926234765876e-06, "loss": 0.8157, "step": 26165 }, { "epoch": 0.31894019718962136, "grad_norm": 2.194016218185425, "learning_rate": 3.584605516356639e-06, "loss": 0.8379, "step": 26170 }, { "epoch": 0.3190011334137691, "grad_norm": 1.792653203010559, "learning_rate": 3.584284797947403e-06, "loss": 0.858, "step": 26175 }, { "epoch": 0.31906206963791695, "grad_norm": 2.2102763652801514, "learning_rate": 3.583964079538166e-06, "loss": 0.8575, "step": 26180 }, { "epoch": 0.31912300586206477, "grad_norm": 1.9120683670043945, "learning_rate": 3.583643361128929e-06, "loss": 0.8407, "step": 26185 }, { "epoch": 0.3191839420862126, "grad_norm": 1.9743365049362183, "learning_rate": 3.583322642719693e-06, "loss": 0.9294, "step": 26190 }, { "epoch": 0.31924487831036036, "grad_norm": 1.9035086631774902, "learning_rate": 3.583001924310456e-06, "loss": 0.87, "step": 26195 }, { "epoch": 0.3193058145345082, "grad_norm": 2.285842180252075, "learning_rate": 3.582681205901219e-06, "loss": 0.8488, "step": 26200 }, { "epoch": 0.319366750758656, "grad_norm": 1.7824456691741943, "learning_rate": 3.582360487491982e-06, "loss": 0.8999, "step": 26205 }, { "epoch": 0.31942768698280377, "grad_norm": 2.15053653717041, "learning_rate": 3.5820397690827457e-06, "loss": 0.8094, "step": 26210 }, { "epoch": 0.3194886232069516, "grad_norm": 1.8258452415466309, "learning_rate": 3.5817190506735087e-06, "loss": 0.8508, "step": 26215 }, { "epoch": 0.3195495594310994, "grad_norm": 1.986574649810791, "learning_rate": 3.581398332264272e-06, "loss": 0.9307, "step": 26220 }, { "epoch": 0.31961049565524724, "grad_norm": 1.8615508079528809, "learning_rate": 3.5810776138550356e-06, "loss": 0.8868, "step": 26225 }, { "epoch": 0.319671431879395, "grad_norm": 2.3422555923461914, "learning_rate": 3.580756895445799e-06, "loss": 0.8587, "step": 26230 }, { "epoch": 0.31973236810354283, "grad_norm": 2.1037607192993164, "learning_rate": 3.580436177036562e-06, "loss": 0.9094, "step": 26235 }, { "epoch": 0.31979330432769065, "grad_norm": 1.8873447179794312, "learning_rate": 3.5801154586273255e-06, "loss": 0.7843, "step": 26240 }, { "epoch": 0.3198542405518384, "grad_norm": 2.1819634437561035, "learning_rate": 3.579794740218089e-06, "loss": 0.8708, "step": 26245 }, { "epoch": 0.31991517677598624, "grad_norm": 1.6897462606430054, "learning_rate": 3.579474021808852e-06, "loss": 0.9112, "step": 26250 }, { "epoch": 0.31997611300013407, "grad_norm": 1.9906485080718994, "learning_rate": 3.579153303399616e-06, "loss": 0.8822, "step": 26255 }, { "epoch": 0.3200370492242819, "grad_norm": 1.9007110595703125, "learning_rate": 3.578832584990379e-06, "loss": 0.902, "step": 26260 }, { "epoch": 0.32009798544842966, "grad_norm": 2.1657357215881348, "learning_rate": 3.578511866581142e-06, "loss": 0.844, "step": 26265 }, { "epoch": 0.3201589216725775, "grad_norm": 2.0591447353363037, "learning_rate": 3.5781911481719057e-06, "loss": 0.8809, "step": 26270 }, { "epoch": 0.3202198578967253, "grad_norm": 1.9257944822311401, "learning_rate": 3.5778704297626687e-06, "loss": 0.8917, "step": 26275 }, { "epoch": 0.32028079412087307, "grad_norm": 2.123933792114258, "learning_rate": 3.5775497113534318e-06, "loss": 0.7767, "step": 26280 }, { "epoch": 0.3203417303450209, "grad_norm": 1.7063883543014526, "learning_rate": 3.5772289929441956e-06, "loss": 0.7911, "step": 26285 }, { "epoch": 0.3204026665691687, "grad_norm": 1.9064669609069824, "learning_rate": 3.5769082745349586e-06, "loss": 0.905, "step": 26290 }, { "epoch": 0.32046360279331654, "grad_norm": 1.955538034439087, "learning_rate": 3.5765875561257217e-06, "loss": 0.8345, "step": 26295 }, { "epoch": 0.3205245390174643, "grad_norm": 2.178514242172241, "learning_rate": 3.576266837716485e-06, "loss": 0.8535, "step": 26300 }, { "epoch": 0.32058547524161213, "grad_norm": 1.937706708908081, "learning_rate": 3.5759461193072485e-06, "loss": 0.8823, "step": 26305 }, { "epoch": 0.32064641146575995, "grad_norm": 1.9558699131011963, "learning_rate": 3.575625400898012e-06, "loss": 0.8838, "step": 26310 }, { "epoch": 0.3207073476899077, "grad_norm": 1.8631144762039185, "learning_rate": 3.575304682488775e-06, "loss": 0.8361, "step": 26315 }, { "epoch": 0.32076828391405554, "grad_norm": 1.6178812980651855, "learning_rate": 3.5749839640795384e-06, "loss": 0.8083, "step": 26320 }, { "epoch": 0.32082922013820336, "grad_norm": 2.3636903762817383, "learning_rate": 3.574663245670302e-06, "loss": 0.8545, "step": 26325 }, { "epoch": 0.3208901563623512, "grad_norm": 2.1712965965270996, "learning_rate": 3.574342527261065e-06, "loss": 0.9537, "step": 26330 }, { "epoch": 0.32095109258649895, "grad_norm": 1.8574714660644531, "learning_rate": 3.5740218088518288e-06, "loss": 0.9283, "step": 26335 }, { "epoch": 0.3210120288106468, "grad_norm": 1.9929343461990356, "learning_rate": 3.5737010904425918e-06, "loss": 0.7864, "step": 26340 }, { "epoch": 0.3210729650347946, "grad_norm": 1.887247920036316, "learning_rate": 3.573380372033355e-06, "loss": 0.8501, "step": 26345 }, { "epoch": 0.32113390125894237, "grad_norm": 2.2194840908050537, "learning_rate": 3.5730596536241187e-06, "loss": 0.8387, "step": 26350 }, { "epoch": 0.3211948374830902, "grad_norm": 1.8002151250839233, "learning_rate": 3.5727389352148817e-06, "loss": 0.8337, "step": 26355 }, { "epoch": 0.321255773707238, "grad_norm": 2.1062588691711426, "learning_rate": 3.5724182168056447e-06, "loss": 0.8649, "step": 26360 }, { "epoch": 0.32131670993138584, "grad_norm": 3.0995090007781982, "learning_rate": 3.5720974983964086e-06, "loss": 0.8933, "step": 26365 }, { "epoch": 0.3213776461555336, "grad_norm": 1.6025217771530151, "learning_rate": 3.5717767799871716e-06, "loss": 0.8006, "step": 26370 }, { "epoch": 0.3214385823796814, "grad_norm": 2.1448354721069336, "learning_rate": 3.5714560615779346e-06, "loss": 0.8495, "step": 26375 }, { "epoch": 0.32149951860382925, "grad_norm": 1.9321969747543335, "learning_rate": 3.571135343168698e-06, "loss": 0.9138, "step": 26380 }, { "epoch": 0.321560454827977, "grad_norm": 1.9010388851165771, "learning_rate": 3.5708146247594615e-06, "loss": 0.9048, "step": 26385 }, { "epoch": 0.32162139105212484, "grad_norm": 1.9140679836273193, "learning_rate": 3.570493906350225e-06, "loss": 0.8874, "step": 26390 }, { "epoch": 0.32168232727627266, "grad_norm": 1.8837922811508179, "learning_rate": 3.570173187940988e-06, "loss": 0.8547, "step": 26395 }, { "epoch": 0.3217432635004205, "grad_norm": 1.758664608001709, "learning_rate": 3.5698524695317514e-06, "loss": 0.8624, "step": 26400 }, { "epoch": 0.32180419972456825, "grad_norm": 2.2021756172180176, "learning_rate": 3.569531751122515e-06, "loss": 0.851, "step": 26405 }, { "epoch": 0.3218651359487161, "grad_norm": 2.024057388305664, "learning_rate": 3.569211032713278e-06, "loss": 0.833, "step": 26410 }, { "epoch": 0.3219260721728639, "grad_norm": 2.3665263652801514, "learning_rate": 3.5688903143040417e-06, "loss": 0.875, "step": 26415 }, { "epoch": 0.32198700839701166, "grad_norm": 2.020413875579834, "learning_rate": 3.5685695958948047e-06, "loss": 0.9177, "step": 26420 }, { "epoch": 0.3220479446211595, "grad_norm": 2.216539144515991, "learning_rate": 3.5682488774855677e-06, "loss": 0.8297, "step": 26425 }, { "epoch": 0.3221088808453073, "grad_norm": 2.2960002422332764, "learning_rate": 3.5679281590763316e-06, "loss": 0.7945, "step": 26430 }, { "epoch": 0.32216981706945513, "grad_norm": 1.965219497680664, "learning_rate": 3.5676074406670946e-06, "loss": 0.8461, "step": 26435 }, { "epoch": 0.3222307532936029, "grad_norm": 1.8318853378295898, "learning_rate": 3.5672867222578576e-06, "loss": 0.8068, "step": 26440 }, { "epoch": 0.3222916895177507, "grad_norm": 1.857442855834961, "learning_rate": 3.5669660038486215e-06, "loss": 0.9153, "step": 26445 }, { "epoch": 0.32235262574189855, "grad_norm": 2.406552314758301, "learning_rate": 3.5666452854393845e-06, "loss": 0.9279, "step": 26450 }, { "epoch": 0.3224135619660463, "grad_norm": 2.092763900756836, "learning_rate": 3.566324567030148e-06, "loss": 0.9639, "step": 26455 }, { "epoch": 0.32247449819019414, "grad_norm": 2.2558984756469727, "learning_rate": 3.566003848620911e-06, "loss": 0.8982, "step": 26460 }, { "epoch": 0.32253543441434196, "grad_norm": 1.8670477867126465, "learning_rate": 3.5656831302116744e-06, "loss": 0.8662, "step": 26465 }, { "epoch": 0.3225963706384898, "grad_norm": 2.0895273685455322, "learning_rate": 3.565362411802438e-06, "loss": 0.9033, "step": 26470 }, { "epoch": 0.32265730686263755, "grad_norm": 2.118445634841919, "learning_rate": 3.565041693393201e-06, "loss": 0.7727, "step": 26475 }, { "epoch": 0.3227182430867854, "grad_norm": 1.8340606689453125, "learning_rate": 3.5647209749839647e-06, "loss": 0.861, "step": 26480 }, { "epoch": 0.3227791793109332, "grad_norm": 2.0580670833587646, "learning_rate": 3.5644002565747277e-06, "loss": 0.7929, "step": 26485 }, { "epoch": 0.32284011553508096, "grad_norm": 2.248987913131714, "learning_rate": 3.5640795381654908e-06, "loss": 0.9313, "step": 26490 }, { "epoch": 0.3229010517592288, "grad_norm": 2.0842175483703613, "learning_rate": 3.5637588197562546e-06, "loss": 0.8963, "step": 26495 }, { "epoch": 0.3229619879833766, "grad_norm": 1.9531599283218384, "learning_rate": 3.5634381013470176e-06, "loss": 0.8656, "step": 26500 }, { "epoch": 0.32302292420752443, "grad_norm": 1.6415568590164185, "learning_rate": 3.5631173829377806e-06, "loss": 0.8047, "step": 26505 }, { "epoch": 0.3230838604316722, "grad_norm": 2.3603744506835938, "learning_rate": 3.5627966645285445e-06, "loss": 0.8578, "step": 26510 }, { "epoch": 0.32314479665582, "grad_norm": 2.003803014755249, "learning_rate": 3.5624759461193075e-06, "loss": 0.8726, "step": 26515 }, { "epoch": 0.32320573287996784, "grad_norm": 2.1071105003356934, "learning_rate": 3.5621552277100705e-06, "loss": 0.8551, "step": 26520 }, { "epoch": 0.3232666691041156, "grad_norm": 2.1814184188842773, "learning_rate": 3.5618345093008344e-06, "loss": 0.8735, "step": 26525 }, { "epoch": 0.32332760532826343, "grad_norm": 2.057229995727539, "learning_rate": 3.5615137908915974e-06, "loss": 0.8127, "step": 26530 }, { "epoch": 0.32338854155241126, "grad_norm": 1.7884122133255005, "learning_rate": 3.561193072482361e-06, "loss": 0.8788, "step": 26535 }, { "epoch": 0.3234494777765591, "grad_norm": 1.9489712715148926, "learning_rate": 3.560872354073124e-06, "loss": 0.8243, "step": 26540 }, { "epoch": 0.32351041400070685, "grad_norm": 1.8738898038864136, "learning_rate": 3.5605516356638873e-06, "loss": 0.8211, "step": 26545 }, { "epoch": 0.32357135022485467, "grad_norm": 1.8618903160095215, "learning_rate": 3.5602309172546508e-06, "loss": 0.817, "step": 26550 }, { "epoch": 0.3236322864490025, "grad_norm": 1.930329442024231, "learning_rate": 3.5599101988454138e-06, "loss": 0.8348, "step": 26555 }, { "epoch": 0.32369322267315026, "grad_norm": 1.8939064741134644, "learning_rate": 3.5595894804361776e-06, "loss": 0.8242, "step": 26560 }, { "epoch": 0.3237541588972981, "grad_norm": 2.186824083328247, "learning_rate": 3.5592687620269407e-06, "loss": 0.7602, "step": 26565 }, { "epoch": 0.3238150951214459, "grad_norm": 1.8310002088546753, "learning_rate": 3.5589480436177037e-06, "loss": 0.8978, "step": 26570 }, { "epoch": 0.32387603134559373, "grad_norm": 1.8483829498291016, "learning_rate": 3.5586273252084675e-06, "loss": 0.9184, "step": 26575 }, { "epoch": 0.3239369675697415, "grad_norm": 2.2329533100128174, "learning_rate": 3.5583066067992306e-06, "loss": 0.8374, "step": 26580 }, { "epoch": 0.3239979037938893, "grad_norm": 2.041780471801758, "learning_rate": 3.5579858883899936e-06, "loss": 0.8818, "step": 26585 }, { "epoch": 0.32405884001803714, "grad_norm": 1.6195112466812134, "learning_rate": 3.5576651699807574e-06, "loss": 0.913, "step": 26590 }, { "epoch": 0.3241197762421849, "grad_norm": 1.8687067031860352, "learning_rate": 3.5573444515715205e-06, "loss": 0.866, "step": 26595 }, { "epoch": 0.32418071246633273, "grad_norm": 2.049398422241211, "learning_rate": 3.5570237331622835e-06, "loss": 0.882, "step": 26600 }, { "epoch": 0.32424164869048056, "grad_norm": 2.091012716293335, "learning_rate": 3.5567030147530473e-06, "loss": 0.8754, "step": 26605 }, { "epoch": 0.3243025849146283, "grad_norm": 2.0077126026153564, "learning_rate": 3.5563822963438104e-06, "loss": 0.8779, "step": 26610 }, { "epoch": 0.32436352113877615, "grad_norm": 1.885914921760559, "learning_rate": 3.556061577934574e-06, "loss": 0.9242, "step": 26615 }, { "epoch": 0.32442445736292397, "grad_norm": 1.9607962369918823, "learning_rate": 3.5557408595253372e-06, "loss": 0.8747, "step": 26620 }, { "epoch": 0.3244853935870718, "grad_norm": 2.099001169204712, "learning_rate": 3.5554201411161003e-06, "loss": 0.884, "step": 26625 }, { "epoch": 0.32454632981121956, "grad_norm": 2.1327321529388428, "learning_rate": 3.5550994227068637e-06, "loss": 0.8217, "step": 26630 }, { "epoch": 0.3246072660353674, "grad_norm": 2.1705193519592285, "learning_rate": 3.5547787042976267e-06, "loss": 0.9273, "step": 26635 }, { "epoch": 0.3246682022595152, "grad_norm": 1.5727365016937256, "learning_rate": 3.5544579858883906e-06, "loss": 0.8495, "step": 26640 }, { "epoch": 0.32472913848366297, "grad_norm": 1.854567289352417, "learning_rate": 3.5541372674791536e-06, "loss": 0.8736, "step": 26645 }, { "epoch": 0.3247900747078108, "grad_norm": 1.9812155961990356, "learning_rate": 3.5538165490699166e-06, "loss": 0.8443, "step": 26650 }, { "epoch": 0.3248510109319586, "grad_norm": 1.8868838548660278, "learning_rate": 3.5534958306606805e-06, "loss": 0.8544, "step": 26655 }, { "epoch": 0.32491194715610644, "grad_norm": 1.9583615064620972, "learning_rate": 3.5531751122514435e-06, "loss": 0.8937, "step": 26660 }, { "epoch": 0.3249728833802542, "grad_norm": 2.2587852478027344, "learning_rate": 3.5528543938422065e-06, "loss": 0.9126, "step": 26665 }, { "epoch": 0.32503381960440203, "grad_norm": 1.8231216669082642, "learning_rate": 3.5525336754329704e-06, "loss": 0.8486, "step": 26670 }, { "epoch": 0.32509475582854985, "grad_norm": 2.048006534576416, "learning_rate": 3.5522129570237334e-06, "loss": 0.7954, "step": 26675 }, { "epoch": 0.3251556920526976, "grad_norm": 2.0450525283813477, "learning_rate": 3.551892238614497e-06, "loss": 0.8181, "step": 26680 }, { "epoch": 0.32521662827684544, "grad_norm": 1.7785643339157104, "learning_rate": 3.5515715202052603e-06, "loss": 0.8966, "step": 26685 }, { "epoch": 0.32527756450099327, "grad_norm": 1.9258060455322266, "learning_rate": 3.5512508017960233e-06, "loss": 0.9136, "step": 26690 }, { "epoch": 0.3253385007251411, "grad_norm": 2.359262704849243, "learning_rate": 3.5509300833867867e-06, "loss": 0.8086, "step": 26695 }, { "epoch": 0.32539943694928886, "grad_norm": 2.0214011669158936, "learning_rate": 3.55060936497755e-06, "loss": 0.8243, "step": 26700 }, { "epoch": 0.3254603731734367, "grad_norm": 2.155104875564575, "learning_rate": 3.5502886465683136e-06, "loss": 0.8858, "step": 26705 }, { "epoch": 0.3255213093975845, "grad_norm": 2.23854923248291, "learning_rate": 3.5499679281590766e-06, "loss": 0.9238, "step": 26710 }, { "epoch": 0.32558224562173227, "grad_norm": 1.8356387615203857, "learning_rate": 3.5496472097498396e-06, "loss": 0.9051, "step": 26715 }, { "epoch": 0.3256431818458801, "grad_norm": 2.0407216548919678, "learning_rate": 3.5493264913406035e-06, "loss": 0.8838, "step": 26720 }, { "epoch": 0.3257041180700279, "grad_norm": 1.7905735969543457, "learning_rate": 3.5490057729313665e-06, "loss": 0.9641, "step": 26725 }, { "epoch": 0.32576505429417574, "grad_norm": 2.090627670288086, "learning_rate": 3.5486850545221295e-06, "loss": 0.8631, "step": 26730 }, { "epoch": 0.3258259905183235, "grad_norm": 1.714111328125, "learning_rate": 3.5483643361128934e-06, "loss": 0.8406, "step": 26735 }, { "epoch": 0.32588692674247133, "grad_norm": 1.7471410036087036, "learning_rate": 3.5480436177036564e-06, "loss": 0.9223, "step": 26740 }, { "epoch": 0.32594786296661915, "grad_norm": 2.2641100883483887, "learning_rate": 3.5477228992944194e-06, "loss": 0.7789, "step": 26745 }, { "epoch": 0.3260087991907669, "grad_norm": 1.978163242340088, "learning_rate": 3.5474021808851833e-06, "loss": 0.9043, "step": 26750 }, { "epoch": 0.32606973541491474, "grad_norm": 2.235997200012207, "learning_rate": 3.5470814624759463e-06, "loss": 0.916, "step": 26755 }, { "epoch": 0.32613067163906256, "grad_norm": 1.8861935138702393, "learning_rate": 3.5467607440667097e-06, "loss": 0.8525, "step": 26760 }, { "epoch": 0.3261916078632104, "grad_norm": 1.887093424797058, "learning_rate": 3.546440025657473e-06, "loss": 0.8605, "step": 26765 }, { "epoch": 0.32625254408735815, "grad_norm": 1.9456852674484253, "learning_rate": 3.546119307248236e-06, "loss": 0.8669, "step": 26770 }, { "epoch": 0.326313480311506, "grad_norm": 1.9919466972351074, "learning_rate": 3.5457985888389996e-06, "loss": 0.8728, "step": 26775 }, { "epoch": 0.3263744165356538, "grad_norm": 1.8983436822891235, "learning_rate": 3.545477870429763e-06, "loss": 0.8107, "step": 26780 }, { "epoch": 0.32643535275980157, "grad_norm": 1.7704683542251587, "learning_rate": 3.5451571520205265e-06, "loss": 0.8205, "step": 26785 }, { "epoch": 0.3264962889839494, "grad_norm": 2.08945631980896, "learning_rate": 3.5448364336112895e-06, "loss": 0.8569, "step": 26790 }, { "epoch": 0.3265572252080972, "grad_norm": 1.7957007884979248, "learning_rate": 3.5445157152020526e-06, "loss": 0.8376, "step": 26795 }, { "epoch": 0.32661816143224504, "grad_norm": 2.069326162338257, "learning_rate": 3.5441949967928164e-06, "loss": 0.8279, "step": 26800 }, { "epoch": 0.3266790976563928, "grad_norm": 2.0393636226654053, "learning_rate": 3.5438742783835794e-06, "loss": 0.8865, "step": 26805 }, { "epoch": 0.3267400338805406, "grad_norm": 1.834102988243103, "learning_rate": 3.5435535599743425e-06, "loss": 0.8526, "step": 26810 }, { "epoch": 0.32680097010468845, "grad_norm": 1.8506875038146973, "learning_rate": 3.5432328415651063e-06, "loss": 0.7971, "step": 26815 }, { "epoch": 0.3268619063288362, "grad_norm": 1.9848566055297852, "learning_rate": 3.5429121231558693e-06, "loss": 0.8158, "step": 26820 }, { "epoch": 0.32692284255298404, "grad_norm": 1.9550994634628296, "learning_rate": 3.5425914047466324e-06, "loss": 0.8235, "step": 26825 }, { "epoch": 0.32698377877713186, "grad_norm": 1.9900113344192505, "learning_rate": 3.5422706863373962e-06, "loss": 0.8784, "step": 26830 }, { "epoch": 0.3270447150012797, "grad_norm": 2.4884371757507324, "learning_rate": 3.5419499679281592e-06, "loss": 0.8306, "step": 26835 }, { "epoch": 0.32710565122542745, "grad_norm": 2.0110456943511963, "learning_rate": 3.5416292495189227e-06, "loss": 0.8791, "step": 26840 }, { "epoch": 0.3271665874495753, "grad_norm": 1.989996314048767, "learning_rate": 3.541308531109686e-06, "loss": 0.8424, "step": 26845 }, { "epoch": 0.3272275236737231, "grad_norm": 2.0219931602478027, "learning_rate": 3.540987812700449e-06, "loss": 0.8674, "step": 26850 }, { "epoch": 0.32728845989787086, "grad_norm": 2.445566415786743, "learning_rate": 3.5406670942912126e-06, "loss": 0.9164, "step": 26855 }, { "epoch": 0.3273493961220187, "grad_norm": 2.2105040550231934, "learning_rate": 3.540346375881976e-06, "loss": 0.8107, "step": 26860 }, { "epoch": 0.3274103323461665, "grad_norm": 1.8024853467941284, "learning_rate": 3.5400256574727395e-06, "loss": 0.8696, "step": 26865 }, { "epoch": 0.32747126857031433, "grad_norm": 1.8879791498184204, "learning_rate": 3.5397049390635025e-06, "loss": 0.9023, "step": 26870 }, { "epoch": 0.3275322047944621, "grad_norm": 1.9371426105499268, "learning_rate": 3.5393842206542655e-06, "loss": 0.8957, "step": 26875 }, { "epoch": 0.3275931410186099, "grad_norm": 2.0746259689331055, "learning_rate": 3.5390635022450294e-06, "loss": 0.8365, "step": 26880 }, { "epoch": 0.32765407724275775, "grad_norm": 1.86204993724823, "learning_rate": 3.5387427838357924e-06, "loss": 0.8843, "step": 26885 }, { "epoch": 0.3277150134669055, "grad_norm": 2.667891263961792, "learning_rate": 3.5384220654265554e-06, "loss": 0.8735, "step": 26890 }, { "epoch": 0.32777594969105334, "grad_norm": 2.074852228164673, "learning_rate": 3.5381013470173192e-06, "loss": 0.8538, "step": 26895 }, { "epoch": 0.32783688591520116, "grad_norm": 1.9106565713882446, "learning_rate": 3.5377806286080823e-06, "loss": 0.884, "step": 26900 }, { "epoch": 0.327897822139349, "grad_norm": 2.193814516067505, "learning_rate": 3.5374599101988453e-06, "loss": 0.8712, "step": 26905 }, { "epoch": 0.32795875836349675, "grad_norm": 2.025207042694092, "learning_rate": 3.537139191789609e-06, "loss": 0.8368, "step": 26910 }, { "epoch": 0.3280196945876446, "grad_norm": 1.8168396949768066, "learning_rate": 3.536818473380372e-06, "loss": 0.8705, "step": 26915 }, { "epoch": 0.3280806308117924, "grad_norm": 1.8307993412017822, "learning_rate": 3.5364977549711356e-06, "loss": 0.858, "step": 26920 }, { "epoch": 0.32814156703594016, "grad_norm": 1.848962664604187, "learning_rate": 3.536177036561899e-06, "loss": 0.8534, "step": 26925 }, { "epoch": 0.328202503260088, "grad_norm": 2.256880283355713, "learning_rate": 3.5358563181526625e-06, "loss": 0.8322, "step": 26930 }, { "epoch": 0.3282634394842358, "grad_norm": 1.926592230796814, "learning_rate": 3.5355355997434255e-06, "loss": 0.8954, "step": 26935 }, { "epoch": 0.32832437570838363, "grad_norm": 1.8149192333221436, "learning_rate": 3.535214881334189e-06, "loss": 0.8064, "step": 26940 }, { "epoch": 0.3283853119325314, "grad_norm": 2.6940159797668457, "learning_rate": 3.5348941629249524e-06, "loss": 0.7808, "step": 26945 }, { "epoch": 0.3284462481566792, "grad_norm": 1.9595195055007935, "learning_rate": 3.5345734445157154e-06, "loss": 0.8533, "step": 26950 }, { "epoch": 0.32850718438082704, "grad_norm": 1.908811330795288, "learning_rate": 3.5342527261064793e-06, "loss": 0.9122, "step": 26955 }, { "epoch": 0.3285681206049748, "grad_norm": 1.8686145544052124, "learning_rate": 3.5339320076972423e-06, "loss": 0.8816, "step": 26960 }, { "epoch": 0.32862905682912263, "grad_norm": 1.8644351959228516, "learning_rate": 3.5336112892880053e-06, "loss": 0.8766, "step": 26965 }, { "epoch": 0.32868999305327046, "grad_norm": 2.24955677986145, "learning_rate": 3.5332905708787683e-06, "loss": 0.8935, "step": 26970 }, { "epoch": 0.3287509292774183, "grad_norm": 2.0901453495025635, "learning_rate": 3.532969852469532e-06, "loss": 0.9406, "step": 26975 }, { "epoch": 0.32881186550156605, "grad_norm": 1.7294139862060547, "learning_rate": 3.532649134060295e-06, "loss": 0.8315, "step": 26980 }, { "epoch": 0.32887280172571387, "grad_norm": 2.2363648414611816, "learning_rate": 3.5323284156510586e-06, "loss": 0.8434, "step": 26985 }, { "epoch": 0.3289337379498617, "grad_norm": 2.6842246055603027, "learning_rate": 3.532007697241822e-06, "loss": 0.8374, "step": 26990 }, { "epoch": 0.32899467417400946, "grad_norm": 2.068795680999756, "learning_rate": 3.531686978832585e-06, "loss": 0.9278, "step": 26995 }, { "epoch": 0.3290556103981573, "grad_norm": 1.7890887260437012, "learning_rate": 3.5313662604233485e-06, "loss": 0.8404, "step": 27000 }, { "epoch": 0.3291165466223051, "grad_norm": 2.237856149673462, "learning_rate": 3.531045542014112e-06, "loss": 0.8454, "step": 27005 }, { "epoch": 0.32917748284645293, "grad_norm": 2.005871057510376, "learning_rate": 3.5307248236048754e-06, "loss": 0.7705, "step": 27010 }, { "epoch": 0.3292384190706007, "grad_norm": 2.3206470012664795, "learning_rate": 3.5304041051956384e-06, "loss": 0.8089, "step": 27015 }, { "epoch": 0.3292993552947485, "grad_norm": 2.1586782932281494, "learning_rate": 3.530083386786402e-06, "loss": 0.874, "step": 27020 }, { "epoch": 0.32936029151889634, "grad_norm": 1.8782240152359009, "learning_rate": 3.5297626683771653e-06, "loss": 0.8711, "step": 27025 }, { "epoch": 0.3294212277430441, "grad_norm": 2.0269758701324463, "learning_rate": 3.5294419499679283e-06, "loss": 0.8826, "step": 27030 }, { "epoch": 0.32948216396719193, "grad_norm": 1.713515281677246, "learning_rate": 3.529121231558692e-06, "loss": 0.7913, "step": 27035 }, { "epoch": 0.32954310019133976, "grad_norm": 1.8424557447433472, "learning_rate": 3.528800513149455e-06, "loss": 0.9606, "step": 27040 }, { "epoch": 0.3296040364154876, "grad_norm": 2.2491533756256104, "learning_rate": 3.5284797947402182e-06, "loss": 0.8316, "step": 27045 }, { "epoch": 0.32966497263963535, "grad_norm": 1.6406397819519043, "learning_rate": 3.5281590763309812e-06, "loss": 0.8377, "step": 27050 }, { "epoch": 0.32972590886378317, "grad_norm": 2.0290544033050537, "learning_rate": 3.527838357921745e-06, "loss": 0.8272, "step": 27055 }, { "epoch": 0.329786845087931, "grad_norm": 2.0230443477630615, "learning_rate": 3.527517639512508e-06, "loss": 0.8919, "step": 27060 }, { "epoch": 0.32984778131207876, "grad_norm": 2.108262062072754, "learning_rate": 3.5271969211032716e-06, "loss": 0.8663, "step": 27065 }, { "epoch": 0.3299087175362266, "grad_norm": 1.7131932973861694, "learning_rate": 3.526876202694035e-06, "loss": 0.8169, "step": 27070 }, { "epoch": 0.3299696537603744, "grad_norm": 1.891013741493225, "learning_rate": 3.526555484284798e-06, "loss": 0.9106, "step": 27075 }, { "epoch": 0.33003058998452217, "grad_norm": 1.9245505332946777, "learning_rate": 3.5262347658755615e-06, "loss": 0.8714, "step": 27080 }, { "epoch": 0.33009152620867, "grad_norm": 1.8809255361557007, "learning_rate": 3.525914047466325e-06, "loss": 0.8776, "step": 27085 }, { "epoch": 0.3301524624328178, "grad_norm": 1.8781987428665161, "learning_rate": 3.5255933290570883e-06, "loss": 0.831, "step": 27090 }, { "epoch": 0.33021339865696564, "grad_norm": 1.6464358568191528, "learning_rate": 3.5252726106478514e-06, "loss": 0.8265, "step": 27095 }, { "epoch": 0.3302743348811134, "grad_norm": 1.7944754362106323, "learning_rate": 3.524951892238615e-06, "loss": 0.9264, "step": 27100 }, { "epoch": 0.33033527110526123, "grad_norm": 2.2908172607421875, "learning_rate": 3.5246311738293782e-06, "loss": 0.8898, "step": 27105 }, { "epoch": 0.33039620732940905, "grad_norm": 1.9310766458511353, "learning_rate": 3.5243104554201413e-06, "loss": 0.8593, "step": 27110 }, { "epoch": 0.3304571435535568, "grad_norm": 2.3715906143188477, "learning_rate": 3.523989737010905e-06, "loss": 0.8277, "step": 27115 }, { "epoch": 0.33051807977770464, "grad_norm": 2.187401533126831, "learning_rate": 3.523669018601668e-06, "loss": 0.8659, "step": 27120 }, { "epoch": 0.33057901600185247, "grad_norm": 1.988052248954773, "learning_rate": 3.523348300192431e-06, "loss": 0.8416, "step": 27125 }, { "epoch": 0.3306399522260003, "grad_norm": 2.133500576019287, "learning_rate": 3.523027581783194e-06, "loss": 0.8777, "step": 27130 }, { "epoch": 0.33070088845014806, "grad_norm": 2.140495538711548, "learning_rate": 3.522706863373958e-06, "loss": 0.8964, "step": 27135 }, { "epoch": 0.3307618246742959, "grad_norm": 1.9549524784088135, "learning_rate": 3.522386144964721e-06, "loss": 0.8833, "step": 27140 }, { "epoch": 0.3308227608984437, "grad_norm": 2.2014615535736084, "learning_rate": 3.5220654265554845e-06, "loss": 0.9094, "step": 27145 }, { "epoch": 0.33088369712259147, "grad_norm": 1.7768455743789673, "learning_rate": 3.521744708146248e-06, "loss": 0.9033, "step": 27150 }, { "epoch": 0.3309446333467393, "grad_norm": 1.8830461502075195, "learning_rate": 3.5214239897370114e-06, "loss": 0.8915, "step": 27155 }, { "epoch": 0.3310055695708871, "grad_norm": 1.808749794960022, "learning_rate": 3.5211032713277744e-06, "loss": 0.8405, "step": 27160 }, { "epoch": 0.33106650579503494, "grad_norm": 1.8506778478622437, "learning_rate": 3.520782552918538e-06, "loss": 0.8563, "step": 27165 }, { "epoch": 0.3311274420191827, "grad_norm": 2.4715416431427, "learning_rate": 3.5204618345093013e-06, "loss": 0.9366, "step": 27170 }, { "epoch": 0.33118837824333053, "grad_norm": 1.6310818195343018, "learning_rate": 3.5201411161000643e-06, "loss": 0.819, "step": 27175 }, { "epoch": 0.33124931446747835, "grad_norm": 2.0182392597198486, "learning_rate": 3.519820397690828e-06, "loss": 0.8204, "step": 27180 }, { "epoch": 0.3313102506916261, "grad_norm": 2.2881546020507812, "learning_rate": 3.519499679281591e-06, "loss": 0.8844, "step": 27185 }, { "epoch": 0.33137118691577394, "grad_norm": 1.9656658172607422, "learning_rate": 3.519178960872354e-06, "loss": 0.8072, "step": 27190 }, { "epoch": 0.33143212313992176, "grad_norm": 2.105130434036255, "learning_rate": 3.518858242463118e-06, "loss": 0.8125, "step": 27195 }, { "epoch": 0.3314930593640696, "grad_norm": 1.8327980041503906, "learning_rate": 3.518537524053881e-06, "loss": 0.7986, "step": 27200 }, { "epoch": 0.33155399558821735, "grad_norm": 1.7731564044952393, "learning_rate": 3.518216805644644e-06, "loss": 0.909, "step": 27205 }, { "epoch": 0.3316149318123652, "grad_norm": 1.9398630857467651, "learning_rate": 3.517896087235408e-06, "loss": 0.9082, "step": 27210 }, { "epoch": 0.331675868036513, "grad_norm": 1.8130160570144653, "learning_rate": 3.517575368826171e-06, "loss": 0.8081, "step": 27215 }, { "epoch": 0.33173680426066077, "grad_norm": 2.082362651824951, "learning_rate": 3.517254650416934e-06, "loss": 0.8804, "step": 27220 }, { "epoch": 0.3317977404848086, "grad_norm": 1.736883282661438, "learning_rate": 3.5169339320076974e-06, "loss": 0.9151, "step": 27225 }, { "epoch": 0.3318586767089564, "grad_norm": 1.830662488937378, "learning_rate": 3.516613213598461e-06, "loss": 0.8591, "step": 27230 }, { "epoch": 0.33191961293310424, "grad_norm": 1.6794347763061523, "learning_rate": 3.5162924951892243e-06, "loss": 0.8059, "step": 27235 }, { "epoch": 0.331980549157252, "grad_norm": 1.9460138082504272, "learning_rate": 3.5159717767799873e-06, "loss": 0.8181, "step": 27240 }, { "epoch": 0.3320414853813998, "grad_norm": 1.86909019947052, "learning_rate": 3.5156510583707508e-06, "loss": 0.8149, "step": 27245 }, { "epoch": 0.33210242160554765, "grad_norm": 2.0156009197235107, "learning_rate": 3.515330339961514e-06, "loss": 0.8353, "step": 27250 }, { "epoch": 0.3321633578296954, "grad_norm": 2.06481671333313, "learning_rate": 3.515009621552277e-06, "loss": 0.802, "step": 27255 }, { "epoch": 0.33222429405384324, "grad_norm": 1.9518617391586304, "learning_rate": 3.514688903143041e-06, "loss": 0.9076, "step": 27260 }, { "epoch": 0.33228523027799106, "grad_norm": 2.4231462478637695, "learning_rate": 3.514368184733804e-06, "loss": 0.7988, "step": 27265 }, { "epoch": 0.3323461665021389, "grad_norm": 1.7437669038772583, "learning_rate": 3.514047466324567e-06, "loss": 0.9124, "step": 27270 }, { "epoch": 0.33240710272628665, "grad_norm": 1.833422303199768, "learning_rate": 3.513726747915331e-06, "loss": 0.8452, "step": 27275 }, { "epoch": 0.3324680389504345, "grad_norm": 2.5068719387054443, "learning_rate": 3.513406029506094e-06, "loss": 0.9046, "step": 27280 }, { "epoch": 0.3325289751745823, "grad_norm": 1.8085238933563232, "learning_rate": 3.513085311096857e-06, "loss": 0.8324, "step": 27285 }, { "epoch": 0.33258991139873006, "grad_norm": 1.9981263875961304, "learning_rate": 3.512764592687621e-06, "loss": 0.8379, "step": 27290 }, { "epoch": 0.3326508476228779, "grad_norm": 1.9481881856918335, "learning_rate": 3.512443874278384e-06, "loss": 0.879, "step": 27295 }, { "epoch": 0.3327117838470257, "grad_norm": 1.9922943115234375, "learning_rate": 3.512123155869147e-06, "loss": 0.842, "step": 27300 }, { "epoch": 0.33277272007117353, "grad_norm": 1.7937337160110474, "learning_rate": 3.5118024374599103e-06, "loss": 0.8586, "step": 27305 }, { "epoch": 0.3328336562953213, "grad_norm": 1.5861866474151611, "learning_rate": 3.5114817190506738e-06, "loss": 0.8865, "step": 27310 }, { "epoch": 0.3328945925194691, "grad_norm": 2.0009875297546387, "learning_rate": 3.5111610006414372e-06, "loss": 0.9291, "step": 27315 }, { "epoch": 0.33295552874361695, "grad_norm": 2.2340810298919678, "learning_rate": 3.5108402822322002e-06, "loss": 0.9187, "step": 27320 }, { "epoch": 0.3330164649677647, "grad_norm": 2.1473069190979004, "learning_rate": 3.5105195638229637e-06, "loss": 0.854, "step": 27325 }, { "epoch": 0.33307740119191254, "grad_norm": 2.2179481983184814, "learning_rate": 3.510198845413727e-06, "loss": 0.8335, "step": 27330 }, { "epoch": 0.33313833741606036, "grad_norm": 1.9668292999267578, "learning_rate": 3.50987812700449e-06, "loss": 0.794, "step": 27335 }, { "epoch": 0.3331992736402082, "grad_norm": 2.015955924987793, "learning_rate": 3.509557408595254e-06, "loss": 0.9235, "step": 27340 }, { "epoch": 0.33326020986435595, "grad_norm": 1.7632473707199097, "learning_rate": 3.509236690186017e-06, "loss": 0.8526, "step": 27345 }, { "epoch": 0.3333211460885038, "grad_norm": 2.098240613937378, "learning_rate": 3.50891597177678e-06, "loss": 0.8052, "step": 27350 }, { "epoch": 0.3333820823126516, "grad_norm": 2.500605583190918, "learning_rate": 3.508595253367544e-06, "loss": 0.8279, "step": 27355 }, { "epoch": 0.33344301853679936, "grad_norm": 2.033046245574951, "learning_rate": 3.508274534958307e-06, "loss": 0.8577, "step": 27360 }, { "epoch": 0.3335039547609472, "grad_norm": 1.8385417461395264, "learning_rate": 3.50795381654907e-06, "loss": 0.8126, "step": 27365 }, { "epoch": 0.333564890985095, "grad_norm": 1.9307386875152588, "learning_rate": 3.507633098139834e-06, "loss": 0.876, "step": 27370 }, { "epoch": 0.33362582720924283, "grad_norm": 2.1301746368408203, "learning_rate": 3.507312379730597e-06, "loss": 0.8667, "step": 27375 }, { "epoch": 0.3336867634333906, "grad_norm": 1.6361509561538696, "learning_rate": 3.5069916613213603e-06, "loss": 0.8503, "step": 27380 }, { "epoch": 0.3337476996575384, "grad_norm": 2.0880794525146484, "learning_rate": 3.5066709429121233e-06, "loss": 0.919, "step": 27385 }, { "epoch": 0.33380863588168624, "grad_norm": 1.9805413484573364, "learning_rate": 3.5063502245028867e-06, "loss": 0.8806, "step": 27390 }, { "epoch": 0.333869572105834, "grad_norm": 1.6996397972106934, "learning_rate": 3.50602950609365e-06, "loss": 0.8358, "step": 27395 }, { "epoch": 0.33393050832998183, "grad_norm": 2.0084686279296875, "learning_rate": 3.505708787684413e-06, "loss": 0.8157, "step": 27400 }, { "epoch": 0.33399144455412966, "grad_norm": 2.0362253189086914, "learning_rate": 3.505388069275177e-06, "loss": 0.7793, "step": 27405 }, { "epoch": 0.3340523807782775, "grad_norm": 1.848746418952942, "learning_rate": 3.50506735086594e-06, "loss": 0.8126, "step": 27410 }, { "epoch": 0.33411331700242525, "grad_norm": 2.1130902767181396, "learning_rate": 3.504746632456703e-06, "loss": 0.8077, "step": 27415 }, { "epoch": 0.33417425322657307, "grad_norm": 1.7833690643310547, "learning_rate": 3.504425914047467e-06, "loss": 0.9181, "step": 27420 }, { "epoch": 0.3342351894507209, "grad_norm": 2.380023717880249, "learning_rate": 3.50410519563823e-06, "loss": 0.8287, "step": 27425 }, { "epoch": 0.33429612567486866, "grad_norm": 1.8645331859588623, "learning_rate": 3.503784477228993e-06, "loss": 0.8331, "step": 27430 }, { "epoch": 0.3343570618990165, "grad_norm": 2.0177078247070312, "learning_rate": 3.503463758819757e-06, "loss": 0.9009, "step": 27435 }, { "epoch": 0.3344179981231643, "grad_norm": 1.9519189596176147, "learning_rate": 3.50314304041052e-06, "loss": 0.9076, "step": 27440 }, { "epoch": 0.33447893434731213, "grad_norm": 1.9667025804519653, "learning_rate": 3.502822322001283e-06, "loss": 0.8475, "step": 27445 }, { "epoch": 0.3345398705714599, "grad_norm": 1.8891689777374268, "learning_rate": 3.5025016035920467e-06, "loss": 0.9033, "step": 27450 }, { "epoch": 0.3346008067956077, "grad_norm": 1.9432954788208008, "learning_rate": 3.5021808851828097e-06, "loss": 0.9021, "step": 27455 }, { "epoch": 0.33466174301975554, "grad_norm": 2.2760448455810547, "learning_rate": 3.501860166773573e-06, "loss": 0.8392, "step": 27460 }, { "epoch": 0.3347226792439033, "grad_norm": 1.89739990234375, "learning_rate": 3.501539448364336e-06, "loss": 0.8673, "step": 27465 }, { "epoch": 0.33478361546805113, "grad_norm": 2.124958038330078, "learning_rate": 3.5012187299550996e-06, "loss": 0.9004, "step": 27470 }, { "epoch": 0.33484455169219896, "grad_norm": 2.2934939861297607, "learning_rate": 3.500898011545863e-06, "loss": 0.8159, "step": 27475 }, { "epoch": 0.3349054879163468, "grad_norm": 1.8190809488296509, "learning_rate": 3.500577293136626e-06, "loss": 0.856, "step": 27480 }, { "epoch": 0.33496642414049455, "grad_norm": 2.4282939434051514, "learning_rate": 3.50025657472739e-06, "loss": 0.8226, "step": 27485 }, { "epoch": 0.33502736036464237, "grad_norm": 2.2853691577911377, "learning_rate": 3.499935856318153e-06, "loss": 0.8928, "step": 27490 }, { "epoch": 0.3350882965887902, "grad_norm": 1.976051688194275, "learning_rate": 3.499615137908916e-06, "loss": 0.8562, "step": 27495 }, { "epoch": 0.33514923281293796, "grad_norm": 1.6721861362457275, "learning_rate": 3.49929441949968e-06, "loss": 0.8485, "step": 27500 }, { "epoch": 0.3352101690370858, "grad_norm": 1.8344861268997192, "learning_rate": 3.498973701090443e-06, "loss": 0.865, "step": 27505 }, { "epoch": 0.3352711052612336, "grad_norm": 2.055629014968872, "learning_rate": 3.498652982681206e-06, "loss": 0.8737, "step": 27510 }, { "epoch": 0.3353320414853814, "grad_norm": 1.977081298828125, "learning_rate": 3.4983322642719698e-06, "loss": 0.8964, "step": 27515 }, { "epoch": 0.3353929777095292, "grad_norm": 2.0329556465148926, "learning_rate": 3.4980115458627328e-06, "loss": 0.8802, "step": 27520 }, { "epoch": 0.335453913933677, "grad_norm": 1.924599289894104, "learning_rate": 3.4976908274534958e-06, "loss": 0.8188, "step": 27525 }, { "epoch": 0.33551485015782484, "grad_norm": 2.1366727352142334, "learning_rate": 3.4973701090442596e-06, "loss": 0.837, "step": 27530 }, { "epoch": 0.3355757863819726, "grad_norm": 1.9354944229125977, "learning_rate": 3.4970493906350227e-06, "loss": 0.8385, "step": 27535 }, { "epoch": 0.33563672260612043, "grad_norm": 1.8499152660369873, "learning_rate": 3.496728672225786e-06, "loss": 0.8864, "step": 27540 }, { "epoch": 0.33569765883026825, "grad_norm": 1.9206175804138184, "learning_rate": 3.4964079538165495e-06, "loss": 0.871, "step": 27545 }, { "epoch": 0.3357585950544161, "grad_norm": 1.6734304428100586, "learning_rate": 3.4960872354073126e-06, "loss": 0.8201, "step": 27550 }, { "epoch": 0.33581953127856384, "grad_norm": 2.0957515239715576, "learning_rate": 3.495766516998076e-06, "loss": 0.8277, "step": 27555 }, { "epoch": 0.33588046750271167, "grad_norm": 1.8974602222442627, "learning_rate": 3.495445798588839e-06, "loss": 0.9357, "step": 27560 }, { "epoch": 0.3359414037268595, "grad_norm": 1.9691826105117798, "learning_rate": 3.495125080179603e-06, "loss": 0.8663, "step": 27565 }, { "epoch": 0.33600233995100726, "grad_norm": 2.0406970977783203, "learning_rate": 3.494804361770366e-06, "loss": 0.8914, "step": 27570 }, { "epoch": 0.3360632761751551, "grad_norm": 2.055410146713257, "learning_rate": 3.494483643361129e-06, "loss": 0.8256, "step": 27575 }, { "epoch": 0.3361242123993029, "grad_norm": 2.2147650718688965, "learning_rate": 3.4941629249518928e-06, "loss": 0.7516, "step": 27580 }, { "epoch": 0.33618514862345067, "grad_norm": 2.2130868434906006, "learning_rate": 3.493842206542656e-06, "loss": 0.8913, "step": 27585 }, { "epoch": 0.3362460848475985, "grad_norm": 2.0639991760253906, "learning_rate": 3.493521488133419e-06, "loss": 0.8316, "step": 27590 }, { "epoch": 0.3363070210717463, "grad_norm": 1.6360386610031128, "learning_rate": 3.4932007697241827e-06, "loss": 0.8779, "step": 27595 }, { "epoch": 0.33636795729589414, "grad_norm": 1.769686222076416, "learning_rate": 3.4928800513149457e-06, "loss": 0.7978, "step": 27600 }, { "epoch": 0.3364288935200419, "grad_norm": 1.8863458633422852, "learning_rate": 3.4925593329057087e-06, "loss": 0.8348, "step": 27605 }, { "epoch": 0.33648982974418973, "grad_norm": 2.1728930473327637, "learning_rate": 3.4922386144964726e-06, "loss": 0.8211, "step": 27610 }, { "epoch": 0.33655076596833755, "grad_norm": 1.746979832649231, "learning_rate": 3.4919178960872356e-06, "loss": 0.9035, "step": 27615 }, { "epoch": 0.3366117021924853, "grad_norm": 1.9722219705581665, "learning_rate": 3.491597177677999e-06, "loss": 0.837, "step": 27620 }, { "epoch": 0.33667263841663314, "grad_norm": 2.0177536010742188, "learning_rate": 3.4912764592687625e-06, "loss": 0.8189, "step": 27625 }, { "epoch": 0.33673357464078096, "grad_norm": 1.8872840404510498, "learning_rate": 3.490955740859526e-06, "loss": 0.7922, "step": 27630 }, { "epoch": 0.3367945108649288, "grad_norm": 2.081433057785034, "learning_rate": 3.490635022450289e-06, "loss": 0.8268, "step": 27635 }, { "epoch": 0.33685544708907655, "grad_norm": 1.9807085990905762, "learning_rate": 3.490314304041052e-06, "loss": 0.8953, "step": 27640 }, { "epoch": 0.3369163833132244, "grad_norm": 1.890512228012085, "learning_rate": 3.489993585631816e-06, "loss": 0.8218, "step": 27645 }, { "epoch": 0.3369773195373722, "grad_norm": 2.10831618309021, "learning_rate": 3.489672867222579e-06, "loss": 0.8366, "step": 27650 }, { "epoch": 0.33703825576151997, "grad_norm": 2.3760323524475098, "learning_rate": 3.489352148813342e-06, "loss": 0.8837, "step": 27655 }, { "epoch": 0.3370991919856678, "grad_norm": 1.8922337293624878, "learning_rate": 3.4890314304041057e-06, "loss": 0.9061, "step": 27660 }, { "epoch": 0.3371601282098156, "grad_norm": 2.3085365295410156, "learning_rate": 3.4887107119948687e-06, "loss": 0.8583, "step": 27665 }, { "epoch": 0.33722106443396344, "grad_norm": 1.7353068590164185, "learning_rate": 3.4883899935856317e-06, "loss": 0.8299, "step": 27670 }, { "epoch": 0.3372820006581112, "grad_norm": 2.0350773334503174, "learning_rate": 3.4880692751763956e-06, "loss": 0.8479, "step": 27675 }, { "epoch": 0.337342936882259, "grad_norm": 1.9712862968444824, "learning_rate": 3.4877485567671586e-06, "loss": 0.8529, "step": 27680 }, { "epoch": 0.33740387310640685, "grad_norm": 1.7687700986862183, "learning_rate": 3.487427838357922e-06, "loss": 0.883, "step": 27685 }, { "epoch": 0.3374648093305546, "grad_norm": 2.804717540740967, "learning_rate": 3.4871071199486855e-06, "loss": 0.8383, "step": 27690 }, { "epoch": 0.33752574555470244, "grad_norm": 1.753212809562683, "learning_rate": 3.4867864015394485e-06, "loss": 0.8484, "step": 27695 }, { "epoch": 0.33758668177885026, "grad_norm": 2.0298256874084473, "learning_rate": 3.486465683130212e-06, "loss": 0.8334, "step": 27700 }, { "epoch": 0.3376476180029981, "grad_norm": 1.9749038219451904, "learning_rate": 3.4861449647209754e-06, "loss": 0.8436, "step": 27705 }, { "epoch": 0.33770855422714585, "grad_norm": 2.1448593139648438, "learning_rate": 3.485824246311739e-06, "loss": 0.9259, "step": 27710 }, { "epoch": 0.3377694904512937, "grad_norm": 2.163451671600342, "learning_rate": 3.485503527902502e-06, "loss": 0.8742, "step": 27715 }, { "epoch": 0.3378304266754415, "grad_norm": 1.993216633796692, "learning_rate": 3.485182809493265e-06, "loss": 0.9224, "step": 27720 }, { "epoch": 0.33789136289958926, "grad_norm": 1.9301044940948486, "learning_rate": 3.4848620910840287e-06, "loss": 0.8561, "step": 27725 }, { "epoch": 0.3379522991237371, "grad_norm": 1.7772979736328125, "learning_rate": 3.4845413726747918e-06, "loss": 0.8251, "step": 27730 }, { "epoch": 0.3380132353478849, "grad_norm": 3.2619924545288086, "learning_rate": 3.4842206542655548e-06, "loss": 0.8488, "step": 27735 }, { "epoch": 0.33807417157203273, "grad_norm": 2.0641815662384033, "learning_rate": 3.4838999358563186e-06, "loss": 0.9582, "step": 27740 }, { "epoch": 0.3381351077961805, "grad_norm": 2.100659132003784, "learning_rate": 3.4835792174470817e-06, "loss": 0.9265, "step": 27745 }, { "epoch": 0.3381960440203283, "grad_norm": 1.907747745513916, "learning_rate": 3.4832584990378447e-06, "loss": 0.864, "step": 27750 }, { "epoch": 0.33825698024447615, "grad_norm": 2.18819260597229, "learning_rate": 3.4829377806286085e-06, "loss": 0.8649, "step": 27755 }, { "epoch": 0.3383179164686239, "grad_norm": 2.10894513130188, "learning_rate": 3.4826170622193716e-06, "loss": 0.8472, "step": 27760 }, { "epoch": 0.33837885269277174, "grad_norm": 2.3608851432800293, "learning_rate": 3.482296343810135e-06, "loss": 0.9171, "step": 27765 }, { "epoch": 0.33843978891691956, "grad_norm": 1.9564192295074463, "learning_rate": 3.4819756254008984e-06, "loss": 0.8667, "step": 27770 }, { "epoch": 0.3385007251410674, "grad_norm": 2.0379834175109863, "learning_rate": 3.4816549069916614e-06, "loss": 0.8967, "step": 27775 }, { "epoch": 0.33856166136521515, "grad_norm": 1.946987271308899, "learning_rate": 3.481334188582425e-06, "loss": 0.8557, "step": 27780 }, { "epoch": 0.338622597589363, "grad_norm": 1.9089789390563965, "learning_rate": 3.4810134701731883e-06, "loss": 0.9084, "step": 27785 }, { "epoch": 0.3386835338135108, "grad_norm": 1.9716246128082275, "learning_rate": 3.4806927517639518e-06, "loss": 0.8821, "step": 27790 }, { "epoch": 0.33874447003765856, "grad_norm": 2.216315984725952, "learning_rate": 3.4803720333547148e-06, "loss": 0.9001, "step": 27795 }, { "epoch": 0.3388054062618064, "grad_norm": 1.8293089866638184, "learning_rate": 3.4800513149454782e-06, "loss": 0.8491, "step": 27800 }, { "epoch": 0.3388663424859542, "grad_norm": 2.278064727783203, "learning_rate": 3.4797305965362417e-06, "loss": 0.8486, "step": 27805 }, { "epoch": 0.33892727871010203, "grad_norm": 1.9571294784545898, "learning_rate": 3.4794098781270047e-06, "loss": 0.7577, "step": 27810 }, { "epoch": 0.3389882149342498, "grad_norm": 1.6623108386993408, "learning_rate": 3.4790891597177677e-06, "loss": 0.8721, "step": 27815 }, { "epoch": 0.3390491511583976, "grad_norm": 1.6346815824508667, "learning_rate": 3.4787684413085316e-06, "loss": 0.7849, "step": 27820 }, { "epoch": 0.33911008738254544, "grad_norm": 2.2375004291534424, "learning_rate": 3.4784477228992946e-06, "loss": 0.8676, "step": 27825 }, { "epoch": 0.3391710236066932, "grad_norm": 2.0856082439422607, "learning_rate": 3.4781270044900576e-06, "loss": 0.8397, "step": 27830 }, { "epoch": 0.33923195983084103, "grad_norm": 1.9865195751190186, "learning_rate": 3.4778062860808215e-06, "loss": 0.9676, "step": 27835 }, { "epoch": 0.33929289605498886, "grad_norm": 2.327235698699951, "learning_rate": 3.4774855676715845e-06, "loss": 0.7826, "step": 27840 }, { "epoch": 0.3393538322791367, "grad_norm": 1.9064582586288452, "learning_rate": 3.477164849262348e-06, "loss": 0.8467, "step": 27845 }, { "epoch": 0.33941476850328445, "grad_norm": 2.163689374923706, "learning_rate": 3.4768441308531114e-06, "loss": 0.8831, "step": 27850 }, { "epoch": 0.33947570472743227, "grad_norm": 1.8229340314865112, "learning_rate": 3.476523412443875e-06, "loss": 0.8727, "step": 27855 }, { "epoch": 0.3395366409515801, "grad_norm": 2.139338970184326, "learning_rate": 3.476202694034638e-06, "loss": 0.8751, "step": 27860 }, { "epoch": 0.33959757717572786, "grad_norm": 1.9906492233276367, "learning_rate": 3.4758819756254013e-06, "loss": 0.8981, "step": 27865 }, { "epoch": 0.3396585133998757, "grad_norm": 1.8370963335037231, "learning_rate": 3.4755612572161647e-06, "loss": 0.8135, "step": 27870 }, { "epoch": 0.3397194496240235, "grad_norm": 2.1886239051818848, "learning_rate": 3.4752405388069277e-06, "loss": 0.8062, "step": 27875 }, { "epoch": 0.33978038584817133, "grad_norm": 2.316134214401245, "learning_rate": 3.4749198203976916e-06, "loss": 0.816, "step": 27880 }, { "epoch": 0.3398413220723191, "grad_norm": 1.9166828393936157, "learning_rate": 3.4745991019884546e-06, "loss": 0.8226, "step": 27885 }, { "epoch": 0.3399022582964669, "grad_norm": 2.0012099742889404, "learning_rate": 3.4742783835792176e-06, "loss": 0.8789, "step": 27890 }, { "epoch": 0.33996319452061474, "grad_norm": 1.6331483125686646, "learning_rate": 3.4739576651699806e-06, "loss": 0.7984, "step": 27895 }, { "epoch": 0.3400241307447625, "grad_norm": 1.9815330505371094, "learning_rate": 3.4736369467607445e-06, "loss": 0.879, "step": 27900 }, { "epoch": 0.34008506696891033, "grad_norm": 1.9247386455535889, "learning_rate": 3.4733162283515075e-06, "loss": 0.823, "step": 27905 }, { "epoch": 0.34014600319305816, "grad_norm": 2.0883796215057373, "learning_rate": 3.472995509942271e-06, "loss": 0.8501, "step": 27910 }, { "epoch": 0.340206939417206, "grad_norm": 1.8680139780044556, "learning_rate": 3.4726747915330344e-06, "loss": 0.9042, "step": 27915 }, { "epoch": 0.34026787564135375, "grad_norm": 1.8753060102462769, "learning_rate": 3.4723540731237974e-06, "loss": 0.8162, "step": 27920 }, { "epoch": 0.34032881186550157, "grad_norm": 1.9414483308792114, "learning_rate": 3.472033354714561e-06, "loss": 0.899, "step": 27925 }, { "epoch": 0.3403897480896494, "grad_norm": 1.9792143106460571, "learning_rate": 3.4717126363053243e-06, "loss": 0.8541, "step": 27930 }, { "epoch": 0.34045068431379716, "grad_norm": 2.2320539951324463, "learning_rate": 3.4713919178960877e-06, "loss": 0.8669, "step": 27935 }, { "epoch": 0.340511620537945, "grad_norm": 2.1481080055236816, "learning_rate": 3.4710711994868507e-06, "loss": 0.939, "step": 27940 }, { "epoch": 0.3405725567620928, "grad_norm": 2.560033082962036, "learning_rate": 3.470750481077614e-06, "loss": 0.827, "step": 27945 }, { "epoch": 0.3406334929862406, "grad_norm": 1.7857937812805176, "learning_rate": 3.4704297626683776e-06, "loss": 0.875, "step": 27950 }, { "epoch": 0.3406944292103884, "grad_norm": 1.8583111763000488, "learning_rate": 3.4701090442591406e-06, "loss": 0.879, "step": 27955 }, { "epoch": 0.3407553654345362, "grad_norm": 1.7763360738754272, "learning_rate": 3.4697883258499045e-06, "loss": 0.7718, "step": 27960 }, { "epoch": 0.34081630165868404, "grad_norm": 1.8723583221435547, "learning_rate": 3.4694676074406675e-06, "loss": 0.8025, "step": 27965 }, { "epoch": 0.3408772378828318, "grad_norm": 2.097410202026367, "learning_rate": 3.4691468890314305e-06, "loss": 0.9213, "step": 27970 }, { "epoch": 0.34093817410697963, "grad_norm": 1.9306092262268066, "learning_rate": 3.4688261706221936e-06, "loss": 0.8413, "step": 27975 }, { "epoch": 0.34099911033112745, "grad_norm": 2.8160574436187744, "learning_rate": 3.4685054522129574e-06, "loss": 0.9311, "step": 27980 }, { "epoch": 0.3410600465552753, "grad_norm": 2.0737385749816895, "learning_rate": 3.4681847338037204e-06, "loss": 0.8438, "step": 27985 }, { "epoch": 0.34112098277942304, "grad_norm": 2.03308367729187, "learning_rate": 3.467864015394484e-06, "loss": 0.842, "step": 27990 }, { "epoch": 0.34118191900357087, "grad_norm": 2.19490385055542, "learning_rate": 3.4675432969852473e-06, "loss": 0.8098, "step": 27995 }, { "epoch": 0.3412428552277187, "grad_norm": 1.8324058055877686, "learning_rate": 3.4672225785760103e-06, "loss": 0.9155, "step": 28000 }, { "epoch": 0.34130379145186646, "grad_norm": 2.0143747329711914, "learning_rate": 3.4669018601667738e-06, "loss": 0.8471, "step": 28005 }, { "epoch": 0.3413647276760143, "grad_norm": 2.0600550174713135, "learning_rate": 3.4665811417575372e-06, "loss": 0.8356, "step": 28010 }, { "epoch": 0.3414256639001621, "grad_norm": 2.079843759536743, "learning_rate": 3.4662604233483007e-06, "loss": 0.9138, "step": 28015 }, { "epoch": 0.3414866001243099, "grad_norm": 1.9858838319778442, "learning_rate": 3.4659397049390637e-06, "loss": 0.8379, "step": 28020 }, { "epoch": 0.3415475363484577, "grad_norm": 2.028299570083618, "learning_rate": 3.465618986529827e-06, "loss": 0.866, "step": 28025 }, { "epoch": 0.3416084725726055, "grad_norm": 1.8480626344680786, "learning_rate": 3.4652982681205905e-06, "loss": 0.8218, "step": 28030 }, { "epoch": 0.34166940879675334, "grad_norm": 1.7212824821472168, "learning_rate": 3.4649775497113536e-06, "loss": 0.8937, "step": 28035 }, { "epoch": 0.3417303450209011, "grad_norm": 1.8553359508514404, "learning_rate": 3.4646568313021174e-06, "loss": 0.7893, "step": 28040 }, { "epoch": 0.34179128124504893, "grad_norm": 1.640721082687378, "learning_rate": 3.4643361128928804e-06, "loss": 0.8657, "step": 28045 }, { "epoch": 0.34185221746919675, "grad_norm": 2.6541197299957275, "learning_rate": 3.4640153944836435e-06, "loss": 0.8606, "step": 28050 }, { "epoch": 0.3419131536933445, "grad_norm": 2.325136423110962, "learning_rate": 3.4636946760744065e-06, "loss": 0.8742, "step": 28055 }, { "epoch": 0.34197408991749234, "grad_norm": 1.7830641269683838, "learning_rate": 3.4633739576651703e-06, "loss": 0.8521, "step": 28060 }, { "epoch": 0.34203502614164016, "grad_norm": 2.3791310787200928, "learning_rate": 3.4630532392559334e-06, "loss": 0.7924, "step": 28065 }, { "epoch": 0.342095962365788, "grad_norm": 2.166959762573242, "learning_rate": 3.462732520846697e-06, "loss": 0.8514, "step": 28070 }, { "epoch": 0.34215689858993575, "grad_norm": 2.138227701187134, "learning_rate": 3.4624118024374602e-06, "loss": 0.939, "step": 28075 }, { "epoch": 0.3422178348140836, "grad_norm": 2.1417531967163086, "learning_rate": 3.4620910840282233e-06, "loss": 0.8579, "step": 28080 }, { "epoch": 0.3422787710382314, "grad_norm": 1.7697771787643433, "learning_rate": 3.4617703656189867e-06, "loss": 0.8267, "step": 28085 }, { "epoch": 0.34233970726237917, "grad_norm": 2.3895883560180664, "learning_rate": 3.46144964720975e-06, "loss": 0.8499, "step": 28090 }, { "epoch": 0.342400643486527, "grad_norm": 1.9259158372879028, "learning_rate": 3.4611289288005136e-06, "loss": 0.9061, "step": 28095 }, { "epoch": 0.3424615797106748, "grad_norm": 1.9840290546417236, "learning_rate": 3.4608082103912766e-06, "loss": 0.8419, "step": 28100 }, { "epoch": 0.34252251593482264, "grad_norm": 1.8293392658233643, "learning_rate": 3.4604874919820405e-06, "loss": 0.9015, "step": 28105 }, { "epoch": 0.3425834521589704, "grad_norm": 1.844291090965271, "learning_rate": 3.4601667735728035e-06, "loss": 0.9353, "step": 28110 }, { "epoch": 0.3426443883831182, "grad_norm": 2.106790065765381, "learning_rate": 3.4598460551635665e-06, "loss": 0.8173, "step": 28115 }, { "epoch": 0.34270532460726605, "grad_norm": 2.313953161239624, "learning_rate": 3.4595253367543304e-06, "loss": 0.8485, "step": 28120 }, { "epoch": 0.3427662608314138, "grad_norm": 2.0292160511016846, "learning_rate": 3.4592046183450934e-06, "loss": 0.8344, "step": 28125 }, { "epoch": 0.34282719705556164, "grad_norm": 2.1462411880493164, "learning_rate": 3.4588838999358564e-06, "loss": 0.8905, "step": 28130 }, { "epoch": 0.34288813327970946, "grad_norm": 2.0500028133392334, "learning_rate": 3.4585631815266203e-06, "loss": 0.8218, "step": 28135 }, { "epoch": 0.3429490695038573, "grad_norm": 2.5527541637420654, "learning_rate": 3.4582424631173833e-06, "loss": 0.8246, "step": 28140 }, { "epoch": 0.34301000572800505, "grad_norm": 1.8119878768920898, "learning_rate": 3.4579217447081463e-06, "loss": 0.8059, "step": 28145 }, { "epoch": 0.3430709419521529, "grad_norm": 2.0189101696014404, "learning_rate": 3.4576010262989097e-06, "loss": 0.8812, "step": 28150 }, { "epoch": 0.3431318781763007, "grad_norm": 1.722844123840332, "learning_rate": 3.457280307889673e-06, "loss": 0.7893, "step": 28155 }, { "epoch": 0.34319281440044846, "grad_norm": 1.829987645149231, "learning_rate": 3.4569595894804366e-06, "loss": 0.8652, "step": 28160 }, { "epoch": 0.3432537506245963, "grad_norm": 2.118199586868286, "learning_rate": 3.4566388710711996e-06, "loss": 0.9031, "step": 28165 }, { "epoch": 0.3433146868487441, "grad_norm": 1.8970239162445068, "learning_rate": 3.456318152661963e-06, "loss": 0.863, "step": 28170 }, { "epoch": 0.34337562307289193, "grad_norm": 1.980122685432434, "learning_rate": 3.4559974342527265e-06, "loss": 0.8322, "step": 28175 }, { "epoch": 0.3434365592970397, "grad_norm": 1.6620376110076904, "learning_rate": 3.4556767158434895e-06, "loss": 0.7984, "step": 28180 }, { "epoch": 0.3434974955211875, "grad_norm": 2.6322946548461914, "learning_rate": 3.4553559974342534e-06, "loss": 0.9143, "step": 28185 }, { "epoch": 0.34355843174533535, "grad_norm": 2.2665414810180664, "learning_rate": 3.4550352790250164e-06, "loss": 0.7994, "step": 28190 }, { "epoch": 0.3436193679694831, "grad_norm": 1.7358280420303345, "learning_rate": 3.4547145606157794e-06, "loss": 0.8076, "step": 28195 }, { "epoch": 0.34368030419363094, "grad_norm": 1.834221363067627, "learning_rate": 3.4543938422065433e-06, "loss": 0.8511, "step": 28200 }, { "epoch": 0.34374124041777876, "grad_norm": 2.5573487281799316, "learning_rate": 3.4540731237973063e-06, "loss": 0.9208, "step": 28205 }, { "epoch": 0.3438021766419266, "grad_norm": 1.8522175550460815, "learning_rate": 3.4537524053880693e-06, "loss": 0.8601, "step": 28210 }, { "epoch": 0.34386311286607435, "grad_norm": 1.8523821830749512, "learning_rate": 3.453431686978833e-06, "loss": 0.878, "step": 28215 }, { "epoch": 0.3439240490902222, "grad_norm": 2.058311939239502, "learning_rate": 3.453110968569596e-06, "loss": 0.8465, "step": 28220 }, { "epoch": 0.34398498531437, "grad_norm": 2.091525077819824, "learning_rate": 3.4527902501603592e-06, "loss": 0.903, "step": 28225 }, { "epoch": 0.34404592153851776, "grad_norm": 2.170583724975586, "learning_rate": 3.4524695317511227e-06, "loss": 0.8951, "step": 28230 }, { "epoch": 0.3441068577626656, "grad_norm": 1.973597526550293, "learning_rate": 3.452148813341886e-06, "loss": 0.8331, "step": 28235 }, { "epoch": 0.3441677939868134, "grad_norm": 1.7353785037994385, "learning_rate": 3.4518280949326495e-06, "loss": 0.8835, "step": 28240 }, { "epoch": 0.34422873021096123, "grad_norm": 2.4653513431549072, "learning_rate": 3.4515073765234126e-06, "loss": 0.8288, "step": 28245 }, { "epoch": 0.344289666435109, "grad_norm": 1.792108178138733, "learning_rate": 3.451186658114176e-06, "loss": 0.8859, "step": 28250 }, { "epoch": 0.3443506026592568, "grad_norm": 1.864908218383789, "learning_rate": 3.4508659397049394e-06, "loss": 0.9145, "step": 28255 }, { "epoch": 0.34441153888340464, "grad_norm": 1.8611546754837036, "learning_rate": 3.4505452212957025e-06, "loss": 0.8324, "step": 28260 }, { "epoch": 0.3444724751075524, "grad_norm": 2.051558017730713, "learning_rate": 3.4502245028864663e-06, "loss": 0.8773, "step": 28265 }, { "epoch": 0.34453341133170023, "grad_norm": 2.4593772888183594, "learning_rate": 3.4499037844772293e-06, "loss": 0.853, "step": 28270 }, { "epoch": 0.34459434755584806, "grad_norm": 1.9509028196334839, "learning_rate": 3.4495830660679923e-06, "loss": 0.885, "step": 28275 }, { "epoch": 0.3446552837799959, "grad_norm": 1.6550183296203613, "learning_rate": 3.4492623476587562e-06, "loss": 0.8532, "step": 28280 }, { "epoch": 0.34471622000414365, "grad_norm": 1.8407816886901855, "learning_rate": 3.4489416292495192e-06, "loss": 0.8094, "step": 28285 }, { "epoch": 0.34477715622829147, "grad_norm": 2.362889051437378, "learning_rate": 3.4486209108402822e-06, "loss": 0.8115, "step": 28290 }, { "epoch": 0.3448380924524393, "grad_norm": 1.7071418762207031, "learning_rate": 3.448300192431046e-06, "loss": 0.8011, "step": 28295 }, { "epoch": 0.34489902867658706, "grad_norm": 1.7200524806976318, "learning_rate": 3.447979474021809e-06, "loss": 0.7961, "step": 28300 }, { "epoch": 0.3449599649007349, "grad_norm": 2.2950706481933594, "learning_rate": 3.447658755612572e-06, "loss": 0.9026, "step": 28305 }, { "epoch": 0.3450209011248827, "grad_norm": 1.7846969366073608, "learning_rate": 3.4473380372033356e-06, "loss": 0.8118, "step": 28310 }, { "epoch": 0.34508183734903053, "grad_norm": 1.8693948984146118, "learning_rate": 3.447017318794099e-06, "loss": 0.908, "step": 28315 }, { "epoch": 0.3451427735731783, "grad_norm": 1.7104096412658691, "learning_rate": 3.4466966003848625e-06, "loss": 0.8182, "step": 28320 }, { "epoch": 0.3452037097973261, "grad_norm": 1.728878140449524, "learning_rate": 3.4463758819756255e-06, "loss": 0.8663, "step": 28325 }, { "epoch": 0.34526464602147394, "grad_norm": 1.9995733499526978, "learning_rate": 3.4460551635663893e-06, "loss": 0.7899, "step": 28330 }, { "epoch": 0.3453255822456217, "grad_norm": 1.9730732440948486, "learning_rate": 3.4457344451571524e-06, "loss": 0.9024, "step": 28335 }, { "epoch": 0.34538651846976953, "grad_norm": 1.9063365459442139, "learning_rate": 3.4454137267479154e-06, "loss": 0.8752, "step": 28340 }, { "epoch": 0.34544745469391736, "grad_norm": 2.0111122131347656, "learning_rate": 3.4450930083386792e-06, "loss": 0.8861, "step": 28345 }, { "epoch": 0.3455083909180652, "grad_norm": 1.8663233518600464, "learning_rate": 3.4447722899294423e-06, "loss": 0.8699, "step": 28350 }, { "epoch": 0.34556932714221295, "grad_norm": 2.0031681060791016, "learning_rate": 3.4444515715202053e-06, "loss": 0.8945, "step": 28355 }, { "epoch": 0.34563026336636077, "grad_norm": 2.7181084156036377, "learning_rate": 3.444130853110969e-06, "loss": 0.8711, "step": 28360 }, { "epoch": 0.3456911995905086, "grad_norm": 1.9937993288040161, "learning_rate": 3.443810134701732e-06, "loss": 0.7877, "step": 28365 }, { "epoch": 0.34575213581465636, "grad_norm": 1.8126015663146973, "learning_rate": 3.443489416292495e-06, "loss": 0.8422, "step": 28370 }, { "epoch": 0.3458130720388042, "grad_norm": 2.681220293045044, "learning_rate": 3.443168697883259e-06, "loss": 0.8154, "step": 28375 }, { "epoch": 0.345874008262952, "grad_norm": 1.8468753099441528, "learning_rate": 3.442847979474022e-06, "loss": 0.8835, "step": 28380 }, { "epoch": 0.3459349444870998, "grad_norm": 2.1144609451293945, "learning_rate": 3.4425272610647855e-06, "loss": 0.91, "step": 28385 }, { "epoch": 0.3459958807112476, "grad_norm": 1.9301114082336426, "learning_rate": 3.4422065426555485e-06, "loss": 0.9374, "step": 28390 }, { "epoch": 0.3460568169353954, "grad_norm": 2.0473926067352295, "learning_rate": 3.441885824246312e-06, "loss": 0.8868, "step": 28395 }, { "epoch": 0.34611775315954324, "grad_norm": 2.173210620880127, "learning_rate": 3.4415651058370754e-06, "loss": 0.9207, "step": 28400 }, { "epoch": 0.346178689383691, "grad_norm": 2.0737085342407227, "learning_rate": 3.4412443874278384e-06, "loss": 0.8239, "step": 28405 }, { "epoch": 0.34623962560783883, "grad_norm": 2.130295753479004, "learning_rate": 3.4409236690186023e-06, "loss": 0.8646, "step": 28410 }, { "epoch": 0.34630056183198665, "grad_norm": 2.014815092086792, "learning_rate": 3.4406029506093653e-06, "loss": 0.8042, "step": 28415 }, { "epoch": 0.3463614980561345, "grad_norm": 2.0690102577209473, "learning_rate": 3.4402822322001283e-06, "loss": 0.8927, "step": 28420 }, { "epoch": 0.34642243428028224, "grad_norm": 1.837876558303833, "learning_rate": 3.439961513790892e-06, "loss": 0.8497, "step": 28425 }, { "epoch": 0.34648337050443007, "grad_norm": 2.060124158859253, "learning_rate": 3.439640795381655e-06, "loss": 0.7471, "step": 28430 }, { "epoch": 0.3465443067285779, "grad_norm": 2.09920334815979, "learning_rate": 3.439320076972418e-06, "loss": 0.8526, "step": 28435 }, { "epoch": 0.34660524295272566, "grad_norm": 1.8264498710632324, "learning_rate": 3.438999358563182e-06, "loss": 0.8883, "step": 28440 }, { "epoch": 0.3466661791768735, "grad_norm": 1.725611686706543, "learning_rate": 3.438678640153945e-06, "loss": 0.8913, "step": 28445 }, { "epoch": 0.3467271154010213, "grad_norm": 2.274949550628662, "learning_rate": 3.438357921744708e-06, "loss": 0.8427, "step": 28450 }, { "epoch": 0.3467880516251691, "grad_norm": 1.8378175497055054, "learning_rate": 3.438037203335472e-06, "loss": 0.838, "step": 28455 }, { "epoch": 0.3468489878493169, "grad_norm": 2.163796901702881, "learning_rate": 3.437716484926235e-06, "loss": 0.8972, "step": 28460 }, { "epoch": 0.3469099240734647, "grad_norm": 2.0019891262054443, "learning_rate": 3.4373957665169984e-06, "loss": 0.8212, "step": 28465 }, { "epoch": 0.34697086029761254, "grad_norm": 2.3831889629364014, "learning_rate": 3.437075048107762e-06, "loss": 0.7625, "step": 28470 }, { "epoch": 0.3470317965217603, "grad_norm": 2.1556947231292725, "learning_rate": 3.436754329698525e-06, "loss": 0.8228, "step": 28475 }, { "epoch": 0.34709273274590813, "grad_norm": 1.8610881567001343, "learning_rate": 3.4364336112892883e-06, "loss": 0.7927, "step": 28480 }, { "epoch": 0.34715366897005595, "grad_norm": 1.8140745162963867, "learning_rate": 3.4361128928800513e-06, "loss": 0.7832, "step": 28485 }, { "epoch": 0.3472146051942038, "grad_norm": 2.186542510986328, "learning_rate": 3.435792174470815e-06, "loss": 0.8843, "step": 28490 }, { "epoch": 0.34727554141835154, "grad_norm": 1.7509549856185913, "learning_rate": 3.4354714560615782e-06, "loss": 0.8559, "step": 28495 }, { "epoch": 0.34733647764249936, "grad_norm": 2.1198604106903076, "learning_rate": 3.4351507376523412e-06, "loss": 0.8462, "step": 28500 }, { "epoch": 0.3473974138666472, "grad_norm": 1.8330378532409668, "learning_rate": 3.434830019243105e-06, "loss": 0.8848, "step": 28505 }, { "epoch": 0.34745835009079495, "grad_norm": 1.708199143409729, "learning_rate": 3.434509300833868e-06, "loss": 0.811, "step": 28510 }, { "epoch": 0.3475192863149428, "grad_norm": 2.1435885429382324, "learning_rate": 3.434188582424631e-06, "loss": 0.864, "step": 28515 }, { "epoch": 0.3475802225390906, "grad_norm": 2.1303744316101074, "learning_rate": 3.433867864015395e-06, "loss": 0.8855, "step": 28520 }, { "epoch": 0.34764115876323837, "grad_norm": 1.7352521419525146, "learning_rate": 3.433547145606158e-06, "loss": 0.9905, "step": 28525 }, { "epoch": 0.3477020949873862, "grad_norm": 1.935727834701538, "learning_rate": 3.433226427196921e-06, "loss": 0.789, "step": 28530 }, { "epoch": 0.347763031211534, "grad_norm": 1.9681649208068848, "learning_rate": 3.432905708787685e-06, "loss": 0.95, "step": 28535 }, { "epoch": 0.34782396743568184, "grad_norm": 1.6510908603668213, "learning_rate": 3.432584990378448e-06, "loss": 0.8352, "step": 28540 }, { "epoch": 0.3478849036598296, "grad_norm": 1.8275600671768188, "learning_rate": 3.4322642719692113e-06, "loss": 0.8654, "step": 28545 }, { "epoch": 0.3479458398839774, "grad_norm": 2.407805919647217, "learning_rate": 3.4319435535599748e-06, "loss": 0.8225, "step": 28550 }, { "epoch": 0.34800677610812525, "grad_norm": 2.100527286529541, "learning_rate": 3.4316228351507382e-06, "loss": 0.8274, "step": 28555 }, { "epoch": 0.348067712332273, "grad_norm": 2.2500391006469727, "learning_rate": 3.4313021167415012e-06, "loss": 0.8264, "step": 28560 }, { "epoch": 0.34812864855642084, "grad_norm": 1.8539423942565918, "learning_rate": 3.4309813983322643e-06, "loss": 0.8548, "step": 28565 }, { "epoch": 0.34818958478056866, "grad_norm": 1.7978336811065674, "learning_rate": 3.430660679923028e-06, "loss": 0.8172, "step": 28570 }, { "epoch": 0.3482505210047165, "grad_norm": 1.9764528274536133, "learning_rate": 3.430339961513791e-06, "loss": 0.8019, "step": 28575 }, { "epoch": 0.34831145722886425, "grad_norm": 2.107961416244507, "learning_rate": 3.430019243104554e-06, "loss": 0.8126, "step": 28580 }, { "epoch": 0.3483723934530121, "grad_norm": 2.0152130126953125, "learning_rate": 3.429698524695318e-06, "loss": 0.8558, "step": 28585 }, { "epoch": 0.3484333296771599, "grad_norm": 2.141890287399292, "learning_rate": 3.429377806286081e-06, "loss": 0.9626, "step": 28590 }, { "epoch": 0.34849426590130766, "grad_norm": 2.116732358932495, "learning_rate": 3.429057087876844e-06, "loss": 0.8927, "step": 28595 }, { "epoch": 0.3485552021254555, "grad_norm": 1.7595678567886353, "learning_rate": 3.428736369467608e-06, "loss": 0.861, "step": 28600 }, { "epoch": 0.3486161383496033, "grad_norm": 1.9982917308807373, "learning_rate": 3.428415651058371e-06, "loss": 0.8744, "step": 28605 }, { "epoch": 0.34867707457375113, "grad_norm": 1.7712372541427612, "learning_rate": 3.4280949326491344e-06, "loss": 0.845, "step": 28610 }, { "epoch": 0.3487380107978989, "grad_norm": 2.2727370262145996, "learning_rate": 3.427774214239898e-06, "loss": 0.9238, "step": 28615 }, { "epoch": 0.3487989470220467, "grad_norm": 1.8435674905776978, "learning_rate": 3.427453495830661e-06, "loss": 0.85, "step": 28620 }, { "epoch": 0.34885988324619455, "grad_norm": 2.448951244354248, "learning_rate": 3.4271327774214243e-06, "loss": 0.8314, "step": 28625 }, { "epoch": 0.3489208194703423, "grad_norm": 2.132117748260498, "learning_rate": 3.4268120590121877e-06, "loss": 0.8247, "step": 28630 }, { "epoch": 0.34898175569449014, "grad_norm": 1.7633781433105469, "learning_rate": 3.426491340602951e-06, "loss": 0.8725, "step": 28635 }, { "epoch": 0.34904269191863796, "grad_norm": 2.225382089614868, "learning_rate": 3.426170622193714e-06, "loss": 0.8818, "step": 28640 }, { "epoch": 0.3491036281427858, "grad_norm": 1.9283461570739746, "learning_rate": 3.425849903784477e-06, "loss": 0.8877, "step": 28645 }, { "epoch": 0.34916456436693355, "grad_norm": 1.6591668128967285, "learning_rate": 3.425529185375241e-06, "loss": 0.8929, "step": 28650 }, { "epoch": 0.3492255005910814, "grad_norm": 1.742246150970459, "learning_rate": 3.425208466966004e-06, "loss": 0.851, "step": 28655 }, { "epoch": 0.3492864368152292, "grad_norm": 1.7061647176742554, "learning_rate": 3.424887748556767e-06, "loss": 0.8249, "step": 28660 }, { "epoch": 0.34934737303937696, "grad_norm": 1.8403284549713135, "learning_rate": 3.424567030147531e-06, "loss": 0.791, "step": 28665 }, { "epoch": 0.3494083092635248, "grad_norm": 1.7513619661331177, "learning_rate": 3.424246311738294e-06, "loss": 0.8236, "step": 28670 }, { "epoch": 0.3494692454876726, "grad_norm": 2.2601592540740967, "learning_rate": 3.423925593329057e-06, "loss": 0.8982, "step": 28675 }, { "epoch": 0.34953018171182043, "grad_norm": 1.7088695764541626, "learning_rate": 3.423604874919821e-06, "loss": 0.8803, "step": 28680 }, { "epoch": 0.3495911179359682, "grad_norm": 1.963828682899475, "learning_rate": 3.423284156510584e-06, "loss": 0.8868, "step": 28685 }, { "epoch": 0.349652054160116, "grad_norm": 2.2416114807128906, "learning_rate": 3.4229634381013473e-06, "loss": 0.8311, "step": 28690 }, { "epoch": 0.34971299038426384, "grad_norm": 1.9493821859359741, "learning_rate": 3.4226427196921107e-06, "loss": 0.8507, "step": 28695 }, { "epoch": 0.3497739266084116, "grad_norm": 2.5290329456329346, "learning_rate": 3.4223220012828738e-06, "loss": 0.8297, "step": 28700 }, { "epoch": 0.34983486283255943, "grad_norm": 1.9704930782318115, "learning_rate": 3.422001282873637e-06, "loss": 0.84, "step": 28705 }, { "epoch": 0.34989579905670726, "grad_norm": 2.245854616165161, "learning_rate": 3.4216805644644006e-06, "loss": 0.8756, "step": 28710 }, { "epoch": 0.3499567352808551, "grad_norm": 1.9873991012573242, "learning_rate": 3.421359846055164e-06, "loss": 0.8893, "step": 28715 }, { "epoch": 0.35001767150500285, "grad_norm": 2.128776788711548, "learning_rate": 3.421039127645927e-06, "loss": 0.8486, "step": 28720 }, { "epoch": 0.35007860772915067, "grad_norm": 1.8056570291519165, "learning_rate": 3.4207184092366905e-06, "loss": 0.8933, "step": 28725 }, { "epoch": 0.3501395439532985, "grad_norm": 2.1181252002716064, "learning_rate": 3.420397690827454e-06, "loss": 0.8822, "step": 28730 }, { "epoch": 0.35020048017744626, "grad_norm": 2.2580392360687256, "learning_rate": 3.420076972418217e-06, "loss": 0.9136, "step": 28735 }, { "epoch": 0.3502614164015941, "grad_norm": 2.3396430015563965, "learning_rate": 3.41975625400898e-06, "loss": 0.8778, "step": 28740 }, { "epoch": 0.3503223526257419, "grad_norm": 1.8846461772918701, "learning_rate": 3.419435535599744e-06, "loss": 0.7753, "step": 28745 }, { "epoch": 0.35038328884988973, "grad_norm": 2.315078020095825, "learning_rate": 3.419114817190507e-06, "loss": 0.8773, "step": 28750 }, { "epoch": 0.3504442250740375, "grad_norm": 2.1750426292419434, "learning_rate": 3.41879409878127e-06, "loss": 0.8786, "step": 28755 }, { "epoch": 0.3505051612981853, "grad_norm": 2.0627331733703613, "learning_rate": 3.4184733803720338e-06, "loss": 0.8013, "step": 28760 }, { "epoch": 0.35056609752233314, "grad_norm": 2.3723526000976562, "learning_rate": 3.418152661962797e-06, "loss": 0.9077, "step": 28765 }, { "epoch": 0.3506270337464809, "grad_norm": 2.085905075073242, "learning_rate": 3.4178319435535602e-06, "loss": 0.8885, "step": 28770 }, { "epoch": 0.35068796997062873, "grad_norm": 2.2261219024658203, "learning_rate": 3.4175112251443237e-06, "loss": 0.842, "step": 28775 }, { "epoch": 0.35074890619477656, "grad_norm": 2.1531834602355957, "learning_rate": 3.4171905067350867e-06, "loss": 0.8092, "step": 28780 }, { "epoch": 0.3508098424189244, "grad_norm": 1.7000528573989868, "learning_rate": 3.41686978832585e-06, "loss": 0.7791, "step": 28785 }, { "epoch": 0.35087077864307215, "grad_norm": 2.2241063117980957, "learning_rate": 3.4165490699166136e-06, "loss": 0.8269, "step": 28790 }, { "epoch": 0.35093171486721997, "grad_norm": 1.9240764379501343, "learning_rate": 3.416228351507377e-06, "loss": 0.8548, "step": 28795 }, { "epoch": 0.3509926510913678, "grad_norm": 2.3716111183166504, "learning_rate": 3.41590763309814e-06, "loss": 0.9044, "step": 28800 }, { "epoch": 0.35105358731551556, "grad_norm": 2.047149896621704, "learning_rate": 3.415586914688904e-06, "loss": 0.8085, "step": 28805 }, { "epoch": 0.3511145235396634, "grad_norm": 1.91868257522583, "learning_rate": 3.415266196279667e-06, "loss": 0.8799, "step": 28810 }, { "epoch": 0.3511754597638112, "grad_norm": 1.8993605375289917, "learning_rate": 3.41494547787043e-06, "loss": 0.85, "step": 28815 }, { "epoch": 0.351236395987959, "grad_norm": 2.860365629196167, "learning_rate": 3.414624759461193e-06, "loss": 0.9196, "step": 28820 }, { "epoch": 0.3512973322121068, "grad_norm": 2.3111324310302734, "learning_rate": 3.414304041051957e-06, "loss": 0.8982, "step": 28825 }, { "epoch": 0.3513582684362546, "grad_norm": 1.7926677465438843, "learning_rate": 3.41398332264272e-06, "loss": 0.8352, "step": 28830 }, { "epoch": 0.35141920466040244, "grad_norm": 2.159379243850708, "learning_rate": 3.4136626042334833e-06, "loss": 0.8509, "step": 28835 }, { "epoch": 0.3514801408845502, "grad_norm": 2.1341569423675537, "learning_rate": 3.4133418858242467e-06, "loss": 0.8499, "step": 28840 }, { "epoch": 0.35154107710869803, "grad_norm": 1.5978528261184692, "learning_rate": 3.4130211674150097e-06, "loss": 0.789, "step": 28845 }, { "epoch": 0.35160201333284585, "grad_norm": 2.086156129837036, "learning_rate": 3.412700449005773e-06, "loss": 0.9142, "step": 28850 }, { "epoch": 0.3516629495569937, "grad_norm": 2.113035202026367, "learning_rate": 3.4123797305965366e-06, "loss": 0.8996, "step": 28855 }, { "epoch": 0.35172388578114144, "grad_norm": 1.981428623199463, "learning_rate": 3.4120590121873e-06, "loss": 0.8694, "step": 28860 }, { "epoch": 0.35178482200528927, "grad_norm": 1.9222369194030762, "learning_rate": 3.411738293778063e-06, "loss": 0.8992, "step": 28865 }, { "epoch": 0.3518457582294371, "grad_norm": 1.8343456983566284, "learning_rate": 3.4114175753688265e-06, "loss": 0.8285, "step": 28870 }, { "epoch": 0.35190669445358486, "grad_norm": 1.7230569124221802, "learning_rate": 3.41109685695959e-06, "loss": 0.8589, "step": 28875 }, { "epoch": 0.3519676306777327, "grad_norm": 2.168802499771118, "learning_rate": 3.410776138550353e-06, "loss": 0.8333, "step": 28880 }, { "epoch": 0.3520285669018805, "grad_norm": 1.8864078521728516, "learning_rate": 3.410455420141117e-06, "loss": 0.8459, "step": 28885 }, { "epoch": 0.3520895031260283, "grad_norm": 2.135633707046509, "learning_rate": 3.41013470173188e-06, "loss": 0.8593, "step": 28890 }, { "epoch": 0.3521504393501761, "grad_norm": 2.120579242706299, "learning_rate": 3.409813983322643e-06, "loss": 0.865, "step": 28895 }, { "epoch": 0.3522113755743239, "grad_norm": 2.3453164100646973, "learning_rate": 3.409493264913406e-06, "loss": 0.9446, "step": 28900 }, { "epoch": 0.35227231179847174, "grad_norm": 1.9178998470306396, "learning_rate": 3.4091725465041697e-06, "loss": 0.9472, "step": 28905 }, { "epoch": 0.3523332480226195, "grad_norm": 2.2288687229156494, "learning_rate": 3.4088518280949327e-06, "loss": 0.8835, "step": 28910 }, { "epoch": 0.35239418424676733, "grad_norm": 2.584988832473755, "learning_rate": 3.408531109685696e-06, "loss": 0.8489, "step": 28915 }, { "epoch": 0.35245512047091515, "grad_norm": 1.9536364078521729, "learning_rate": 3.4082103912764596e-06, "loss": 0.8722, "step": 28920 }, { "epoch": 0.352516056695063, "grad_norm": 2.041745901107788, "learning_rate": 3.4078896728672226e-06, "loss": 0.9074, "step": 28925 }, { "epoch": 0.35257699291921074, "grad_norm": 2.0154683589935303, "learning_rate": 3.407568954457986e-06, "loss": 0.8698, "step": 28930 }, { "epoch": 0.35263792914335856, "grad_norm": 1.8374364376068115, "learning_rate": 3.4072482360487495e-06, "loss": 0.8744, "step": 28935 }, { "epoch": 0.3526988653675064, "grad_norm": 1.9764748811721802, "learning_rate": 3.406927517639513e-06, "loss": 0.8641, "step": 28940 }, { "epoch": 0.35275980159165415, "grad_norm": 1.9309781789779663, "learning_rate": 3.406606799230276e-06, "loss": 0.8972, "step": 28945 }, { "epoch": 0.352820737815802, "grad_norm": 1.6929255723953247, "learning_rate": 3.4062860808210394e-06, "loss": 0.8127, "step": 28950 }, { "epoch": 0.3528816740399498, "grad_norm": 1.9602391719818115, "learning_rate": 3.405965362411803e-06, "loss": 0.9061, "step": 28955 }, { "epoch": 0.3529426102640976, "grad_norm": 2.015493869781494, "learning_rate": 3.405644644002566e-06, "loss": 0.8503, "step": 28960 }, { "epoch": 0.3530035464882454, "grad_norm": 1.813391923904419, "learning_rate": 3.4053239255933297e-06, "loss": 0.8661, "step": 28965 }, { "epoch": 0.3530644827123932, "grad_norm": 1.9882092475891113, "learning_rate": 3.4050032071840928e-06, "loss": 0.8473, "step": 28970 }, { "epoch": 0.35312541893654104, "grad_norm": 2.1069562435150146, "learning_rate": 3.4046824887748558e-06, "loss": 0.8941, "step": 28975 }, { "epoch": 0.3531863551606888, "grad_norm": 2.125638246536255, "learning_rate": 3.404361770365619e-06, "loss": 0.8205, "step": 28980 }, { "epoch": 0.3532472913848366, "grad_norm": 2.1097395420074463, "learning_rate": 3.4040410519563827e-06, "loss": 0.9028, "step": 28985 }, { "epoch": 0.35330822760898445, "grad_norm": 1.7871016263961792, "learning_rate": 3.4037203335471457e-06, "loss": 0.8499, "step": 28990 }, { "epoch": 0.35336916383313227, "grad_norm": 2.066030502319336, "learning_rate": 3.403399615137909e-06, "loss": 0.844, "step": 28995 }, { "epoch": 0.35343010005728004, "grad_norm": 2.2293171882629395, "learning_rate": 3.4030788967286726e-06, "loss": 0.9061, "step": 29000 }, { "epoch": 0.35349103628142786, "grad_norm": 2.271761178970337, "learning_rate": 3.4027581783194356e-06, "loss": 0.8925, "step": 29005 }, { "epoch": 0.3535519725055757, "grad_norm": 1.8537431955337524, "learning_rate": 3.402437459910199e-06, "loss": 0.9447, "step": 29010 }, { "epoch": 0.35361290872972345, "grad_norm": 1.6632859706878662, "learning_rate": 3.4021167415009625e-06, "loss": 0.867, "step": 29015 }, { "epoch": 0.3536738449538713, "grad_norm": 2.385242223739624, "learning_rate": 3.401796023091726e-06, "loss": 0.8227, "step": 29020 }, { "epoch": 0.3537347811780191, "grad_norm": 2.087445020675659, "learning_rate": 3.401475304682489e-06, "loss": 0.8711, "step": 29025 }, { "epoch": 0.35379571740216686, "grad_norm": 2.184262990951538, "learning_rate": 3.4011545862732528e-06, "loss": 0.8992, "step": 29030 }, { "epoch": 0.3538566536263147, "grad_norm": 1.9040238857269287, "learning_rate": 3.400833867864016e-06, "loss": 0.8096, "step": 29035 }, { "epoch": 0.3539175898504625, "grad_norm": 2.388982057571411, "learning_rate": 3.400513149454779e-06, "loss": 0.8628, "step": 29040 }, { "epoch": 0.35397852607461033, "grad_norm": 2.16342830657959, "learning_rate": 3.4001924310455427e-06, "loss": 0.835, "step": 29045 }, { "epoch": 0.3540394622987581, "grad_norm": 2.174025774002075, "learning_rate": 3.3998717126363057e-06, "loss": 0.8864, "step": 29050 }, { "epoch": 0.3541003985229059, "grad_norm": 1.8683127164840698, "learning_rate": 3.3995509942270687e-06, "loss": 0.7907, "step": 29055 }, { "epoch": 0.35416133474705375, "grad_norm": 1.8913092613220215, "learning_rate": 3.3992302758178326e-06, "loss": 0.8025, "step": 29060 }, { "epoch": 0.3542222709712015, "grad_norm": 1.782033920288086, "learning_rate": 3.3989095574085956e-06, "loss": 0.8434, "step": 29065 }, { "epoch": 0.35428320719534934, "grad_norm": 1.9508377313613892, "learning_rate": 3.3985888389993586e-06, "loss": 0.865, "step": 29070 }, { "epoch": 0.35434414341949716, "grad_norm": 1.9974164962768555, "learning_rate": 3.398268120590122e-06, "loss": 0.8541, "step": 29075 }, { "epoch": 0.354405079643645, "grad_norm": 2.455235481262207, "learning_rate": 3.3979474021808855e-06, "loss": 0.8815, "step": 29080 }, { "epoch": 0.35446601586779275, "grad_norm": 1.752707839012146, "learning_rate": 3.397626683771649e-06, "loss": 0.8443, "step": 29085 }, { "epoch": 0.3545269520919406, "grad_norm": 1.7929589748382568, "learning_rate": 3.397305965362412e-06, "loss": 0.8662, "step": 29090 }, { "epoch": 0.3545878883160884, "grad_norm": 1.947135090827942, "learning_rate": 3.3969852469531754e-06, "loss": 0.8877, "step": 29095 }, { "epoch": 0.35464882454023616, "grad_norm": 1.8409969806671143, "learning_rate": 3.396664528543939e-06, "loss": 0.856, "step": 29100 }, { "epoch": 0.354709760764384, "grad_norm": 1.4469467401504517, "learning_rate": 3.396343810134702e-06, "loss": 0.8644, "step": 29105 }, { "epoch": 0.3547706969885318, "grad_norm": 1.939296841621399, "learning_rate": 3.3960230917254657e-06, "loss": 0.8531, "step": 29110 }, { "epoch": 0.35483163321267963, "grad_norm": 1.9102673530578613, "learning_rate": 3.3957023733162287e-06, "loss": 0.8699, "step": 29115 }, { "epoch": 0.3548925694368274, "grad_norm": 1.724233627319336, "learning_rate": 3.3953816549069917e-06, "loss": 0.8741, "step": 29120 }, { "epoch": 0.3549535056609752, "grad_norm": 2.0867488384246826, "learning_rate": 3.3950609364977556e-06, "loss": 0.8548, "step": 29125 }, { "epoch": 0.35501444188512304, "grad_norm": 2.0690839290618896, "learning_rate": 3.3947402180885186e-06, "loss": 0.7908, "step": 29130 }, { "epoch": 0.3550753781092708, "grad_norm": 1.7974642515182495, "learning_rate": 3.3944194996792816e-06, "loss": 0.8176, "step": 29135 }, { "epoch": 0.35513631433341863, "grad_norm": 1.6963645219802856, "learning_rate": 3.3940987812700455e-06, "loss": 0.7635, "step": 29140 }, { "epoch": 0.35519725055756646, "grad_norm": 1.5178639888763428, "learning_rate": 3.3937780628608085e-06, "loss": 0.8096, "step": 29145 }, { "epoch": 0.3552581867817143, "grad_norm": 1.95492422580719, "learning_rate": 3.3934573444515715e-06, "loss": 0.8769, "step": 29150 }, { "epoch": 0.35531912300586205, "grad_norm": 2.9603803157806396, "learning_rate": 3.393136626042335e-06, "loss": 0.8699, "step": 29155 }, { "epoch": 0.35538005923000987, "grad_norm": 2.0312671661376953, "learning_rate": 3.3928159076330984e-06, "loss": 0.8561, "step": 29160 }, { "epoch": 0.3554409954541577, "grad_norm": 2.108198642730713, "learning_rate": 3.392495189223862e-06, "loss": 0.81, "step": 29165 }, { "epoch": 0.35550193167830546, "grad_norm": 1.706794023513794, "learning_rate": 3.392174470814625e-06, "loss": 0.8482, "step": 29170 }, { "epoch": 0.3555628679024533, "grad_norm": 2.0701043605804443, "learning_rate": 3.3918537524053883e-06, "loss": 0.8321, "step": 29175 }, { "epoch": 0.3556238041266011, "grad_norm": 1.8476333618164062, "learning_rate": 3.3915330339961517e-06, "loss": 0.7971, "step": 29180 }, { "epoch": 0.35568474035074893, "grad_norm": 1.9913212060928345, "learning_rate": 3.3912123155869148e-06, "loss": 0.7737, "step": 29185 }, { "epoch": 0.3557456765748967, "grad_norm": 1.9832710027694702, "learning_rate": 3.3908915971776786e-06, "loss": 0.7541, "step": 29190 }, { "epoch": 0.3558066127990445, "grad_norm": 2.104641914367676, "learning_rate": 3.3905708787684416e-06, "loss": 0.8804, "step": 29195 }, { "epoch": 0.35586754902319234, "grad_norm": 2.2081449031829834, "learning_rate": 3.3902501603592047e-06, "loss": 0.855, "step": 29200 }, { "epoch": 0.3559284852473401, "grad_norm": 1.888540267944336, "learning_rate": 3.3899294419499685e-06, "loss": 0.8799, "step": 29205 }, { "epoch": 0.35598942147148793, "grad_norm": 2.2853524684906006, "learning_rate": 3.3896087235407315e-06, "loss": 0.827, "step": 29210 }, { "epoch": 0.35605035769563576, "grad_norm": 1.7662910223007202, "learning_rate": 3.3892880051314946e-06, "loss": 0.8733, "step": 29215 }, { "epoch": 0.3561112939197836, "grad_norm": 2.229604482650757, "learning_rate": 3.3889672867222584e-06, "loss": 0.8439, "step": 29220 }, { "epoch": 0.35617223014393135, "grad_norm": 3.4025704860687256, "learning_rate": 3.3886465683130214e-06, "loss": 0.9466, "step": 29225 }, { "epoch": 0.35623316636807917, "grad_norm": 1.966488242149353, "learning_rate": 3.3883258499037845e-06, "loss": 0.9022, "step": 29230 }, { "epoch": 0.356294102592227, "grad_norm": 2.2505908012390137, "learning_rate": 3.388005131494548e-06, "loss": 0.8849, "step": 29235 }, { "epoch": 0.35635503881637476, "grad_norm": 1.842039704322815, "learning_rate": 3.3876844130853113e-06, "loss": 0.8891, "step": 29240 }, { "epoch": 0.3564159750405226, "grad_norm": 1.7517039775848389, "learning_rate": 3.3873636946760748e-06, "loss": 0.7987, "step": 29245 }, { "epoch": 0.3564769112646704, "grad_norm": 2.3049707412719727, "learning_rate": 3.387042976266838e-06, "loss": 0.7723, "step": 29250 }, { "epoch": 0.3565378474888182, "grad_norm": 1.9065892696380615, "learning_rate": 3.3867222578576017e-06, "loss": 0.9031, "step": 29255 }, { "epoch": 0.356598783712966, "grad_norm": 1.7859867811203003, "learning_rate": 3.3864015394483647e-06, "loss": 0.8391, "step": 29260 }, { "epoch": 0.3566597199371138, "grad_norm": 1.6330357789993286, "learning_rate": 3.3860808210391277e-06, "loss": 0.8058, "step": 29265 }, { "epoch": 0.35672065616126164, "grad_norm": 2.3765385150909424, "learning_rate": 3.3857601026298916e-06, "loss": 0.8809, "step": 29270 }, { "epoch": 0.3567815923854094, "grad_norm": 2.0523018836975098, "learning_rate": 3.3854393842206546e-06, "loss": 0.8553, "step": 29275 }, { "epoch": 0.35684252860955723, "grad_norm": 2.075516700744629, "learning_rate": 3.3851186658114176e-06, "loss": 0.8098, "step": 29280 }, { "epoch": 0.35690346483370505, "grad_norm": 2.2230169773101807, "learning_rate": 3.3847979474021815e-06, "loss": 0.8655, "step": 29285 }, { "epoch": 0.3569644010578529, "grad_norm": 1.9814289808273315, "learning_rate": 3.3844772289929445e-06, "loss": 0.8381, "step": 29290 }, { "epoch": 0.35702533728200064, "grad_norm": 2.495772123336792, "learning_rate": 3.3841565105837075e-06, "loss": 0.9209, "step": 29295 }, { "epoch": 0.35708627350614847, "grad_norm": 1.8942426443099976, "learning_rate": 3.3838357921744714e-06, "loss": 0.8673, "step": 29300 }, { "epoch": 0.3571472097302963, "grad_norm": 2.013594627380371, "learning_rate": 3.3835150737652344e-06, "loss": 0.8312, "step": 29305 }, { "epoch": 0.35720814595444406, "grad_norm": 2.150282859802246, "learning_rate": 3.383194355355998e-06, "loss": 0.8756, "step": 29310 }, { "epoch": 0.3572690821785919, "grad_norm": 1.8257429599761963, "learning_rate": 3.3828736369467612e-06, "loss": 0.8548, "step": 29315 }, { "epoch": 0.3573300184027397, "grad_norm": 1.8724061250686646, "learning_rate": 3.3825529185375243e-06, "loss": 0.9058, "step": 29320 }, { "epoch": 0.3573909546268875, "grad_norm": 1.8160922527313232, "learning_rate": 3.3822322001282877e-06, "loss": 0.7558, "step": 29325 }, { "epoch": 0.3574518908510353, "grad_norm": 1.9277231693267822, "learning_rate": 3.3819114817190507e-06, "loss": 0.8591, "step": 29330 }, { "epoch": 0.3575128270751831, "grad_norm": 1.6281112432479858, "learning_rate": 3.3815907633098146e-06, "loss": 0.834, "step": 29335 }, { "epoch": 0.35757376329933094, "grad_norm": 2.3870432376861572, "learning_rate": 3.3812700449005776e-06, "loss": 0.8417, "step": 29340 }, { "epoch": 0.3576346995234787, "grad_norm": 1.9699478149414062, "learning_rate": 3.3809493264913406e-06, "loss": 0.8994, "step": 29345 }, { "epoch": 0.35769563574762653, "grad_norm": 1.9700216054916382, "learning_rate": 3.3806286080821045e-06, "loss": 0.8133, "step": 29350 }, { "epoch": 0.35775657197177435, "grad_norm": 2.046578884124756, "learning_rate": 3.3803078896728675e-06, "loss": 0.8228, "step": 29355 }, { "epoch": 0.3578175081959222, "grad_norm": 2.0036752223968506, "learning_rate": 3.3799871712636305e-06, "loss": 0.7804, "step": 29360 }, { "epoch": 0.35787844442006994, "grad_norm": 1.9547641277313232, "learning_rate": 3.3796664528543944e-06, "loss": 0.8563, "step": 29365 }, { "epoch": 0.35793938064421776, "grad_norm": 1.6365870237350464, "learning_rate": 3.3793457344451574e-06, "loss": 0.8001, "step": 29370 }, { "epoch": 0.3580003168683656, "grad_norm": 2.1071791648864746, "learning_rate": 3.3790250160359204e-06, "loss": 0.7981, "step": 29375 }, { "epoch": 0.35806125309251335, "grad_norm": 1.9611042737960815, "learning_rate": 3.3787042976266843e-06, "loss": 0.8301, "step": 29380 }, { "epoch": 0.3581221893166612, "grad_norm": 2.5375614166259766, "learning_rate": 3.3783835792174473e-06, "loss": 0.9246, "step": 29385 }, { "epoch": 0.358183125540809, "grad_norm": 2.1291558742523193, "learning_rate": 3.3780628608082107e-06, "loss": 0.8248, "step": 29390 }, { "epoch": 0.3582440617649568, "grad_norm": 2.0669429302215576, "learning_rate": 3.377742142398974e-06, "loss": 0.8046, "step": 29395 }, { "epoch": 0.3583049979891046, "grad_norm": 2.0290448665618896, "learning_rate": 3.377421423989737e-06, "loss": 0.8172, "step": 29400 }, { "epoch": 0.3583659342132524, "grad_norm": 1.9085533618927002, "learning_rate": 3.3771007055805006e-06, "loss": 0.8931, "step": 29405 }, { "epoch": 0.35842687043740024, "grad_norm": 1.885811686515808, "learning_rate": 3.3767799871712636e-06, "loss": 0.942, "step": 29410 }, { "epoch": 0.358487806661548, "grad_norm": 2.1572577953338623, "learning_rate": 3.3764592687620275e-06, "loss": 0.8852, "step": 29415 }, { "epoch": 0.3585487428856958, "grad_norm": 1.6653673648834229, "learning_rate": 3.3761385503527905e-06, "loss": 0.8664, "step": 29420 }, { "epoch": 0.35860967910984365, "grad_norm": 1.7911643981933594, "learning_rate": 3.3758178319435535e-06, "loss": 0.8468, "step": 29425 }, { "epoch": 0.35867061533399147, "grad_norm": 2.203073263168335, "learning_rate": 3.3754971135343174e-06, "loss": 0.8315, "step": 29430 }, { "epoch": 0.35873155155813924, "grad_norm": 1.7502213716506958, "learning_rate": 3.3751763951250804e-06, "loss": 0.788, "step": 29435 }, { "epoch": 0.35879248778228706, "grad_norm": 1.839088797569275, "learning_rate": 3.3748556767158434e-06, "loss": 0.8982, "step": 29440 }, { "epoch": 0.3588534240064349, "grad_norm": 2.162726879119873, "learning_rate": 3.3745349583066073e-06, "loss": 0.8756, "step": 29445 }, { "epoch": 0.35891436023058265, "grad_norm": 1.915940523147583, "learning_rate": 3.3742142398973703e-06, "loss": 0.8545, "step": 29450 }, { "epoch": 0.3589752964547305, "grad_norm": 2.1069180965423584, "learning_rate": 3.3738935214881333e-06, "loss": 0.7948, "step": 29455 }, { "epoch": 0.3590362326788783, "grad_norm": 2.0286660194396973, "learning_rate": 3.373572803078897e-06, "loss": 0.8197, "step": 29460 }, { "epoch": 0.3590971689030261, "grad_norm": 1.9813125133514404, "learning_rate": 3.3732520846696602e-06, "loss": 0.8495, "step": 29465 }, { "epoch": 0.3591581051271739, "grad_norm": 2.1412410736083984, "learning_rate": 3.3729313662604237e-06, "loss": 0.8661, "step": 29470 }, { "epoch": 0.3592190413513217, "grad_norm": 1.558400273323059, "learning_rate": 3.372610647851187e-06, "loss": 0.8016, "step": 29475 }, { "epoch": 0.35927997757546953, "grad_norm": 2.097731590270996, "learning_rate": 3.37228992944195e-06, "loss": 0.9255, "step": 29480 }, { "epoch": 0.3593409137996173, "grad_norm": 1.8950049877166748, "learning_rate": 3.3719692110327136e-06, "loss": 0.8246, "step": 29485 }, { "epoch": 0.3594018500237651, "grad_norm": 1.7755614519119263, "learning_rate": 3.3716484926234766e-06, "loss": 0.854, "step": 29490 }, { "epoch": 0.35946278624791295, "grad_norm": 1.747850775718689, "learning_rate": 3.3713277742142404e-06, "loss": 0.837, "step": 29495 }, { "epoch": 0.3595237224720607, "grad_norm": 1.990951657295227, "learning_rate": 3.3710070558050035e-06, "loss": 0.8341, "step": 29500 }, { "epoch": 0.35958465869620854, "grad_norm": 2.035407066345215, "learning_rate": 3.3706863373957665e-06, "loss": 0.8453, "step": 29505 }, { "epoch": 0.35964559492035636, "grad_norm": 2.4442665576934814, "learning_rate": 3.3703656189865303e-06, "loss": 0.9403, "step": 29510 }, { "epoch": 0.3597065311445042, "grad_norm": 2.110887289047241, "learning_rate": 3.3700449005772934e-06, "loss": 0.895, "step": 29515 }, { "epoch": 0.35976746736865195, "grad_norm": 1.8725204467773438, "learning_rate": 3.3697241821680564e-06, "loss": 0.8771, "step": 29520 }, { "epoch": 0.3598284035927998, "grad_norm": 1.8254982233047485, "learning_rate": 3.3694034637588202e-06, "loss": 0.8311, "step": 29525 }, { "epoch": 0.3598893398169476, "grad_norm": 1.7749019861221313, "learning_rate": 3.3690827453495833e-06, "loss": 0.81, "step": 29530 }, { "epoch": 0.35995027604109536, "grad_norm": 1.8654263019561768, "learning_rate": 3.3687620269403467e-06, "loss": 0.8321, "step": 29535 }, { "epoch": 0.3600112122652432, "grad_norm": 1.9626797437667847, "learning_rate": 3.36844130853111e-06, "loss": 0.8046, "step": 29540 }, { "epoch": 0.360072148489391, "grad_norm": 2.020573854446411, "learning_rate": 3.368120590121873e-06, "loss": 0.8384, "step": 29545 }, { "epoch": 0.36013308471353883, "grad_norm": 1.8777813911437988, "learning_rate": 3.3677998717126366e-06, "loss": 0.8387, "step": 29550 }, { "epoch": 0.3601940209376866, "grad_norm": 1.7198405265808105, "learning_rate": 3.3674791533034e-06, "loss": 0.782, "step": 29555 }, { "epoch": 0.3602549571618344, "grad_norm": 1.926681637763977, "learning_rate": 3.3671584348941635e-06, "loss": 0.8838, "step": 29560 }, { "epoch": 0.36031589338598224, "grad_norm": 2.2068517208099365, "learning_rate": 3.3668377164849265e-06, "loss": 0.8868, "step": 29565 }, { "epoch": 0.36037682961013, "grad_norm": 1.804715633392334, "learning_rate": 3.3665169980756895e-06, "loss": 0.8662, "step": 29570 }, { "epoch": 0.36043776583427783, "grad_norm": 2.0089402198791504, "learning_rate": 3.3661962796664534e-06, "loss": 0.8132, "step": 29575 }, { "epoch": 0.36049870205842566, "grad_norm": 2.3994460105895996, "learning_rate": 3.3658755612572164e-06, "loss": 0.8854, "step": 29580 }, { "epoch": 0.3605596382825735, "grad_norm": 2.1538991928100586, "learning_rate": 3.3655548428479794e-06, "loss": 0.7878, "step": 29585 }, { "epoch": 0.36062057450672125, "grad_norm": 2.049114942550659, "learning_rate": 3.3652341244387433e-06, "loss": 0.8932, "step": 29590 }, { "epoch": 0.36068151073086907, "grad_norm": 1.9584163427352905, "learning_rate": 3.3649134060295063e-06, "loss": 0.8512, "step": 29595 }, { "epoch": 0.3607424469550169, "grad_norm": 2.0663068294525146, "learning_rate": 3.3645926876202693e-06, "loss": 0.8482, "step": 29600 }, { "epoch": 0.36080338317916466, "grad_norm": 1.687536597251892, "learning_rate": 3.364271969211033e-06, "loss": 0.8236, "step": 29605 }, { "epoch": 0.3608643194033125, "grad_norm": 2.182408094406128, "learning_rate": 3.363951250801796e-06, "loss": 0.8359, "step": 29610 }, { "epoch": 0.3609252556274603, "grad_norm": 2.1748242378234863, "learning_rate": 3.3636305323925596e-06, "loss": 0.8069, "step": 29615 }, { "epoch": 0.36098619185160813, "grad_norm": 2.1341652870178223, "learning_rate": 3.363309813983323e-06, "loss": 0.8329, "step": 29620 }, { "epoch": 0.3610471280757559, "grad_norm": 1.961665153503418, "learning_rate": 3.362989095574086e-06, "loss": 0.7611, "step": 29625 }, { "epoch": 0.3611080642999037, "grad_norm": 2.141092538833618, "learning_rate": 3.3626683771648495e-06, "loss": 0.85, "step": 29630 }, { "epoch": 0.36116900052405154, "grad_norm": 2.0671226978302, "learning_rate": 3.362347658755613e-06, "loss": 0.8399, "step": 29635 }, { "epoch": 0.3612299367481993, "grad_norm": 1.8451251983642578, "learning_rate": 3.3620269403463764e-06, "loss": 0.9095, "step": 29640 }, { "epoch": 0.36129087297234713, "grad_norm": 2.0893659591674805, "learning_rate": 3.3617062219371394e-06, "loss": 0.8482, "step": 29645 }, { "epoch": 0.36135180919649496, "grad_norm": 1.7627445459365845, "learning_rate": 3.361385503527903e-06, "loss": 0.916, "step": 29650 }, { "epoch": 0.3614127454206428, "grad_norm": 1.8684238195419312, "learning_rate": 3.3610647851186663e-06, "loss": 0.9095, "step": 29655 }, { "epoch": 0.36147368164479055, "grad_norm": 2.084834575653076, "learning_rate": 3.3607440667094293e-06, "loss": 0.8465, "step": 29660 }, { "epoch": 0.36153461786893837, "grad_norm": 1.855825424194336, "learning_rate": 3.3604233483001923e-06, "loss": 0.8295, "step": 29665 }, { "epoch": 0.3615955540930862, "grad_norm": 2.0154261589050293, "learning_rate": 3.360102629890956e-06, "loss": 0.925, "step": 29670 }, { "epoch": 0.36165649031723396, "grad_norm": 1.6958420276641846, "learning_rate": 3.359781911481719e-06, "loss": 0.8077, "step": 29675 }, { "epoch": 0.3617174265413818, "grad_norm": 1.8879170417785645, "learning_rate": 3.3594611930724822e-06, "loss": 0.8884, "step": 29680 }, { "epoch": 0.3617783627655296, "grad_norm": 2.766242265701294, "learning_rate": 3.359140474663246e-06, "loss": 0.8911, "step": 29685 }, { "epoch": 0.3618392989896774, "grad_norm": 1.8593213558197021, "learning_rate": 3.358819756254009e-06, "loss": 0.8512, "step": 29690 }, { "epoch": 0.3619002352138252, "grad_norm": 2.491223096847534, "learning_rate": 3.3584990378447725e-06, "loss": 0.9304, "step": 29695 }, { "epoch": 0.361961171437973, "grad_norm": 2.0102226734161377, "learning_rate": 3.358178319435536e-06, "loss": 0.9537, "step": 29700 }, { "epoch": 0.36202210766212084, "grad_norm": 1.7456971406936646, "learning_rate": 3.357857601026299e-06, "loss": 0.8602, "step": 29705 }, { "epoch": 0.3620830438862686, "grad_norm": 2.0470194816589355, "learning_rate": 3.3575368826170624e-06, "loss": 0.985, "step": 29710 }, { "epoch": 0.36214398011041643, "grad_norm": 2.043797016143799, "learning_rate": 3.357216164207826e-06, "loss": 0.819, "step": 29715 }, { "epoch": 0.36220491633456425, "grad_norm": 1.9374665021896362, "learning_rate": 3.3568954457985893e-06, "loss": 0.8584, "step": 29720 }, { "epoch": 0.3622658525587121, "grad_norm": 2.0716304779052734, "learning_rate": 3.3565747273893523e-06, "loss": 0.8954, "step": 29725 }, { "epoch": 0.36232678878285984, "grad_norm": 1.8258239030838013, "learning_rate": 3.356254008980116e-06, "loss": 0.8377, "step": 29730 }, { "epoch": 0.36238772500700767, "grad_norm": 2.1966309547424316, "learning_rate": 3.3559332905708792e-06, "loss": 0.7679, "step": 29735 }, { "epoch": 0.3624486612311555, "grad_norm": 1.869210124015808, "learning_rate": 3.3556125721616422e-06, "loss": 0.8277, "step": 29740 }, { "epoch": 0.36250959745530326, "grad_norm": 2.4611339569091797, "learning_rate": 3.3552918537524053e-06, "loss": 0.7479, "step": 29745 }, { "epoch": 0.3625705336794511, "grad_norm": 1.9487501382827759, "learning_rate": 3.354971135343169e-06, "loss": 0.8056, "step": 29750 }, { "epoch": 0.3626314699035989, "grad_norm": 1.8764408826828003, "learning_rate": 3.354650416933932e-06, "loss": 0.8553, "step": 29755 }, { "epoch": 0.3626924061277467, "grad_norm": 2.1192870140075684, "learning_rate": 3.3543296985246956e-06, "loss": 0.8392, "step": 29760 }, { "epoch": 0.3627533423518945, "grad_norm": 1.7546632289886475, "learning_rate": 3.354008980115459e-06, "loss": 0.9419, "step": 29765 }, { "epoch": 0.3628142785760423, "grad_norm": 1.8728560209274292, "learning_rate": 3.353688261706222e-06, "loss": 0.8479, "step": 29770 }, { "epoch": 0.36287521480019014, "grad_norm": 1.7407896518707275, "learning_rate": 3.3533675432969855e-06, "loss": 0.7517, "step": 29775 }, { "epoch": 0.3629361510243379, "grad_norm": 2.2069077491760254, "learning_rate": 3.353046824887749e-06, "loss": 0.9178, "step": 29780 }, { "epoch": 0.36299708724848573, "grad_norm": 2.0455803871154785, "learning_rate": 3.3527261064785124e-06, "loss": 0.8842, "step": 29785 }, { "epoch": 0.36305802347263355, "grad_norm": 2.1308047771453857, "learning_rate": 3.3524053880692754e-06, "loss": 0.7985, "step": 29790 }, { "epoch": 0.3631189596967814, "grad_norm": 1.8028483390808105, "learning_rate": 3.352084669660039e-06, "loss": 0.8505, "step": 29795 }, { "epoch": 0.36317989592092914, "grad_norm": 1.810732126235962, "learning_rate": 3.3517639512508023e-06, "loss": 0.8354, "step": 29800 }, { "epoch": 0.36324083214507696, "grad_norm": 1.9564460515975952, "learning_rate": 3.3514432328415653e-06, "loss": 0.7709, "step": 29805 }, { "epoch": 0.3633017683692248, "grad_norm": 1.9290069341659546, "learning_rate": 3.351122514432329e-06, "loss": 0.7705, "step": 29810 }, { "epoch": 0.36336270459337255, "grad_norm": 2.798830270767212, "learning_rate": 3.350801796023092e-06, "loss": 0.9175, "step": 29815 }, { "epoch": 0.3634236408175204, "grad_norm": 1.9295129776000977, "learning_rate": 3.350481077613855e-06, "loss": 0.8872, "step": 29820 }, { "epoch": 0.3634845770416682, "grad_norm": 2.0700764656066895, "learning_rate": 3.350160359204618e-06, "loss": 0.9265, "step": 29825 }, { "epoch": 0.363545513265816, "grad_norm": 1.7408579587936401, "learning_rate": 3.349839640795382e-06, "loss": 0.8615, "step": 29830 }, { "epoch": 0.3636064494899638, "grad_norm": 2.1870803833007812, "learning_rate": 3.349518922386145e-06, "loss": 0.873, "step": 29835 }, { "epoch": 0.3636673857141116, "grad_norm": 2.0695228576660156, "learning_rate": 3.3491982039769085e-06, "loss": 0.9168, "step": 29840 }, { "epoch": 0.36372832193825944, "grad_norm": 1.9284926652908325, "learning_rate": 3.348877485567672e-06, "loss": 0.9177, "step": 29845 }, { "epoch": 0.3637892581624072, "grad_norm": 2.3547475337982178, "learning_rate": 3.348556767158435e-06, "loss": 0.8554, "step": 29850 }, { "epoch": 0.363850194386555, "grad_norm": 1.9900000095367432, "learning_rate": 3.3482360487491984e-06, "loss": 0.8882, "step": 29855 }, { "epoch": 0.36391113061070285, "grad_norm": 1.8440728187561035, "learning_rate": 3.347915330339962e-06, "loss": 0.8126, "step": 29860 }, { "epoch": 0.36397206683485067, "grad_norm": 2.006016969680786, "learning_rate": 3.3475946119307253e-06, "loss": 0.8514, "step": 29865 }, { "epoch": 0.36403300305899844, "grad_norm": 2.086890935897827, "learning_rate": 3.3472738935214883e-06, "loss": 0.7985, "step": 29870 }, { "epoch": 0.36409393928314626, "grad_norm": 2.456449031829834, "learning_rate": 3.3469531751122517e-06, "loss": 0.8955, "step": 29875 }, { "epoch": 0.3641548755072941, "grad_norm": 1.8762493133544922, "learning_rate": 3.346632456703015e-06, "loss": 0.7553, "step": 29880 }, { "epoch": 0.36421581173144185, "grad_norm": 2.1820695400238037, "learning_rate": 3.346311738293778e-06, "loss": 0.8887, "step": 29885 }, { "epoch": 0.3642767479555897, "grad_norm": 2.125298261642456, "learning_rate": 3.345991019884542e-06, "loss": 0.7892, "step": 29890 }, { "epoch": 0.3643376841797375, "grad_norm": 2.2415926456451416, "learning_rate": 3.345670301475305e-06, "loss": 0.8639, "step": 29895 }, { "epoch": 0.3643986204038853, "grad_norm": 2.9980437755584717, "learning_rate": 3.345349583066068e-06, "loss": 0.8042, "step": 29900 }, { "epoch": 0.3644595566280331, "grad_norm": 1.902482032775879, "learning_rate": 3.345028864656831e-06, "loss": 0.8692, "step": 29905 }, { "epoch": 0.3645204928521809, "grad_norm": 1.9217649698257446, "learning_rate": 3.344708146247595e-06, "loss": 0.8471, "step": 29910 }, { "epoch": 0.36458142907632873, "grad_norm": 1.8156437873840332, "learning_rate": 3.344387427838358e-06, "loss": 0.893, "step": 29915 }, { "epoch": 0.3646423653004765, "grad_norm": 1.786812424659729, "learning_rate": 3.3440667094291214e-06, "loss": 0.8483, "step": 29920 }, { "epoch": 0.3647033015246243, "grad_norm": 2.127037286758423, "learning_rate": 3.343745991019885e-06, "loss": 0.8647, "step": 29925 }, { "epoch": 0.36476423774877215, "grad_norm": 1.8901538848876953, "learning_rate": 3.343425272610648e-06, "loss": 0.8695, "step": 29930 }, { "epoch": 0.36482517397291997, "grad_norm": 2.112124443054199, "learning_rate": 3.3431045542014113e-06, "loss": 0.8855, "step": 29935 }, { "epoch": 0.36488611019706774, "grad_norm": 2.15071702003479, "learning_rate": 3.3427838357921748e-06, "loss": 0.8804, "step": 29940 }, { "epoch": 0.36494704642121556, "grad_norm": 2.187208890914917, "learning_rate": 3.342463117382938e-06, "loss": 0.7867, "step": 29945 }, { "epoch": 0.3650079826453634, "grad_norm": 2.239034414291382, "learning_rate": 3.3421423989737012e-06, "loss": 0.9492, "step": 29950 }, { "epoch": 0.36506891886951115, "grad_norm": 2.005441665649414, "learning_rate": 3.341821680564465e-06, "loss": 0.8125, "step": 29955 }, { "epoch": 0.365129855093659, "grad_norm": 1.9701805114746094, "learning_rate": 3.341500962155228e-06, "loss": 0.8692, "step": 29960 }, { "epoch": 0.3651907913178068, "grad_norm": 2.6601781845092773, "learning_rate": 3.341180243745991e-06, "loss": 0.8473, "step": 29965 }, { "epoch": 0.36525172754195456, "grad_norm": 2.0556013584136963, "learning_rate": 3.340859525336755e-06, "loss": 0.9052, "step": 29970 }, { "epoch": 0.3653126637661024, "grad_norm": 1.9007221460342407, "learning_rate": 3.340538806927518e-06, "loss": 0.8612, "step": 29975 }, { "epoch": 0.3653735999902502, "grad_norm": 1.6496492624282837, "learning_rate": 3.340218088518281e-06, "loss": 0.8286, "step": 29980 }, { "epoch": 0.36543453621439803, "grad_norm": 1.9385651350021362, "learning_rate": 3.339897370109045e-06, "loss": 0.8788, "step": 29985 }, { "epoch": 0.3654954724385458, "grad_norm": 2.137061595916748, "learning_rate": 3.339576651699808e-06, "loss": 0.8439, "step": 29990 }, { "epoch": 0.3655564086626936, "grad_norm": 1.9288139343261719, "learning_rate": 3.339255933290571e-06, "loss": 0.8684, "step": 29995 }, { "epoch": 0.36561734488684144, "grad_norm": 2.1851515769958496, "learning_rate": 3.3389352148813344e-06, "loss": 0.8562, "step": 30000 }, { "epoch": 0.3656782811109892, "grad_norm": 1.8379541635513306, "learning_rate": 3.338614496472098e-06, "loss": 0.8781, "step": 30005 }, { "epoch": 0.36573921733513703, "grad_norm": 1.802018642425537, "learning_rate": 3.3382937780628612e-06, "loss": 0.8647, "step": 30010 }, { "epoch": 0.36580015355928486, "grad_norm": 1.8566175699234009, "learning_rate": 3.3379730596536243e-06, "loss": 0.848, "step": 30015 }, { "epoch": 0.3658610897834327, "grad_norm": 2.105982780456543, "learning_rate": 3.3376523412443877e-06, "loss": 0.8286, "step": 30020 }, { "epoch": 0.36592202600758045, "grad_norm": 1.9731708765029907, "learning_rate": 3.337331622835151e-06, "loss": 0.8577, "step": 30025 }, { "epoch": 0.36598296223172827, "grad_norm": 2.65390682220459, "learning_rate": 3.337010904425914e-06, "loss": 0.8896, "step": 30030 }, { "epoch": 0.3660438984558761, "grad_norm": 2.1090199947357178, "learning_rate": 3.336690186016678e-06, "loss": 0.89, "step": 30035 }, { "epoch": 0.36610483468002386, "grad_norm": 2.064157009124756, "learning_rate": 3.336369467607441e-06, "loss": 0.8436, "step": 30040 }, { "epoch": 0.3661657709041717, "grad_norm": 1.748100757598877, "learning_rate": 3.336048749198204e-06, "loss": 0.886, "step": 30045 }, { "epoch": 0.3662267071283195, "grad_norm": 2.099658250808716, "learning_rate": 3.335728030788968e-06, "loss": 0.9843, "step": 30050 }, { "epoch": 0.36628764335246733, "grad_norm": 2.020216941833496, "learning_rate": 3.335407312379731e-06, "loss": 0.834, "step": 30055 }, { "epoch": 0.3663485795766151, "grad_norm": 2.1960439682006836, "learning_rate": 3.335086593970494e-06, "loss": 0.8958, "step": 30060 }, { "epoch": 0.3664095158007629, "grad_norm": 1.9339700937271118, "learning_rate": 3.334765875561258e-06, "loss": 0.829, "step": 30065 }, { "epoch": 0.36647045202491074, "grad_norm": 2.979276180267334, "learning_rate": 3.334445157152021e-06, "loss": 0.8537, "step": 30070 }, { "epoch": 0.3665313882490585, "grad_norm": 1.998145580291748, "learning_rate": 3.334124438742784e-06, "loss": 0.7948, "step": 30075 }, { "epoch": 0.36659232447320633, "grad_norm": 2.0818827152252197, "learning_rate": 3.3338037203335473e-06, "loss": 0.8809, "step": 30080 }, { "epoch": 0.36665326069735416, "grad_norm": 2.4054362773895264, "learning_rate": 3.3334830019243107e-06, "loss": 0.8506, "step": 30085 }, { "epoch": 0.366714196921502, "grad_norm": 1.8391050100326538, "learning_rate": 3.333162283515074e-06, "loss": 0.8027, "step": 30090 }, { "epoch": 0.36677513314564975, "grad_norm": 1.7765344381332397, "learning_rate": 3.332841565105837e-06, "loss": 0.8821, "step": 30095 }, { "epoch": 0.36683606936979757, "grad_norm": 2.046365737915039, "learning_rate": 3.3325208466966006e-06, "loss": 0.8642, "step": 30100 }, { "epoch": 0.3668970055939454, "grad_norm": 1.9014872312545776, "learning_rate": 3.332200128287364e-06, "loss": 0.8234, "step": 30105 }, { "epoch": 0.36695794181809316, "grad_norm": 2.0103871822357178, "learning_rate": 3.331879409878127e-06, "loss": 0.859, "step": 30110 }, { "epoch": 0.367018878042241, "grad_norm": 1.866052269935608, "learning_rate": 3.331558691468891e-06, "loss": 0.814, "step": 30115 }, { "epoch": 0.3670798142663888, "grad_norm": 1.8262083530426025, "learning_rate": 3.331237973059654e-06, "loss": 0.8834, "step": 30120 }, { "epoch": 0.3671407504905366, "grad_norm": 1.8675271272659302, "learning_rate": 3.330917254650417e-06, "loss": 0.7577, "step": 30125 }, { "epoch": 0.3672016867146844, "grad_norm": 1.823383092880249, "learning_rate": 3.330596536241181e-06, "loss": 0.8183, "step": 30130 }, { "epoch": 0.3672626229388322, "grad_norm": 1.5707758665084839, "learning_rate": 3.330275817831944e-06, "loss": 0.8612, "step": 30135 }, { "epoch": 0.36732355916298004, "grad_norm": 1.8951133489608765, "learning_rate": 3.329955099422707e-06, "loss": 0.8858, "step": 30140 }, { "epoch": 0.3673844953871278, "grad_norm": 2.2442612648010254, "learning_rate": 3.3296343810134707e-06, "loss": 0.8716, "step": 30145 }, { "epoch": 0.36744543161127563, "grad_norm": 1.661093831062317, "learning_rate": 3.3293136626042338e-06, "loss": 0.8194, "step": 30150 }, { "epoch": 0.36750636783542345, "grad_norm": 1.605379343032837, "learning_rate": 3.3289929441949968e-06, "loss": 0.86, "step": 30155 }, { "epoch": 0.3675673040595713, "grad_norm": 1.8706282377243042, "learning_rate": 3.32867222578576e-06, "loss": 0.887, "step": 30160 }, { "epoch": 0.36762824028371904, "grad_norm": 1.8190677165985107, "learning_rate": 3.3283515073765237e-06, "loss": 0.7869, "step": 30165 }, { "epoch": 0.36768917650786687, "grad_norm": 2.361515522003174, "learning_rate": 3.328030788967287e-06, "loss": 0.8267, "step": 30170 }, { "epoch": 0.3677501127320147, "grad_norm": 1.9331837892532349, "learning_rate": 3.32771007055805e-06, "loss": 0.863, "step": 30175 }, { "epoch": 0.36781104895616246, "grad_norm": 1.8846940994262695, "learning_rate": 3.3273893521488135e-06, "loss": 0.8924, "step": 30180 }, { "epoch": 0.3678719851803103, "grad_norm": 2.095961570739746, "learning_rate": 3.327068633739577e-06, "loss": 0.905, "step": 30185 }, { "epoch": 0.3679329214044581, "grad_norm": 2.291790246963501, "learning_rate": 3.32674791533034e-06, "loss": 0.8767, "step": 30190 }, { "epoch": 0.3679938576286059, "grad_norm": 1.9119538068771362, "learning_rate": 3.326427196921104e-06, "loss": 0.8033, "step": 30195 }, { "epoch": 0.3680547938527537, "grad_norm": 2.2502388954162598, "learning_rate": 3.326106478511867e-06, "loss": 0.9007, "step": 30200 }, { "epoch": 0.3681157300769015, "grad_norm": 1.8490556478500366, "learning_rate": 3.32578576010263e-06, "loss": 0.915, "step": 30205 }, { "epoch": 0.36817666630104934, "grad_norm": 2.321702241897583, "learning_rate": 3.3254650416933938e-06, "loss": 0.8547, "step": 30210 }, { "epoch": 0.3682376025251971, "grad_norm": 1.675082802772522, "learning_rate": 3.3251443232841568e-06, "loss": 0.8199, "step": 30215 }, { "epoch": 0.36829853874934493, "grad_norm": 1.885827660560608, "learning_rate": 3.32482360487492e-06, "loss": 0.7813, "step": 30220 }, { "epoch": 0.36835947497349275, "grad_norm": 1.832687258720398, "learning_rate": 3.3245028864656837e-06, "loss": 0.8871, "step": 30225 }, { "epoch": 0.3684204111976406, "grad_norm": 1.7802997827529907, "learning_rate": 3.3241821680564467e-06, "loss": 0.797, "step": 30230 }, { "epoch": 0.36848134742178834, "grad_norm": 2.113839626312256, "learning_rate": 3.32386144964721e-06, "loss": 0.8727, "step": 30235 }, { "epoch": 0.36854228364593616, "grad_norm": 1.9186519384384155, "learning_rate": 3.3235407312379736e-06, "loss": 0.8854, "step": 30240 }, { "epoch": 0.368603219870084, "grad_norm": 1.9765920639038086, "learning_rate": 3.3232200128287366e-06, "loss": 0.8535, "step": 30245 }, { "epoch": 0.36866415609423175, "grad_norm": 2.2111425399780273, "learning_rate": 3.3228992944195e-06, "loss": 0.8243, "step": 30250 }, { "epoch": 0.3687250923183796, "grad_norm": 1.8280280828475952, "learning_rate": 3.322578576010263e-06, "loss": 0.9425, "step": 30255 }, { "epoch": 0.3687860285425274, "grad_norm": 1.8704568147659302, "learning_rate": 3.322257857601027e-06, "loss": 0.8342, "step": 30260 }, { "epoch": 0.3688469647666752, "grad_norm": 2.117428779602051, "learning_rate": 3.32193713919179e-06, "loss": 0.697, "step": 30265 }, { "epoch": 0.368907900990823, "grad_norm": 1.7866857051849365, "learning_rate": 3.321616420782553e-06, "loss": 0.7914, "step": 30270 }, { "epoch": 0.3689688372149708, "grad_norm": 1.9651464223861694, "learning_rate": 3.321295702373317e-06, "loss": 0.9817, "step": 30275 }, { "epoch": 0.36902977343911864, "grad_norm": 2.1521003246307373, "learning_rate": 3.32097498396408e-06, "loss": 0.8253, "step": 30280 }, { "epoch": 0.3690907096632664, "grad_norm": 2.1502625942230225, "learning_rate": 3.320654265554843e-06, "loss": 0.7954, "step": 30285 }, { "epoch": 0.3691516458874142, "grad_norm": 1.8186697959899902, "learning_rate": 3.3203335471456067e-06, "loss": 0.9032, "step": 30290 }, { "epoch": 0.36921258211156205, "grad_norm": 2.1493470668792725, "learning_rate": 3.3200128287363697e-06, "loss": 0.7977, "step": 30295 }, { "epoch": 0.36927351833570987, "grad_norm": 2.0102264881134033, "learning_rate": 3.3196921103271327e-06, "loss": 0.9018, "step": 30300 }, { "epoch": 0.36933445455985764, "grad_norm": 3.001068353652954, "learning_rate": 3.3193713919178966e-06, "loss": 0.9218, "step": 30305 }, { "epoch": 0.36939539078400546, "grad_norm": 1.9651095867156982, "learning_rate": 3.3190506735086596e-06, "loss": 0.86, "step": 30310 }, { "epoch": 0.3694563270081533, "grad_norm": 2.2963764667510986, "learning_rate": 3.318729955099423e-06, "loss": 0.8634, "step": 30315 }, { "epoch": 0.36951726323230105, "grad_norm": 1.686096429824829, "learning_rate": 3.3184092366901865e-06, "loss": 0.9079, "step": 30320 }, { "epoch": 0.3695781994564489, "grad_norm": 1.9676600694656372, "learning_rate": 3.3180885182809495e-06, "loss": 0.8633, "step": 30325 }, { "epoch": 0.3696391356805967, "grad_norm": 1.609964370727539, "learning_rate": 3.317767799871713e-06, "loss": 0.902, "step": 30330 }, { "epoch": 0.3697000719047445, "grad_norm": 1.7729027271270752, "learning_rate": 3.317447081462476e-06, "loss": 0.8498, "step": 30335 }, { "epoch": 0.3697610081288923, "grad_norm": 1.6460784673690796, "learning_rate": 3.31712636305324e-06, "loss": 0.8365, "step": 30340 }, { "epoch": 0.3698219443530401, "grad_norm": 2.3570058345794678, "learning_rate": 3.316805644644003e-06, "loss": 0.8516, "step": 30345 }, { "epoch": 0.36988288057718793, "grad_norm": 2.345238208770752, "learning_rate": 3.316484926234766e-06, "loss": 0.8833, "step": 30350 }, { "epoch": 0.3699438168013357, "grad_norm": 2.0576956272125244, "learning_rate": 3.3161642078255297e-06, "loss": 0.777, "step": 30355 }, { "epoch": 0.3700047530254835, "grad_norm": 1.8070272207260132, "learning_rate": 3.3158434894162927e-06, "loss": 0.885, "step": 30360 }, { "epoch": 0.37006568924963135, "grad_norm": 1.9471360445022583, "learning_rate": 3.3155227710070558e-06, "loss": 0.8503, "step": 30365 }, { "epoch": 0.37012662547377917, "grad_norm": 2.197744369506836, "learning_rate": 3.3152020525978196e-06, "loss": 0.85, "step": 30370 }, { "epoch": 0.37018756169792694, "grad_norm": 1.992014765739441, "learning_rate": 3.3148813341885826e-06, "loss": 0.863, "step": 30375 }, { "epoch": 0.37024849792207476, "grad_norm": 1.8747836351394653, "learning_rate": 3.3145606157793457e-06, "loss": 0.8289, "step": 30380 }, { "epoch": 0.3703094341462226, "grad_norm": 2.3480260372161865, "learning_rate": 3.3142398973701095e-06, "loss": 0.8561, "step": 30385 }, { "epoch": 0.37037037037037035, "grad_norm": 1.8034164905548096, "learning_rate": 3.3139191789608725e-06, "loss": 0.8604, "step": 30390 }, { "epoch": 0.3704313065945182, "grad_norm": 1.6601066589355469, "learning_rate": 3.313598460551636e-06, "loss": 0.8569, "step": 30395 }, { "epoch": 0.370492242818666, "grad_norm": 2.005906105041504, "learning_rate": 3.3132777421423994e-06, "loss": 0.873, "step": 30400 }, { "epoch": 0.3705531790428138, "grad_norm": 1.752055287361145, "learning_rate": 3.3129570237331624e-06, "loss": 0.947, "step": 30405 }, { "epoch": 0.3706141152669616, "grad_norm": 1.9129443168640137, "learning_rate": 3.312636305323926e-06, "loss": 0.8458, "step": 30410 }, { "epoch": 0.3706750514911094, "grad_norm": 1.8821818828582764, "learning_rate": 3.312315586914689e-06, "loss": 0.9077, "step": 30415 }, { "epoch": 0.37073598771525723, "grad_norm": 1.781754493713379, "learning_rate": 3.3119948685054528e-06, "loss": 0.8616, "step": 30420 }, { "epoch": 0.370796923939405, "grad_norm": 2.4178555011749268, "learning_rate": 3.3116741500962158e-06, "loss": 0.827, "step": 30425 }, { "epoch": 0.3708578601635528, "grad_norm": 2.005995512008667, "learning_rate": 3.3113534316869788e-06, "loss": 0.8643, "step": 30430 }, { "epoch": 0.37091879638770064, "grad_norm": 1.9186437129974365, "learning_rate": 3.3110327132777427e-06, "loss": 0.9163, "step": 30435 }, { "epoch": 0.3709797326118484, "grad_norm": 1.8706691265106201, "learning_rate": 3.3107119948685057e-06, "loss": 0.8503, "step": 30440 }, { "epoch": 0.37104066883599623, "grad_norm": 1.6545419692993164, "learning_rate": 3.3103912764592687e-06, "loss": 0.8237, "step": 30445 }, { "epoch": 0.37110160506014406, "grad_norm": 2.0778653621673584, "learning_rate": 3.3100705580500325e-06, "loss": 0.8323, "step": 30450 }, { "epoch": 0.3711625412842919, "grad_norm": 2.0308837890625, "learning_rate": 3.3097498396407956e-06, "loss": 0.8588, "step": 30455 }, { "epoch": 0.37122347750843965, "grad_norm": 2.1426541805267334, "learning_rate": 3.309429121231559e-06, "loss": 0.8965, "step": 30460 }, { "epoch": 0.37128441373258747, "grad_norm": 2.14897084236145, "learning_rate": 3.3091084028223224e-06, "loss": 0.9353, "step": 30465 }, { "epoch": 0.3713453499567353, "grad_norm": 1.9297391176223755, "learning_rate": 3.3087876844130855e-06, "loss": 0.8884, "step": 30470 }, { "epoch": 0.37140628618088306, "grad_norm": 1.9155526161193848, "learning_rate": 3.308466966003849e-06, "loss": 0.8224, "step": 30475 }, { "epoch": 0.3714672224050309, "grad_norm": 1.947291374206543, "learning_rate": 3.3081462475946123e-06, "loss": 0.8585, "step": 30480 }, { "epoch": 0.3715281586291787, "grad_norm": 1.8219143152236938, "learning_rate": 3.3078255291853758e-06, "loss": 0.8972, "step": 30485 }, { "epoch": 0.37158909485332653, "grad_norm": 2.2898905277252197, "learning_rate": 3.307504810776139e-06, "loss": 0.9311, "step": 30490 }, { "epoch": 0.3716500310774743, "grad_norm": 1.8200968503952026, "learning_rate": 3.307184092366902e-06, "loss": 0.8408, "step": 30495 }, { "epoch": 0.3717109673016221, "grad_norm": 2.1205766201019287, "learning_rate": 3.3068633739576657e-06, "loss": 0.9224, "step": 30500 }, { "epoch": 0.37177190352576994, "grad_norm": 1.9456778764724731, "learning_rate": 3.3065426555484287e-06, "loss": 0.7966, "step": 30505 }, { "epoch": 0.3718328397499177, "grad_norm": 2.810993194580078, "learning_rate": 3.3062219371391917e-06, "loss": 0.8447, "step": 30510 }, { "epoch": 0.37189377597406553, "grad_norm": 1.9813287258148193, "learning_rate": 3.3059012187299556e-06, "loss": 0.8694, "step": 30515 }, { "epoch": 0.37195471219821336, "grad_norm": 1.8498718738555908, "learning_rate": 3.3055805003207186e-06, "loss": 0.9023, "step": 30520 }, { "epoch": 0.3720156484223612, "grad_norm": 1.791479468345642, "learning_rate": 3.3052597819114816e-06, "loss": 0.9052, "step": 30525 }, { "epoch": 0.37207658464650895, "grad_norm": 2.189490795135498, "learning_rate": 3.3049390635022455e-06, "loss": 0.8282, "step": 30530 }, { "epoch": 0.37213752087065677, "grad_norm": 2.1106579303741455, "learning_rate": 3.3046183450930085e-06, "loss": 0.8395, "step": 30535 }, { "epoch": 0.3721984570948046, "grad_norm": 2.5829575061798096, "learning_rate": 3.304297626683772e-06, "loss": 0.833, "step": 30540 }, { "epoch": 0.37225939331895236, "grad_norm": 2.0752811431884766, "learning_rate": 3.3039769082745354e-06, "loss": 0.8867, "step": 30545 }, { "epoch": 0.3723203295431002, "grad_norm": 1.6706615686416626, "learning_rate": 3.3036561898652984e-06, "loss": 0.9192, "step": 30550 }, { "epoch": 0.372381265767248, "grad_norm": 2.0510671138763428, "learning_rate": 3.303335471456062e-06, "loss": 0.8623, "step": 30555 }, { "epoch": 0.3724422019913958, "grad_norm": 1.7474493980407715, "learning_rate": 3.3030147530468253e-06, "loss": 0.8863, "step": 30560 }, { "epoch": 0.3725031382155436, "grad_norm": 2.102827548980713, "learning_rate": 3.3026940346375887e-06, "loss": 0.8218, "step": 30565 }, { "epoch": 0.3725640744396914, "grad_norm": 1.9542769193649292, "learning_rate": 3.3023733162283517e-06, "loss": 0.8475, "step": 30570 }, { "epoch": 0.37262501066383924, "grad_norm": 1.6320818662643433, "learning_rate": 3.302052597819115e-06, "loss": 0.8506, "step": 30575 }, { "epoch": 0.372685946887987, "grad_norm": 2.4890754222869873, "learning_rate": 3.3017318794098786e-06, "loss": 0.8395, "step": 30580 }, { "epoch": 0.37274688311213483, "grad_norm": 2.1734328269958496, "learning_rate": 3.3014111610006416e-06, "loss": 0.8397, "step": 30585 }, { "epoch": 0.37280781933628265, "grad_norm": 2.0708014965057373, "learning_rate": 3.3010904425914046e-06, "loss": 0.868, "step": 30590 }, { "epoch": 0.3728687555604305, "grad_norm": 2.154611110687256, "learning_rate": 3.3007697241821685e-06, "loss": 0.9132, "step": 30595 }, { "epoch": 0.37292969178457824, "grad_norm": 1.7920782566070557, "learning_rate": 3.3004490057729315e-06, "loss": 0.8917, "step": 30600 }, { "epoch": 0.37299062800872607, "grad_norm": 2.0632214546203613, "learning_rate": 3.3001282873636945e-06, "loss": 0.9079, "step": 30605 }, { "epoch": 0.3730515642328739, "grad_norm": 2.0189058780670166, "learning_rate": 3.2998075689544584e-06, "loss": 0.8252, "step": 30610 }, { "epoch": 0.37311250045702166, "grad_norm": 2.1367180347442627, "learning_rate": 3.2994868505452214e-06, "loss": 0.828, "step": 30615 }, { "epoch": 0.3731734366811695, "grad_norm": 2.0361602306365967, "learning_rate": 3.299166132135985e-06, "loss": 0.8363, "step": 30620 }, { "epoch": 0.3732343729053173, "grad_norm": 1.882606863975525, "learning_rate": 3.2988454137267483e-06, "loss": 0.8829, "step": 30625 }, { "epoch": 0.3732953091294651, "grad_norm": 1.860422134399414, "learning_rate": 3.2985246953175113e-06, "loss": 0.9216, "step": 30630 }, { "epoch": 0.3733562453536129, "grad_norm": 1.7991883754730225, "learning_rate": 3.2982039769082748e-06, "loss": 0.8677, "step": 30635 }, { "epoch": 0.3734171815777607, "grad_norm": 2.3579957485198975, "learning_rate": 3.297883258499038e-06, "loss": 0.8842, "step": 30640 }, { "epoch": 0.37347811780190854, "grad_norm": 1.6728734970092773, "learning_rate": 3.2975625400898016e-06, "loss": 0.8906, "step": 30645 }, { "epoch": 0.3735390540260563, "grad_norm": 2.0305047035217285, "learning_rate": 3.2972418216805647e-06, "loss": 0.8811, "step": 30650 }, { "epoch": 0.37359999025020413, "grad_norm": 1.837816596031189, "learning_rate": 3.296921103271328e-06, "loss": 0.8545, "step": 30655 }, { "epoch": 0.37366092647435195, "grad_norm": 1.922424077987671, "learning_rate": 3.2966003848620915e-06, "loss": 0.884, "step": 30660 }, { "epoch": 0.3737218626984998, "grad_norm": 1.9627739191055298, "learning_rate": 3.2962796664528546e-06, "loss": 0.8312, "step": 30665 }, { "epoch": 0.37378279892264754, "grad_norm": 2.1835217475891113, "learning_rate": 3.2959589480436176e-06, "loss": 0.8232, "step": 30670 }, { "epoch": 0.37384373514679536, "grad_norm": 1.9883166551589966, "learning_rate": 3.2956382296343814e-06, "loss": 0.8736, "step": 30675 }, { "epoch": 0.3739046713709432, "grad_norm": 2.0826947689056396, "learning_rate": 3.2953175112251444e-06, "loss": 0.8758, "step": 30680 }, { "epoch": 0.37396560759509095, "grad_norm": 2.1855154037475586, "learning_rate": 3.2949967928159075e-06, "loss": 0.8693, "step": 30685 }, { "epoch": 0.3740265438192388, "grad_norm": 1.6291942596435547, "learning_rate": 3.2946760744066713e-06, "loss": 0.7881, "step": 30690 }, { "epoch": 0.3740874800433866, "grad_norm": 2.1067776679992676, "learning_rate": 3.2943553559974343e-06, "loss": 0.8137, "step": 30695 }, { "epoch": 0.3741484162675344, "grad_norm": 1.9571900367736816, "learning_rate": 3.2940346375881978e-06, "loss": 0.8479, "step": 30700 }, { "epoch": 0.3742093524916822, "grad_norm": 2.1094043254852295, "learning_rate": 3.2937139191789612e-06, "loss": 0.8545, "step": 30705 }, { "epoch": 0.37427028871583, "grad_norm": 2.00586199760437, "learning_rate": 3.2933932007697247e-06, "loss": 0.8719, "step": 30710 }, { "epoch": 0.37433122493997784, "grad_norm": 2.142883777618408, "learning_rate": 3.2930724823604877e-06, "loss": 0.8301, "step": 30715 }, { "epoch": 0.3743921611641256, "grad_norm": 2.00722599029541, "learning_rate": 3.292751763951251e-06, "loss": 0.8618, "step": 30720 }, { "epoch": 0.3744530973882734, "grad_norm": 2.0746543407440186, "learning_rate": 3.2924310455420146e-06, "loss": 0.8277, "step": 30725 }, { "epoch": 0.37451403361242125, "grad_norm": 2.284597158432007, "learning_rate": 3.2921103271327776e-06, "loss": 0.7506, "step": 30730 }, { "epoch": 0.37457496983656907, "grad_norm": 1.8966336250305176, "learning_rate": 3.2917896087235414e-06, "loss": 0.8471, "step": 30735 }, { "epoch": 0.37463590606071684, "grad_norm": 1.9298789501190186, "learning_rate": 3.2914688903143045e-06, "loss": 0.9029, "step": 30740 }, { "epoch": 0.37469684228486466, "grad_norm": 2.944622039794922, "learning_rate": 3.2911481719050675e-06, "loss": 0.7803, "step": 30745 }, { "epoch": 0.3747577785090125, "grad_norm": 2.0668418407440186, "learning_rate": 3.2908274534958305e-06, "loss": 0.8512, "step": 30750 }, { "epoch": 0.37481871473316025, "grad_norm": 2.0336806774139404, "learning_rate": 3.2905067350865944e-06, "loss": 0.7735, "step": 30755 }, { "epoch": 0.3748796509573081, "grad_norm": 1.8771870136260986, "learning_rate": 3.2901860166773574e-06, "loss": 0.8465, "step": 30760 }, { "epoch": 0.3749405871814559, "grad_norm": 1.8432906866073608, "learning_rate": 3.289865298268121e-06, "loss": 0.8763, "step": 30765 }, { "epoch": 0.3750015234056037, "grad_norm": 1.7341041564941406, "learning_rate": 3.2895445798588843e-06, "loss": 0.851, "step": 30770 }, { "epoch": 0.3750624596297515, "grad_norm": 1.7410181760787964, "learning_rate": 3.2892238614496473e-06, "loss": 0.8462, "step": 30775 }, { "epoch": 0.3751233958538993, "grad_norm": 2.1941869258880615, "learning_rate": 3.2889031430404107e-06, "loss": 0.9065, "step": 30780 }, { "epoch": 0.37518433207804713, "grad_norm": 2.0013914108276367, "learning_rate": 3.288582424631174e-06, "loss": 0.8089, "step": 30785 }, { "epoch": 0.3752452683021949, "grad_norm": 1.873399257659912, "learning_rate": 3.2882617062219376e-06, "loss": 0.8381, "step": 30790 }, { "epoch": 0.3753062045263427, "grad_norm": 2.157235622406006, "learning_rate": 3.2879409878127006e-06, "loss": 0.7736, "step": 30795 }, { "epoch": 0.37536714075049055, "grad_norm": 2.0510311126708984, "learning_rate": 3.287620269403464e-06, "loss": 0.8906, "step": 30800 }, { "epoch": 0.37542807697463837, "grad_norm": 1.9067468643188477, "learning_rate": 3.2872995509942275e-06, "loss": 0.9989, "step": 30805 }, { "epoch": 0.37548901319878614, "grad_norm": 1.9638217687606812, "learning_rate": 3.2869788325849905e-06, "loss": 0.873, "step": 30810 }, { "epoch": 0.37554994942293396, "grad_norm": 1.8589740991592407, "learning_rate": 3.2866581141757544e-06, "loss": 0.8621, "step": 30815 }, { "epoch": 0.3756108856470818, "grad_norm": 1.856553077697754, "learning_rate": 3.2863373957665174e-06, "loss": 0.8566, "step": 30820 }, { "epoch": 0.37567182187122955, "grad_norm": 1.9080886840820312, "learning_rate": 3.2860166773572804e-06, "loss": 0.8248, "step": 30825 }, { "epoch": 0.3757327580953774, "grad_norm": 1.7896875143051147, "learning_rate": 3.2856959589480443e-06, "loss": 0.8519, "step": 30830 }, { "epoch": 0.3757936943195252, "grad_norm": 1.8202100992202759, "learning_rate": 3.2853752405388073e-06, "loss": 0.7943, "step": 30835 }, { "epoch": 0.375854630543673, "grad_norm": 1.6200133562088013, "learning_rate": 3.2850545221295703e-06, "loss": 0.8451, "step": 30840 }, { "epoch": 0.3759155667678208, "grad_norm": 1.8209902048110962, "learning_rate": 3.2847338037203337e-06, "loss": 0.8388, "step": 30845 }, { "epoch": 0.3759765029919686, "grad_norm": 1.9376004934310913, "learning_rate": 3.284413085311097e-06, "loss": 0.8393, "step": 30850 }, { "epoch": 0.37603743921611643, "grad_norm": 1.6337279081344604, "learning_rate": 3.28409236690186e-06, "loss": 0.8207, "step": 30855 }, { "epoch": 0.3760983754402642, "grad_norm": 1.841373324394226, "learning_rate": 3.2837716484926236e-06, "loss": 0.8112, "step": 30860 }, { "epoch": 0.376159311664412, "grad_norm": 2.2109856605529785, "learning_rate": 3.283450930083387e-06, "loss": 0.8188, "step": 30865 }, { "epoch": 0.37622024788855984, "grad_norm": 1.741798758506775, "learning_rate": 3.2831302116741505e-06, "loss": 0.8165, "step": 30870 }, { "epoch": 0.37628118411270767, "grad_norm": 2.1990268230438232, "learning_rate": 3.2828094932649135e-06, "loss": 0.8412, "step": 30875 }, { "epoch": 0.37634212033685543, "grad_norm": 2.0160422325134277, "learning_rate": 3.282488774855677e-06, "loss": 0.8387, "step": 30880 }, { "epoch": 0.37640305656100326, "grad_norm": 1.997910976409912, "learning_rate": 3.2821680564464404e-06, "loss": 0.8788, "step": 30885 }, { "epoch": 0.3764639927851511, "grad_norm": 2.6008858680725098, "learning_rate": 3.2818473380372034e-06, "loss": 0.8913, "step": 30890 }, { "epoch": 0.37652492900929885, "grad_norm": 2.0997841358184814, "learning_rate": 3.2815266196279673e-06, "loss": 0.7726, "step": 30895 }, { "epoch": 0.37658586523344667, "grad_norm": 1.917031168937683, "learning_rate": 3.2812059012187303e-06, "loss": 0.8608, "step": 30900 }, { "epoch": 0.3766468014575945, "grad_norm": 1.7718719244003296, "learning_rate": 3.2808851828094933e-06, "loss": 0.8941, "step": 30905 }, { "epoch": 0.3767077376817423, "grad_norm": 2.0133588314056396, "learning_rate": 3.280564464400257e-06, "loss": 0.8181, "step": 30910 }, { "epoch": 0.3767686739058901, "grad_norm": 1.8218690156936646, "learning_rate": 3.2802437459910202e-06, "loss": 0.8728, "step": 30915 }, { "epoch": 0.3768296101300379, "grad_norm": 1.7557737827301025, "learning_rate": 3.2799230275817832e-06, "loss": 0.9419, "step": 30920 }, { "epoch": 0.37689054635418573, "grad_norm": 1.9547146558761597, "learning_rate": 3.2796023091725467e-06, "loss": 0.8416, "step": 30925 }, { "epoch": 0.3769514825783335, "grad_norm": 2.38645339012146, "learning_rate": 3.27928159076331e-06, "loss": 0.839, "step": 30930 }, { "epoch": 0.3770124188024813, "grad_norm": 1.8971316814422607, "learning_rate": 3.2789608723540736e-06, "loss": 0.8347, "step": 30935 }, { "epoch": 0.37707335502662914, "grad_norm": 1.9627553224563599, "learning_rate": 3.2786401539448366e-06, "loss": 0.8467, "step": 30940 }, { "epoch": 0.3771342912507769, "grad_norm": 1.7824480533599854, "learning_rate": 3.2783194355356e-06, "loss": 0.8911, "step": 30945 }, { "epoch": 0.37719522747492473, "grad_norm": 2.037914752960205, "learning_rate": 3.2779987171263634e-06, "loss": 0.8486, "step": 30950 }, { "epoch": 0.37725616369907256, "grad_norm": 2.339064836502075, "learning_rate": 3.2776779987171265e-06, "loss": 0.8604, "step": 30955 }, { "epoch": 0.3773170999232204, "grad_norm": 2.1224379539489746, "learning_rate": 3.2773572803078903e-06, "loss": 0.8097, "step": 30960 }, { "epoch": 0.37737803614736815, "grad_norm": 1.882301688194275, "learning_rate": 3.2770365618986533e-06, "loss": 0.8092, "step": 30965 }, { "epoch": 0.37743897237151597, "grad_norm": 1.9183015823364258, "learning_rate": 3.2767158434894164e-06, "loss": 0.8607, "step": 30970 }, { "epoch": 0.3774999085956638, "grad_norm": 2.0525991916656494, "learning_rate": 3.2763951250801802e-06, "loss": 0.8617, "step": 30975 }, { "epoch": 0.37756084481981156, "grad_norm": 1.7161319255828857, "learning_rate": 3.2760744066709432e-06, "loss": 0.7679, "step": 30980 }, { "epoch": 0.3776217810439594, "grad_norm": 2.463296890258789, "learning_rate": 3.2757536882617063e-06, "loss": 0.893, "step": 30985 }, { "epoch": 0.3776827172681072, "grad_norm": 2.047605037689209, "learning_rate": 3.27543296985247e-06, "loss": 0.874, "step": 30990 }, { "epoch": 0.377743653492255, "grad_norm": 2.2344369888305664, "learning_rate": 3.275112251443233e-06, "loss": 0.898, "step": 30995 }, { "epoch": 0.3778045897164028, "grad_norm": 1.7754629850387573, "learning_rate": 3.274791533033996e-06, "loss": 0.7814, "step": 31000 }, { "epoch": 0.3778655259405506, "grad_norm": 2.016557216644287, "learning_rate": 3.2744708146247596e-06, "loss": 0.8391, "step": 31005 }, { "epoch": 0.37792646216469844, "grad_norm": 1.6408747434616089, "learning_rate": 3.274150096215523e-06, "loss": 0.8376, "step": 31010 }, { "epoch": 0.3779873983888462, "grad_norm": 1.863054633140564, "learning_rate": 3.2738293778062865e-06, "loss": 0.7893, "step": 31015 }, { "epoch": 0.37804833461299403, "grad_norm": 1.788475751876831, "learning_rate": 3.2735086593970495e-06, "loss": 0.7568, "step": 31020 }, { "epoch": 0.37810927083714185, "grad_norm": 2.593050479888916, "learning_rate": 3.273187940987813e-06, "loss": 0.8435, "step": 31025 }, { "epoch": 0.3781702070612897, "grad_norm": 1.9126592874526978, "learning_rate": 3.2728672225785764e-06, "loss": 0.8949, "step": 31030 }, { "epoch": 0.37823114328543744, "grad_norm": 1.9599789381027222, "learning_rate": 3.2725465041693394e-06, "loss": 0.8215, "step": 31035 }, { "epoch": 0.37829207950958527, "grad_norm": 2.243725538253784, "learning_rate": 3.2722257857601033e-06, "loss": 0.9089, "step": 31040 }, { "epoch": 0.3783530157337331, "grad_norm": 1.9944595098495483, "learning_rate": 3.2719050673508663e-06, "loss": 0.8289, "step": 31045 }, { "epoch": 0.37841395195788086, "grad_norm": 1.914525032043457, "learning_rate": 3.2715843489416293e-06, "loss": 0.9325, "step": 31050 }, { "epoch": 0.3784748881820287, "grad_norm": 2.0798404216766357, "learning_rate": 3.271263630532393e-06, "loss": 0.8294, "step": 31055 }, { "epoch": 0.3785358244061765, "grad_norm": 2.08048152923584, "learning_rate": 3.270942912123156e-06, "loss": 0.8747, "step": 31060 }, { "epoch": 0.3785967606303243, "grad_norm": 2.163644552230835, "learning_rate": 3.270622193713919e-06, "loss": 0.8353, "step": 31065 }, { "epoch": 0.3786576968544721, "grad_norm": 2.1169698238372803, "learning_rate": 3.270301475304683e-06, "loss": 0.8801, "step": 31070 }, { "epoch": 0.3787186330786199, "grad_norm": 2.442331552505493, "learning_rate": 3.269980756895446e-06, "loss": 0.8394, "step": 31075 }, { "epoch": 0.37877956930276774, "grad_norm": 1.8531341552734375, "learning_rate": 3.269660038486209e-06, "loss": 0.8834, "step": 31080 }, { "epoch": 0.3788405055269155, "grad_norm": 1.9497016668319702, "learning_rate": 3.2693393200769725e-06, "loss": 0.8417, "step": 31085 }, { "epoch": 0.37890144175106333, "grad_norm": 1.9967691898345947, "learning_rate": 3.269018601667736e-06, "loss": 0.8208, "step": 31090 }, { "epoch": 0.37896237797521115, "grad_norm": 1.820619821548462, "learning_rate": 3.2686978832584994e-06, "loss": 0.9154, "step": 31095 }, { "epoch": 0.379023314199359, "grad_norm": 1.898923397064209, "learning_rate": 3.2683771648492624e-06, "loss": 0.857, "step": 31100 }, { "epoch": 0.37908425042350674, "grad_norm": 2.147876501083374, "learning_rate": 3.268056446440026e-06, "loss": 0.8123, "step": 31105 }, { "epoch": 0.37914518664765456, "grad_norm": 2.070730209350586, "learning_rate": 3.2677357280307893e-06, "loss": 0.8563, "step": 31110 }, { "epoch": 0.3792061228718024, "grad_norm": 1.9306961297988892, "learning_rate": 3.2674150096215523e-06, "loss": 0.8467, "step": 31115 }, { "epoch": 0.37926705909595015, "grad_norm": 1.79794442653656, "learning_rate": 3.267094291212316e-06, "loss": 0.864, "step": 31120 }, { "epoch": 0.379327995320098, "grad_norm": 1.873557209968567, "learning_rate": 3.266773572803079e-06, "loss": 0.9191, "step": 31125 }, { "epoch": 0.3793889315442458, "grad_norm": 1.874916911125183, "learning_rate": 3.2664528543938422e-06, "loss": 0.8843, "step": 31130 }, { "epoch": 0.3794498677683936, "grad_norm": 1.7067009210586548, "learning_rate": 3.266132135984606e-06, "loss": 0.9157, "step": 31135 }, { "epoch": 0.3795108039925414, "grad_norm": 1.8335356712341309, "learning_rate": 3.265811417575369e-06, "loss": 0.8612, "step": 31140 }, { "epoch": 0.3795717402166892, "grad_norm": 2.057065963745117, "learning_rate": 3.265490699166132e-06, "loss": 0.8559, "step": 31145 }, { "epoch": 0.37963267644083704, "grad_norm": 2.1264195442199707, "learning_rate": 3.265169980756896e-06, "loss": 0.8134, "step": 31150 }, { "epoch": 0.3796936126649848, "grad_norm": 1.9612219333648682, "learning_rate": 3.264849262347659e-06, "loss": 0.887, "step": 31155 }, { "epoch": 0.3797545488891326, "grad_norm": 1.8239582777023315, "learning_rate": 3.2645285439384224e-06, "loss": 0.9128, "step": 31160 }, { "epoch": 0.37981548511328045, "grad_norm": 1.7879142761230469, "learning_rate": 3.264207825529186e-06, "loss": 0.8633, "step": 31165 }, { "epoch": 0.37987642133742827, "grad_norm": 1.7342643737792969, "learning_rate": 3.263887107119949e-06, "loss": 0.8373, "step": 31170 }, { "epoch": 0.37993735756157604, "grad_norm": 2.2013981342315674, "learning_rate": 3.2635663887107123e-06, "loss": 0.7668, "step": 31175 }, { "epoch": 0.37999829378572386, "grad_norm": 1.7441943883895874, "learning_rate": 3.2632456703014753e-06, "loss": 0.8257, "step": 31180 }, { "epoch": 0.3800592300098717, "grad_norm": 1.9857298135757446, "learning_rate": 3.2629249518922392e-06, "loss": 0.9003, "step": 31185 }, { "epoch": 0.38012016623401945, "grad_norm": 1.88015878200531, "learning_rate": 3.2626042334830022e-06, "loss": 0.933, "step": 31190 }, { "epoch": 0.3801811024581673, "grad_norm": 1.7644422054290771, "learning_rate": 3.2622835150737652e-06, "loss": 0.8879, "step": 31195 }, { "epoch": 0.3802420386823151, "grad_norm": 1.9497597217559814, "learning_rate": 3.261962796664529e-06, "loss": 0.8035, "step": 31200 }, { "epoch": 0.3803029749064629, "grad_norm": 2.177839756011963, "learning_rate": 3.261642078255292e-06, "loss": 0.8378, "step": 31205 }, { "epoch": 0.3803639111306107, "grad_norm": 2.341179609298706, "learning_rate": 3.261321359846055e-06, "loss": 0.8701, "step": 31210 }, { "epoch": 0.3804248473547585, "grad_norm": 2.1550967693328857, "learning_rate": 3.261000641436819e-06, "loss": 0.8285, "step": 31215 }, { "epoch": 0.38048578357890633, "grad_norm": 2.336184024810791, "learning_rate": 3.260679923027582e-06, "loss": 0.9205, "step": 31220 }, { "epoch": 0.3805467198030541, "grad_norm": 2.1984784603118896, "learning_rate": 3.260359204618345e-06, "loss": 0.8842, "step": 31225 }, { "epoch": 0.3806076560272019, "grad_norm": 1.9439486265182495, "learning_rate": 3.260038486209109e-06, "loss": 0.8468, "step": 31230 }, { "epoch": 0.38066859225134975, "grad_norm": 1.9568006992340088, "learning_rate": 3.259717767799872e-06, "loss": 0.8469, "step": 31235 }, { "epoch": 0.38072952847549757, "grad_norm": 1.8338178396224976, "learning_rate": 3.2593970493906354e-06, "loss": 0.881, "step": 31240 }, { "epoch": 0.38079046469964534, "grad_norm": 1.8598355054855347, "learning_rate": 3.259076330981399e-06, "loss": 0.8447, "step": 31245 }, { "epoch": 0.38085140092379316, "grad_norm": 1.9116591215133667, "learning_rate": 3.258755612572162e-06, "loss": 0.8948, "step": 31250 }, { "epoch": 0.380912337147941, "grad_norm": 2.3236026763916016, "learning_rate": 3.2584348941629253e-06, "loss": 0.7777, "step": 31255 }, { "epoch": 0.38097327337208875, "grad_norm": 1.751656413078308, "learning_rate": 3.2581141757536883e-06, "loss": 0.9527, "step": 31260 }, { "epoch": 0.3810342095962366, "grad_norm": 1.7997872829437256, "learning_rate": 3.257793457344452e-06, "loss": 0.8811, "step": 31265 }, { "epoch": 0.3810951458203844, "grad_norm": 1.8691591024398804, "learning_rate": 3.257472738935215e-06, "loss": 0.8333, "step": 31270 }, { "epoch": 0.3811560820445322, "grad_norm": 1.7055487632751465, "learning_rate": 3.257152020525978e-06, "loss": 0.832, "step": 31275 }, { "epoch": 0.38121701826868, "grad_norm": 2.1851415634155273, "learning_rate": 3.256831302116742e-06, "loss": 0.875, "step": 31280 }, { "epoch": 0.3812779544928278, "grad_norm": 1.9110641479492188, "learning_rate": 3.256510583707505e-06, "loss": 0.8538, "step": 31285 }, { "epoch": 0.38133889071697563, "grad_norm": 1.975187063217163, "learning_rate": 3.256189865298268e-06, "loss": 0.8478, "step": 31290 }, { "epoch": 0.3813998269411234, "grad_norm": 1.9591994285583496, "learning_rate": 3.255869146889032e-06, "loss": 0.8557, "step": 31295 }, { "epoch": 0.3814607631652712, "grad_norm": 2.2640833854675293, "learning_rate": 3.255548428479795e-06, "loss": 0.8247, "step": 31300 }, { "epoch": 0.38152169938941904, "grad_norm": 1.8836164474487305, "learning_rate": 3.255227710070558e-06, "loss": 0.875, "step": 31305 }, { "epoch": 0.38158263561356687, "grad_norm": 1.9140989780426025, "learning_rate": 3.254906991661322e-06, "loss": 0.8034, "step": 31310 }, { "epoch": 0.38164357183771463, "grad_norm": 1.882416009902954, "learning_rate": 3.254586273252085e-06, "loss": 0.8428, "step": 31315 }, { "epoch": 0.38170450806186246, "grad_norm": 1.9205790758132935, "learning_rate": 3.2542655548428483e-06, "loss": 0.8957, "step": 31320 }, { "epoch": 0.3817654442860103, "grad_norm": 2.2557308673858643, "learning_rate": 3.2539448364336117e-06, "loss": 0.8816, "step": 31325 }, { "epoch": 0.38182638051015805, "grad_norm": 1.9623245000839233, "learning_rate": 3.2536241180243747e-06, "loss": 0.8278, "step": 31330 }, { "epoch": 0.38188731673430587, "grad_norm": 1.9866641759872437, "learning_rate": 3.253303399615138e-06, "loss": 0.7958, "step": 31335 }, { "epoch": 0.3819482529584537, "grad_norm": 1.810963749885559, "learning_rate": 3.252982681205901e-06, "loss": 0.8386, "step": 31340 }, { "epoch": 0.3820091891826015, "grad_norm": 1.8942198753356934, "learning_rate": 3.252661962796665e-06, "loss": 0.8559, "step": 31345 }, { "epoch": 0.3820701254067493, "grad_norm": 2.053830146789551, "learning_rate": 3.252341244387428e-06, "loss": 0.8291, "step": 31350 }, { "epoch": 0.3821310616308971, "grad_norm": 2.1994614601135254, "learning_rate": 3.252020525978191e-06, "loss": 0.8724, "step": 31355 }, { "epoch": 0.38219199785504493, "grad_norm": 1.9714137315750122, "learning_rate": 3.251699807568955e-06, "loss": 0.8973, "step": 31360 }, { "epoch": 0.3822529340791927, "grad_norm": 2.8501362800598145, "learning_rate": 3.251379089159718e-06, "loss": 0.8688, "step": 31365 }, { "epoch": 0.3823138703033405, "grad_norm": 1.8960115909576416, "learning_rate": 3.251058370750481e-06, "loss": 0.8839, "step": 31370 }, { "epoch": 0.38237480652748834, "grad_norm": 2.050905704498291, "learning_rate": 3.250737652341245e-06, "loss": 0.8474, "step": 31375 }, { "epoch": 0.38243574275163617, "grad_norm": 2.0036544799804688, "learning_rate": 3.250416933932008e-06, "loss": 0.8711, "step": 31380 }, { "epoch": 0.38249667897578393, "grad_norm": 1.5974284410476685, "learning_rate": 3.250096215522771e-06, "loss": 0.7985, "step": 31385 }, { "epoch": 0.38255761519993176, "grad_norm": 1.8230587244033813, "learning_rate": 3.2497754971135348e-06, "loss": 0.8461, "step": 31390 }, { "epoch": 0.3826185514240796, "grad_norm": 1.929491639137268, "learning_rate": 3.2494547787042978e-06, "loss": 0.902, "step": 31395 }, { "epoch": 0.38267948764822735, "grad_norm": 1.9258723258972168, "learning_rate": 3.2491340602950612e-06, "loss": 0.8208, "step": 31400 }, { "epoch": 0.38274042387237517, "grad_norm": 2.0577340126037598, "learning_rate": 3.2488133418858247e-06, "loss": 0.89, "step": 31405 }, { "epoch": 0.382801360096523, "grad_norm": 1.8400710821151733, "learning_rate": 3.248492623476588e-06, "loss": 0.8016, "step": 31410 }, { "epoch": 0.38286229632067076, "grad_norm": 1.8885835409164429, "learning_rate": 3.248171905067351e-06, "loss": 0.8487, "step": 31415 }, { "epoch": 0.3829232325448186, "grad_norm": 1.9915610551834106, "learning_rate": 3.2478511866581146e-06, "loss": 0.8173, "step": 31420 }, { "epoch": 0.3829841687689664, "grad_norm": 2.216447114944458, "learning_rate": 3.247530468248878e-06, "loss": 0.8887, "step": 31425 }, { "epoch": 0.3830451049931142, "grad_norm": 1.9886784553527832, "learning_rate": 3.247209749839641e-06, "loss": 0.9175, "step": 31430 }, { "epoch": 0.383106041217262, "grad_norm": 1.7065496444702148, "learning_rate": 3.246889031430404e-06, "loss": 0.8212, "step": 31435 }, { "epoch": 0.3831669774414098, "grad_norm": 2.39813494682312, "learning_rate": 3.246568313021168e-06, "loss": 0.9192, "step": 31440 }, { "epoch": 0.38322791366555764, "grad_norm": 3.1063613891601562, "learning_rate": 3.246247594611931e-06, "loss": 0.8023, "step": 31445 }, { "epoch": 0.3832888498897054, "grad_norm": 1.8650587797164917, "learning_rate": 3.245926876202694e-06, "loss": 0.8545, "step": 31450 }, { "epoch": 0.38334978611385323, "grad_norm": 1.7317540645599365, "learning_rate": 3.245606157793458e-06, "loss": 0.8171, "step": 31455 }, { "epoch": 0.38341072233800105, "grad_norm": 1.8849431276321411, "learning_rate": 3.245285439384221e-06, "loss": 0.8852, "step": 31460 }, { "epoch": 0.3834716585621489, "grad_norm": 1.7845678329467773, "learning_rate": 3.2449647209749842e-06, "loss": 0.8504, "step": 31465 }, { "epoch": 0.38353259478629664, "grad_norm": 1.657557725906372, "learning_rate": 3.2446440025657477e-06, "loss": 0.7674, "step": 31470 }, { "epoch": 0.38359353101044447, "grad_norm": 1.979703426361084, "learning_rate": 3.2443232841565107e-06, "loss": 0.874, "step": 31475 }, { "epoch": 0.3836544672345923, "grad_norm": 1.9734835624694824, "learning_rate": 3.244002565747274e-06, "loss": 0.8035, "step": 31480 }, { "epoch": 0.38371540345874006, "grad_norm": 2.1403815746307373, "learning_rate": 3.2436818473380376e-06, "loss": 0.8354, "step": 31485 }, { "epoch": 0.3837763396828879, "grad_norm": 1.9513952732086182, "learning_rate": 3.243361128928801e-06, "loss": 0.8312, "step": 31490 }, { "epoch": 0.3838372759070357, "grad_norm": 1.84987473487854, "learning_rate": 3.243040410519564e-06, "loss": 0.8061, "step": 31495 }, { "epoch": 0.3838982121311835, "grad_norm": 2.0253684520721436, "learning_rate": 3.2427196921103275e-06, "loss": 0.8539, "step": 31500 }, { "epoch": 0.3839591483553313, "grad_norm": 2.4450106620788574, "learning_rate": 3.242398973701091e-06, "loss": 0.8585, "step": 31505 }, { "epoch": 0.3840200845794791, "grad_norm": 2.493159770965576, "learning_rate": 3.242078255291854e-06, "loss": 0.8626, "step": 31510 }, { "epoch": 0.38408102080362694, "grad_norm": 2.0305182933807373, "learning_rate": 3.241757536882617e-06, "loss": 0.8688, "step": 31515 }, { "epoch": 0.3841419570277747, "grad_norm": 2.213343620300293, "learning_rate": 3.241436818473381e-06, "loss": 0.885, "step": 31520 }, { "epoch": 0.38420289325192253, "grad_norm": 1.7859694957733154, "learning_rate": 3.241116100064144e-06, "loss": 0.9128, "step": 31525 }, { "epoch": 0.38426382947607035, "grad_norm": 4.022057056427002, "learning_rate": 3.240795381654907e-06, "loss": 0.818, "step": 31530 }, { "epoch": 0.3843247657002182, "grad_norm": 2.052734375, "learning_rate": 3.2404746632456707e-06, "loss": 0.817, "step": 31535 }, { "epoch": 0.38438570192436594, "grad_norm": 2.046870470046997, "learning_rate": 3.2401539448364337e-06, "loss": 0.8926, "step": 31540 }, { "epoch": 0.38444663814851376, "grad_norm": 1.6872785091400146, "learning_rate": 3.239833226427197e-06, "loss": 0.909, "step": 31545 }, { "epoch": 0.3845075743726616, "grad_norm": 1.942373514175415, "learning_rate": 3.2395125080179606e-06, "loss": 0.85, "step": 31550 }, { "epoch": 0.38456851059680935, "grad_norm": 2.2662577629089355, "learning_rate": 3.2391917896087236e-06, "loss": 0.7711, "step": 31555 }, { "epoch": 0.3846294468209572, "grad_norm": 2.374523162841797, "learning_rate": 3.238871071199487e-06, "loss": 0.8175, "step": 31560 }, { "epoch": 0.384690383045105, "grad_norm": 1.8267245292663574, "learning_rate": 3.2385503527902505e-06, "loss": 0.7919, "step": 31565 }, { "epoch": 0.3847513192692528, "grad_norm": 2.1148924827575684, "learning_rate": 3.238229634381014e-06, "loss": 0.8145, "step": 31570 }, { "epoch": 0.3848122554934006, "grad_norm": 1.8452993631362915, "learning_rate": 3.237908915971777e-06, "loss": 0.8613, "step": 31575 }, { "epoch": 0.3848731917175484, "grad_norm": 2.1641361713409424, "learning_rate": 3.2375881975625404e-06, "loss": 0.8361, "step": 31580 }, { "epoch": 0.38493412794169624, "grad_norm": 1.8752206563949585, "learning_rate": 3.237267479153304e-06, "loss": 0.8156, "step": 31585 }, { "epoch": 0.384995064165844, "grad_norm": 1.824371576309204, "learning_rate": 3.236946760744067e-06, "loss": 0.8415, "step": 31590 }, { "epoch": 0.3850560003899918, "grad_norm": 1.886084794998169, "learning_rate": 3.23662604233483e-06, "loss": 0.8437, "step": 31595 }, { "epoch": 0.38511693661413965, "grad_norm": 2.029269218444824, "learning_rate": 3.2363053239255937e-06, "loss": 0.8723, "step": 31600 }, { "epoch": 0.38517787283828747, "grad_norm": 1.825653314590454, "learning_rate": 3.2359846055163568e-06, "loss": 0.8534, "step": 31605 }, { "epoch": 0.38523880906243524, "grad_norm": 1.7954161167144775, "learning_rate": 3.2356638871071198e-06, "loss": 0.9126, "step": 31610 }, { "epoch": 0.38529974528658306, "grad_norm": 2.1322975158691406, "learning_rate": 3.2353431686978836e-06, "loss": 0.8534, "step": 31615 }, { "epoch": 0.3853606815107309, "grad_norm": 1.9115275144577026, "learning_rate": 3.2350224502886467e-06, "loss": 0.8122, "step": 31620 }, { "epoch": 0.38542161773487865, "grad_norm": 2.1960248947143555, "learning_rate": 3.23470173187941e-06, "loss": 0.8867, "step": 31625 }, { "epoch": 0.3854825539590265, "grad_norm": 2.0590624809265137, "learning_rate": 3.2343810134701735e-06, "loss": 0.8593, "step": 31630 }, { "epoch": 0.3855434901831743, "grad_norm": 1.7397865056991577, "learning_rate": 3.234060295060937e-06, "loss": 0.8268, "step": 31635 }, { "epoch": 0.3856044264073221, "grad_norm": 1.7563318014144897, "learning_rate": 3.2337395766517e-06, "loss": 0.9166, "step": 31640 }, { "epoch": 0.3856653626314699, "grad_norm": 1.9568630456924438, "learning_rate": 3.2334188582424634e-06, "loss": 0.8838, "step": 31645 }, { "epoch": 0.3857262988556177, "grad_norm": 1.808747410774231, "learning_rate": 3.233098139833227e-06, "loss": 0.9097, "step": 31650 }, { "epoch": 0.38578723507976553, "grad_norm": 1.805707335472107, "learning_rate": 3.23277742142399e-06, "loss": 0.8161, "step": 31655 }, { "epoch": 0.3858481713039133, "grad_norm": 1.790395975112915, "learning_rate": 3.2324567030147538e-06, "loss": 0.8146, "step": 31660 }, { "epoch": 0.3859091075280611, "grad_norm": 1.8943455219268799, "learning_rate": 3.2321359846055168e-06, "loss": 0.9242, "step": 31665 }, { "epoch": 0.38597004375220895, "grad_norm": 2.682119607925415, "learning_rate": 3.23181526619628e-06, "loss": 0.8361, "step": 31670 }, { "epoch": 0.38603097997635677, "grad_norm": 2.266873836517334, "learning_rate": 3.231494547787043e-06, "loss": 0.8422, "step": 31675 }, { "epoch": 0.38609191620050454, "grad_norm": 1.902685284614563, "learning_rate": 3.2311738293778067e-06, "loss": 0.7302, "step": 31680 }, { "epoch": 0.38615285242465236, "grad_norm": 1.8535075187683105, "learning_rate": 3.2308531109685697e-06, "loss": 0.8159, "step": 31685 }, { "epoch": 0.3862137886488002, "grad_norm": 1.8008486032485962, "learning_rate": 3.230532392559333e-06, "loss": 0.8398, "step": 31690 }, { "epoch": 0.38627472487294795, "grad_norm": 2.0594563484191895, "learning_rate": 3.2302116741500966e-06, "loss": 0.8084, "step": 31695 }, { "epoch": 0.3863356610970958, "grad_norm": 1.8668376207351685, "learning_rate": 3.2298909557408596e-06, "loss": 0.8459, "step": 31700 }, { "epoch": 0.3863965973212436, "grad_norm": 2.029020309448242, "learning_rate": 3.229570237331623e-06, "loss": 0.9066, "step": 31705 }, { "epoch": 0.3864575335453914, "grad_norm": 2.2384250164031982, "learning_rate": 3.2292495189223865e-06, "loss": 0.8775, "step": 31710 }, { "epoch": 0.3865184697695392, "grad_norm": 2.0608110427856445, "learning_rate": 3.22892880051315e-06, "loss": 0.7833, "step": 31715 }, { "epoch": 0.386579405993687, "grad_norm": 2.0372369289398193, "learning_rate": 3.228608082103913e-06, "loss": 0.8314, "step": 31720 }, { "epoch": 0.38664034221783483, "grad_norm": 1.8272802829742432, "learning_rate": 3.2282873636946764e-06, "loss": 0.812, "step": 31725 }, { "epoch": 0.3867012784419826, "grad_norm": 2.098144054412842, "learning_rate": 3.22796664528544e-06, "loss": 0.8352, "step": 31730 }, { "epoch": 0.3867622146661304, "grad_norm": 2.10166335105896, "learning_rate": 3.227645926876203e-06, "loss": 0.9068, "step": 31735 }, { "epoch": 0.38682315089027824, "grad_norm": 1.749991536140442, "learning_rate": 3.2273252084669667e-06, "loss": 0.8391, "step": 31740 }, { "epoch": 0.38688408711442607, "grad_norm": 1.7882730960845947, "learning_rate": 3.2270044900577297e-06, "loss": 0.8872, "step": 31745 }, { "epoch": 0.38694502333857383, "grad_norm": 1.7621312141418457, "learning_rate": 3.2266837716484927e-06, "loss": 0.8245, "step": 31750 }, { "epoch": 0.38700595956272166, "grad_norm": 2.2255353927612305, "learning_rate": 3.2263630532392566e-06, "loss": 0.9281, "step": 31755 }, { "epoch": 0.3870668957868695, "grad_norm": 2.0811681747436523, "learning_rate": 3.2260423348300196e-06, "loss": 0.8261, "step": 31760 }, { "epoch": 0.38712783201101725, "grad_norm": 1.9143505096435547, "learning_rate": 3.2257216164207826e-06, "loss": 0.8151, "step": 31765 }, { "epoch": 0.38718876823516507, "grad_norm": 2.3000569343566895, "learning_rate": 3.225400898011546e-06, "loss": 0.8544, "step": 31770 }, { "epoch": 0.3872497044593129, "grad_norm": 1.7853081226348877, "learning_rate": 3.2250801796023095e-06, "loss": 0.7619, "step": 31775 }, { "epoch": 0.3873106406834607, "grad_norm": 2.1827664375305176, "learning_rate": 3.2247594611930725e-06, "loss": 0.8819, "step": 31780 }, { "epoch": 0.3873715769076085, "grad_norm": 1.9413464069366455, "learning_rate": 3.224438742783836e-06, "loss": 0.9295, "step": 31785 }, { "epoch": 0.3874325131317563, "grad_norm": 1.9726674556732178, "learning_rate": 3.2241180243745994e-06, "loss": 0.8855, "step": 31790 }, { "epoch": 0.38749344935590413, "grad_norm": 1.8950741291046143, "learning_rate": 3.223797305965363e-06, "loss": 0.8318, "step": 31795 }, { "epoch": 0.3875543855800519, "grad_norm": 1.9921351671218872, "learning_rate": 3.223476587556126e-06, "loss": 0.843, "step": 31800 }, { "epoch": 0.3876153218041997, "grad_norm": 1.7803853750228882, "learning_rate": 3.2231558691468893e-06, "loss": 0.8421, "step": 31805 }, { "epoch": 0.38767625802834754, "grad_norm": 2.072592258453369, "learning_rate": 3.2228351507376527e-06, "loss": 0.8083, "step": 31810 }, { "epoch": 0.38773719425249537, "grad_norm": 2.6319196224212646, "learning_rate": 3.2225144323284157e-06, "loss": 0.8928, "step": 31815 }, { "epoch": 0.38779813047664313, "grad_norm": 1.9209271669387817, "learning_rate": 3.2221937139191796e-06, "loss": 0.8174, "step": 31820 }, { "epoch": 0.38785906670079096, "grad_norm": 2.0842902660369873, "learning_rate": 3.2218729955099426e-06, "loss": 0.9782, "step": 31825 }, { "epoch": 0.3879200029249388, "grad_norm": 1.6808497905731201, "learning_rate": 3.2215522771007056e-06, "loss": 0.8378, "step": 31830 }, { "epoch": 0.38798093914908655, "grad_norm": 2.3982930183410645, "learning_rate": 3.2212315586914695e-06, "loss": 0.8496, "step": 31835 }, { "epoch": 0.38804187537323437, "grad_norm": 1.9727308750152588, "learning_rate": 3.2209108402822325e-06, "loss": 0.8787, "step": 31840 }, { "epoch": 0.3881028115973822, "grad_norm": 1.8688790798187256, "learning_rate": 3.2205901218729955e-06, "loss": 0.8713, "step": 31845 }, { "epoch": 0.38816374782153, "grad_norm": 1.9207221269607544, "learning_rate": 3.220269403463759e-06, "loss": 0.842, "step": 31850 }, { "epoch": 0.3882246840456778, "grad_norm": 2.373892307281494, "learning_rate": 3.2199486850545224e-06, "loss": 0.8747, "step": 31855 }, { "epoch": 0.3882856202698256, "grad_norm": 1.9380669593811035, "learning_rate": 3.2196279666452854e-06, "loss": 0.9311, "step": 31860 }, { "epoch": 0.3883465564939734, "grad_norm": 1.854662537574768, "learning_rate": 3.219307248236049e-06, "loss": 0.936, "step": 31865 }, { "epoch": 0.3884074927181212, "grad_norm": 2.184174060821533, "learning_rate": 3.2189865298268123e-06, "loss": 0.8282, "step": 31870 }, { "epoch": 0.388468428942269, "grad_norm": 2.0781490802764893, "learning_rate": 3.2186658114175758e-06, "loss": 0.8785, "step": 31875 }, { "epoch": 0.38852936516641684, "grad_norm": 2.0133752822875977, "learning_rate": 3.2183450930083388e-06, "loss": 0.9245, "step": 31880 }, { "epoch": 0.3885903013905646, "grad_norm": 1.8424360752105713, "learning_rate": 3.2180243745991026e-06, "loss": 0.8956, "step": 31885 }, { "epoch": 0.38865123761471243, "grad_norm": 1.939815640449524, "learning_rate": 3.2177036561898657e-06, "loss": 0.9523, "step": 31890 }, { "epoch": 0.38871217383886025, "grad_norm": 2.3505537509918213, "learning_rate": 3.2173829377806287e-06, "loss": 0.8362, "step": 31895 }, { "epoch": 0.3887731100630081, "grad_norm": 2.4655368328094482, "learning_rate": 3.2170622193713925e-06, "loss": 0.8863, "step": 31900 }, { "epoch": 0.38883404628715584, "grad_norm": 2.2778067588806152, "learning_rate": 3.2167415009621556e-06, "loss": 0.8597, "step": 31905 }, { "epoch": 0.38889498251130367, "grad_norm": 2.6660263538360596, "learning_rate": 3.2164207825529186e-06, "loss": 0.8815, "step": 31910 }, { "epoch": 0.3889559187354515, "grad_norm": 1.9157814979553223, "learning_rate": 3.2161000641436824e-06, "loss": 0.8343, "step": 31915 }, { "epoch": 0.38901685495959926, "grad_norm": 2.352177858352661, "learning_rate": 3.2157793457344455e-06, "loss": 0.8241, "step": 31920 }, { "epoch": 0.3890777911837471, "grad_norm": 2.5042364597320557, "learning_rate": 3.2154586273252085e-06, "loss": 0.9229, "step": 31925 }, { "epoch": 0.3891387274078949, "grad_norm": 1.9752544164657593, "learning_rate": 3.215137908915972e-06, "loss": 0.8349, "step": 31930 }, { "epoch": 0.3891996636320427, "grad_norm": 1.7131301164627075, "learning_rate": 3.2148171905067354e-06, "loss": 0.8742, "step": 31935 }, { "epoch": 0.3892605998561905, "grad_norm": 1.5119072198867798, "learning_rate": 3.214496472097499e-06, "loss": 0.8329, "step": 31940 }, { "epoch": 0.3893215360803383, "grad_norm": 2.2591588497161865, "learning_rate": 3.214175753688262e-06, "loss": 0.8974, "step": 31945 }, { "epoch": 0.38938247230448614, "grad_norm": 1.7330732345581055, "learning_rate": 3.2138550352790252e-06, "loss": 0.861, "step": 31950 }, { "epoch": 0.3894434085286339, "grad_norm": 2.3218865394592285, "learning_rate": 3.2135343168697887e-06, "loss": 0.8167, "step": 31955 }, { "epoch": 0.38950434475278173, "grad_norm": 2.052478313446045, "learning_rate": 3.2132135984605517e-06, "loss": 0.8349, "step": 31960 }, { "epoch": 0.38956528097692955, "grad_norm": 1.9838194847106934, "learning_rate": 3.2128928800513156e-06, "loss": 0.8454, "step": 31965 }, { "epoch": 0.3896262172010774, "grad_norm": 1.8437052965164185, "learning_rate": 3.2125721616420786e-06, "loss": 0.7759, "step": 31970 }, { "epoch": 0.38968715342522514, "grad_norm": 1.7500866651535034, "learning_rate": 3.2122514432328416e-06, "loss": 0.8239, "step": 31975 }, { "epoch": 0.38974808964937296, "grad_norm": 2.039539337158203, "learning_rate": 3.2119307248236055e-06, "loss": 0.9078, "step": 31980 }, { "epoch": 0.3898090258735208, "grad_norm": 2.096534252166748, "learning_rate": 3.2116100064143685e-06, "loss": 0.87, "step": 31985 }, { "epoch": 0.38986996209766855, "grad_norm": 1.9892103672027588, "learning_rate": 3.2112892880051315e-06, "loss": 0.8596, "step": 31990 }, { "epoch": 0.3899308983218164, "grad_norm": 1.9509353637695312, "learning_rate": 3.2109685695958954e-06, "loss": 0.8956, "step": 31995 }, { "epoch": 0.3899918345459642, "grad_norm": 2.0118353366851807, "learning_rate": 3.2106478511866584e-06, "loss": 0.8349, "step": 32000 }, { "epoch": 0.390052770770112, "grad_norm": 1.5744930505752563, "learning_rate": 3.2103271327774214e-06, "loss": 0.8918, "step": 32005 }, { "epoch": 0.3901137069942598, "grad_norm": 1.7923320531845093, "learning_rate": 3.210006414368185e-06, "loss": 0.8211, "step": 32010 }, { "epoch": 0.3901746432184076, "grad_norm": 2.317505121231079, "learning_rate": 3.2096856959589483e-06, "loss": 0.8326, "step": 32015 }, { "epoch": 0.39023557944255544, "grad_norm": 2.1995694637298584, "learning_rate": 3.2093649775497117e-06, "loss": 0.8789, "step": 32020 }, { "epoch": 0.3902965156667032, "grad_norm": 1.6953840255737305, "learning_rate": 3.2090442591404747e-06, "loss": 0.8346, "step": 32025 }, { "epoch": 0.390357451890851, "grad_norm": 1.966858148574829, "learning_rate": 3.208723540731238e-06, "loss": 0.7711, "step": 32030 }, { "epoch": 0.39041838811499885, "grad_norm": 1.9265824556350708, "learning_rate": 3.2084028223220016e-06, "loss": 0.8219, "step": 32035 }, { "epoch": 0.39047932433914667, "grad_norm": 1.9605013132095337, "learning_rate": 3.2080821039127646e-06, "loss": 0.9073, "step": 32040 }, { "epoch": 0.39054026056329444, "grad_norm": 2.088195562362671, "learning_rate": 3.2077613855035285e-06, "loss": 0.8921, "step": 32045 }, { "epoch": 0.39060119678744226, "grad_norm": 1.8908731937408447, "learning_rate": 3.2074406670942915e-06, "loss": 0.8504, "step": 32050 }, { "epoch": 0.3906621330115901, "grad_norm": 2.1285057067871094, "learning_rate": 3.2071199486850545e-06, "loss": 0.866, "step": 32055 }, { "epoch": 0.39072306923573785, "grad_norm": 2.011380195617676, "learning_rate": 3.2067992302758184e-06, "loss": 0.9271, "step": 32060 }, { "epoch": 0.3907840054598857, "grad_norm": 1.9818270206451416, "learning_rate": 3.2064785118665814e-06, "loss": 0.8478, "step": 32065 }, { "epoch": 0.3908449416840335, "grad_norm": 2.048769474029541, "learning_rate": 3.2061577934573444e-06, "loss": 0.78, "step": 32070 }, { "epoch": 0.3909058779081813, "grad_norm": 1.736537218093872, "learning_rate": 3.2058370750481083e-06, "loss": 0.8691, "step": 32075 }, { "epoch": 0.3909668141323291, "grad_norm": 2.1014208793640137, "learning_rate": 3.2055163566388713e-06, "loss": 0.8796, "step": 32080 }, { "epoch": 0.3910277503564769, "grad_norm": 2.3373758792877197, "learning_rate": 3.2051956382296343e-06, "loss": 0.8669, "step": 32085 }, { "epoch": 0.39108868658062473, "grad_norm": 1.939489722251892, "learning_rate": 3.204874919820398e-06, "loss": 0.8434, "step": 32090 }, { "epoch": 0.3911496228047725, "grad_norm": 1.8727643489837646, "learning_rate": 3.204554201411161e-06, "loss": 0.821, "step": 32095 }, { "epoch": 0.3912105590289203, "grad_norm": 1.665654182434082, "learning_rate": 3.2042334830019246e-06, "loss": 0.7844, "step": 32100 }, { "epoch": 0.39127149525306815, "grad_norm": 1.810895323753357, "learning_rate": 3.2039127645926877e-06, "loss": 0.7653, "step": 32105 }, { "epoch": 0.39133243147721597, "grad_norm": 1.8498530387878418, "learning_rate": 3.2035920461834515e-06, "loss": 0.8164, "step": 32110 }, { "epoch": 0.39139336770136374, "grad_norm": 1.969382643699646, "learning_rate": 3.2032713277742145e-06, "loss": 0.8676, "step": 32115 }, { "epoch": 0.39145430392551156, "grad_norm": 1.9923969507217407, "learning_rate": 3.2029506093649776e-06, "loss": 0.906, "step": 32120 }, { "epoch": 0.3915152401496594, "grad_norm": 2.0595219135284424, "learning_rate": 3.2026298909557414e-06, "loss": 0.932, "step": 32125 }, { "epoch": 0.39157617637380715, "grad_norm": 2.3675665855407715, "learning_rate": 3.2023091725465044e-06, "loss": 0.8639, "step": 32130 }, { "epoch": 0.391637112597955, "grad_norm": 2.131078004837036, "learning_rate": 3.2019884541372675e-06, "loss": 0.8861, "step": 32135 }, { "epoch": 0.3916980488221028, "grad_norm": 1.869614601135254, "learning_rate": 3.2016677357280313e-06, "loss": 0.857, "step": 32140 }, { "epoch": 0.3917589850462506, "grad_norm": 1.825409173965454, "learning_rate": 3.2013470173187943e-06, "loss": 0.8102, "step": 32145 }, { "epoch": 0.3918199212703984, "grad_norm": 2.0326554775238037, "learning_rate": 3.2010262989095574e-06, "loss": 0.7734, "step": 32150 }, { "epoch": 0.3918808574945462, "grad_norm": 2.224843740463257, "learning_rate": 3.2007055805003212e-06, "loss": 0.8749, "step": 32155 }, { "epoch": 0.39194179371869403, "grad_norm": 2.042311429977417, "learning_rate": 3.2003848620910842e-06, "loss": 0.8862, "step": 32160 }, { "epoch": 0.3920027299428418, "grad_norm": 2.160123586654663, "learning_rate": 3.2000641436818477e-06, "loss": 0.8506, "step": 32165 }, { "epoch": 0.3920636661669896, "grad_norm": 2.0146231651306152, "learning_rate": 3.199743425272611e-06, "loss": 0.8389, "step": 32170 }, { "epoch": 0.39212460239113744, "grad_norm": 1.9981402158737183, "learning_rate": 3.199422706863374e-06, "loss": 0.8961, "step": 32175 }, { "epoch": 0.39218553861528527, "grad_norm": 2.056016445159912, "learning_rate": 3.1991019884541376e-06, "loss": 0.8546, "step": 32180 }, { "epoch": 0.39224647483943303, "grad_norm": 2.1408703327178955, "learning_rate": 3.1987812700449006e-06, "loss": 0.9219, "step": 32185 }, { "epoch": 0.39230741106358086, "grad_norm": 1.8263957500457764, "learning_rate": 3.1984605516356645e-06, "loss": 0.8235, "step": 32190 }, { "epoch": 0.3923683472877287, "grad_norm": 1.950504183769226, "learning_rate": 3.1981398332264275e-06, "loss": 0.8157, "step": 32195 }, { "epoch": 0.39242928351187645, "grad_norm": 1.7805286645889282, "learning_rate": 3.1978191148171905e-06, "loss": 0.8813, "step": 32200 }, { "epoch": 0.39249021973602427, "grad_norm": 2.129570960998535, "learning_rate": 3.1974983964079544e-06, "loss": 0.8513, "step": 32205 }, { "epoch": 0.3925511559601721, "grad_norm": 1.868516206741333, "learning_rate": 3.1971776779987174e-06, "loss": 0.8123, "step": 32210 }, { "epoch": 0.3926120921843199, "grad_norm": 2.161050319671631, "learning_rate": 3.1968569595894804e-06, "loss": 0.8114, "step": 32215 }, { "epoch": 0.3926730284084677, "grad_norm": 2.448120355606079, "learning_rate": 3.1965362411802442e-06, "loss": 0.8413, "step": 32220 }, { "epoch": 0.3927339646326155, "grad_norm": 1.791322112083435, "learning_rate": 3.1962155227710073e-06, "loss": 0.8973, "step": 32225 }, { "epoch": 0.39279490085676333, "grad_norm": 2.1444783210754395, "learning_rate": 3.1958948043617703e-06, "loss": 0.8804, "step": 32230 }, { "epoch": 0.3928558370809111, "grad_norm": 2.113521099090576, "learning_rate": 3.195574085952534e-06, "loss": 0.7879, "step": 32235 }, { "epoch": 0.3929167733050589, "grad_norm": 2.089458465576172, "learning_rate": 3.195253367543297e-06, "loss": 0.7625, "step": 32240 }, { "epoch": 0.39297770952920674, "grad_norm": 1.6474310159683228, "learning_rate": 3.1949326491340606e-06, "loss": 0.8638, "step": 32245 }, { "epoch": 0.39303864575335457, "grad_norm": 1.6827038526535034, "learning_rate": 3.194611930724824e-06, "loss": 0.8193, "step": 32250 }, { "epoch": 0.39309958197750233, "grad_norm": 1.9384719133377075, "learning_rate": 3.194291212315587e-06, "loss": 0.8616, "step": 32255 }, { "epoch": 0.39316051820165016, "grad_norm": 1.8826756477355957, "learning_rate": 3.1939704939063505e-06, "loss": 0.8365, "step": 32260 }, { "epoch": 0.393221454425798, "grad_norm": 1.8124445676803589, "learning_rate": 3.1936497754971135e-06, "loss": 0.8349, "step": 32265 }, { "epoch": 0.39328239064994575, "grad_norm": 1.7247635126113892, "learning_rate": 3.1933290570878774e-06, "loss": 0.7873, "step": 32270 }, { "epoch": 0.39334332687409357, "grad_norm": 1.9794895648956299, "learning_rate": 3.1930083386786404e-06, "loss": 0.811, "step": 32275 }, { "epoch": 0.3934042630982414, "grad_norm": 1.7801593542099, "learning_rate": 3.1926876202694034e-06, "loss": 0.8644, "step": 32280 }, { "epoch": 0.3934651993223892, "grad_norm": 1.7648767232894897, "learning_rate": 3.1923669018601673e-06, "loss": 0.8952, "step": 32285 }, { "epoch": 0.393526135546537, "grad_norm": 1.953084945678711, "learning_rate": 3.1920461834509303e-06, "loss": 0.8, "step": 32290 }, { "epoch": 0.3935870717706848, "grad_norm": 2.1349093914031982, "learning_rate": 3.1917254650416933e-06, "loss": 0.8789, "step": 32295 }, { "epoch": 0.3936480079948326, "grad_norm": 1.8939138650894165, "learning_rate": 3.191404746632457e-06, "loss": 0.8997, "step": 32300 }, { "epoch": 0.3937089442189804, "grad_norm": 2.378448724746704, "learning_rate": 3.19108402822322e-06, "loss": 0.8797, "step": 32305 }, { "epoch": 0.3937698804431282, "grad_norm": 1.9781445264816284, "learning_rate": 3.190763309813983e-06, "loss": 0.8432, "step": 32310 }, { "epoch": 0.39383081666727604, "grad_norm": 2.0392677783966064, "learning_rate": 3.190442591404747e-06, "loss": 0.8434, "step": 32315 }, { "epoch": 0.39389175289142386, "grad_norm": 2.058037281036377, "learning_rate": 3.19012187299551e-06, "loss": 0.9281, "step": 32320 }, { "epoch": 0.39395268911557163, "grad_norm": 1.994000792503357, "learning_rate": 3.1898011545862735e-06, "loss": 0.8504, "step": 32325 }, { "epoch": 0.39401362533971945, "grad_norm": 1.9739336967468262, "learning_rate": 3.189480436177037e-06, "loss": 0.9503, "step": 32330 }, { "epoch": 0.3940745615638673, "grad_norm": 1.7470033168792725, "learning_rate": 3.1891597177678004e-06, "loss": 0.8261, "step": 32335 }, { "epoch": 0.39413549778801504, "grad_norm": 1.985556721687317, "learning_rate": 3.1888389993585634e-06, "loss": 0.8722, "step": 32340 }, { "epoch": 0.39419643401216287, "grad_norm": 2.0038022994995117, "learning_rate": 3.188518280949327e-06, "loss": 0.8471, "step": 32345 }, { "epoch": 0.3942573702363107, "grad_norm": 2.1468939781188965, "learning_rate": 3.1881975625400903e-06, "loss": 0.9079, "step": 32350 }, { "epoch": 0.39431830646045846, "grad_norm": 2.1338326930999756, "learning_rate": 3.1878768441308533e-06, "loss": 0.8927, "step": 32355 }, { "epoch": 0.3943792426846063, "grad_norm": 1.888973593711853, "learning_rate": 3.1875561257216163e-06, "loss": 0.8075, "step": 32360 }, { "epoch": 0.3944401789087541, "grad_norm": 2.141968250274658, "learning_rate": 3.18723540731238e-06, "loss": 0.8896, "step": 32365 }, { "epoch": 0.3945011151329019, "grad_norm": 2.1350088119506836, "learning_rate": 3.1869146889031432e-06, "loss": 0.8525, "step": 32370 }, { "epoch": 0.3945620513570497, "grad_norm": 2.20330548286438, "learning_rate": 3.1865939704939062e-06, "loss": 0.8858, "step": 32375 }, { "epoch": 0.3946229875811975, "grad_norm": 2.08882474899292, "learning_rate": 3.18627325208467e-06, "loss": 0.9298, "step": 32380 }, { "epoch": 0.39468392380534534, "grad_norm": 2.3782737255096436, "learning_rate": 3.185952533675433e-06, "loss": 0.9089, "step": 32385 }, { "epoch": 0.3947448600294931, "grad_norm": 1.8456836938858032, "learning_rate": 3.1856318152661966e-06, "loss": 0.8615, "step": 32390 }, { "epoch": 0.39480579625364093, "grad_norm": 2.0095744132995605, "learning_rate": 3.18531109685696e-06, "loss": 0.8113, "step": 32395 }, { "epoch": 0.39486673247778875, "grad_norm": 2.320944309234619, "learning_rate": 3.184990378447723e-06, "loss": 0.938, "step": 32400 }, { "epoch": 0.3949276687019366, "grad_norm": 2.0034899711608887, "learning_rate": 3.1846696600384865e-06, "loss": 0.811, "step": 32405 }, { "epoch": 0.39498860492608434, "grad_norm": 2.312385082244873, "learning_rate": 3.18434894162925e-06, "loss": 0.8708, "step": 32410 }, { "epoch": 0.39504954115023216, "grad_norm": 1.888227105140686, "learning_rate": 3.1840282232200133e-06, "loss": 0.8676, "step": 32415 }, { "epoch": 0.39511047737438, "grad_norm": 2.2566347122192383, "learning_rate": 3.1837075048107764e-06, "loss": 0.8621, "step": 32420 }, { "epoch": 0.39517141359852775, "grad_norm": 1.9393891096115112, "learning_rate": 3.18338678640154e-06, "loss": 0.8394, "step": 32425 }, { "epoch": 0.3952323498226756, "grad_norm": 2.119058609008789, "learning_rate": 3.1830660679923032e-06, "loss": 0.9452, "step": 32430 }, { "epoch": 0.3952932860468234, "grad_norm": 2.0390145778656006, "learning_rate": 3.1827453495830663e-06, "loss": 0.8674, "step": 32435 }, { "epoch": 0.3953542222709712, "grad_norm": 1.867639422416687, "learning_rate": 3.1824246311738293e-06, "loss": 0.8252, "step": 32440 }, { "epoch": 0.395415158495119, "grad_norm": 1.7651162147521973, "learning_rate": 3.182103912764593e-06, "loss": 0.7966, "step": 32445 }, { "epoch": 0.3954760947192668, "grad_norm": 2.0219614505767822, "learning_rate": 3.181783194355356e-06, "loss": 0.8049, "step": 32450 }, { "epoch": 0.39553703094341464, "grad_norm": 1.9365721940994263, "learning_rate": 3.181462475946119e-06, "loss": 0.8065, "step": 32455 }, { "epoch": 0.3955979671675624, "grad_norm": 2.0214011669158936, "learning_rate": 3.181141757536883e-06, "loss": 0.8759, "step": 32460 }, { "epoch": 0.3956589033917102, "grad_norm": 1.7795112133026123, "learning_rate": 3.180821039127646e-06, "loss": 0.8705, "step": 32465 }, { "epoch": 0.39571983961585805, "grad_norm": 1.9153176546096802, "learning_rate": 3.1805003207184095e-06, "loss": 0.858, "step": 32470 }, { "epoch": 0.39578077584000587, "grad_norm": 1.8997116088867188, "learning_rate": 3.180179602309173e-06, "loss": 0.8437, "step": 32475 }, { "epoch": 0.39584171206415364, "grad_norm": 1.8551669120788574, "learning_rate": 3.179858883899936e-06, "loss": 0.8094, "step": 32480 }, { "epoch": 0.39590264828830146, "grad_norm": 1.6998339891433716, "learning_rate": 3.1795381654906994e-06, "loss": 0.8583, "step": 32485 }, { "epoch": 0.3959635845124493, "grad_norm": 1.9008331298828125, "learning_rate": 3.179217447081463e-06, "loss": 0.8102, "step": 32490 }, { "epoch": 0.39602452073659705, "grad_norm": 2.168290853500366, "learning_rate": 3.1788967286722263e-06, "loss": 0.8437, "step": 32495 }, { "epoch": 0.3960854569607449, "grad_norm": 2.003696918487549, "learning_rate": 3.1785760102629893e-06, "loss": 0.8134, "step": 32500 }, { "epoch": 0.3961463931848927, "grad_norm": 2.011415958404541, "learning_rate": 3.1782552918537527e-06, "loss": 0.8432, "step": 32505 }, { "epoch": 0.3962073294090405, "grad_norm": 2.1347970962524414, "learning_rate": 3.177934573444516e-06, "loss": 0.8106, "step": 32510 }, { "epoch": 0.3962682656331883, "grad_norm": 2.1035075187683105, "learning_rate": 3.177613855035279e-06, "loss": 0.8815, "step": 32515 }, { "epoch": 0.3963292018573361, "grad_norm": 1.7038826942443848, "learning_rate": 3.177293136626042e-06, "loss": 0.845, "step": 32520 }, { "epoch": 0.39639013808148393, "grad_norm": 1.958724021911621, "learning_rate": 3.176972418216806e-06, "loss": 0.8861, "step": 32525 }, { "epoch": 0.3964510743056317, "grad_norm": 1.8279914855957031, "learning_rate": 3.176651699807569e-06, "loss": 0.8469, "step": 32530 }, { "epoch": 0.3965120105297795, "grad_norm": 1.6720856428146362, "learning_rate": 3.176330981398332e-06, "loss": 0.8622, "step": 32535 }, { "epoch": 0.39657294675392735, "grad_norm": 1.9459177255630493, "learning_rate": 3.176010262989096e-06, "loss": 0.7951, "step": 32540 }, { "epoch": 0.39663388297807517, "grad_norm": 1.6403136253356934, "learning_rate": 3.175689544579859e-06, "loss": 0.8522, "step": 32545 }, { "epoch": 0.39669481920222294, "grad_norm": 1.8894175291061401, "learning_rate": 3.1753688261706224e-06, "loss": 0.883, "step": 32550 }, { "epoch": 0.39675575542637076, "grad_norm": 1.8366676568984985, "learning_rate": 3.175048107761386e-06, "loss": 0.8514, "step": 32555 }, { "epoch": 0.3968166916505186, "grad_norm": 1.9413702487945557, "learning_rate": 3.174727389352149e-06, "loss": 0.7811, "step": 32560 }, { "epoch": 0.39687762787466635, "grad_norm": 1.8007327318191528, "learning_rate": 3.1744066709429123e-06, "loss": 0.7963, "step": 32565 }, { "epoch": 0.3969385640988142, "grad_norm": 2.017496109008789, "learning_rate": 3.1740859525336758e-06, "loss": 0.9144, "step": 32570 }, { "epoch": 0.396999500322962, "grad_norm": 2.2876482009887695, "learning_rate": 3.173765234124439e-06, "loss": 0.7477, "step": 32575 }, { "epoch": 0.3970604365471098, "grad_norm": 1.8683167695999146, "learning_rate": 3.173444515715202e-06, "loss": 0.9577, "step": 32580 }, { "epoch": 0.3971213727712576, "grad_norm": 1.9014885425567627, "learning_rate": 3.173123797305966e-06, "loss": 0.8907, "step": 32585 }, { "epoch": 0.3971823089954054, "grad_norm": 1.6362870931625366, "learning_rate": 3.172803078896729e-06, "loss": 0.8932, "step": 32590 }, { "epoch": 0.39724324521955323, "grad_norm": 1.8690102100372314, "learning_rate": 3.172482360487492e-06, "loss": 0.8732, "step": 32595 }, { "epoch": 0.397304181443701, "grad_norm": 2.1868762969970703, "learning_rate": 3.172161642078255e-06, "loss": 0.9201, "step": 32600 }, { "epoch": 0.3973651176678488, "grad_norm": 1.8069145679473877, "learning_rate": 3.171840923669019e-06, "loss": 0.8202, "step": 32605 }, { "epoch": 0.39742605389199664, "grad_norm": 2.170654058456421, "learning_rate": 3.171520205259782e-06, "loss": 0.8658, "step": 32610 }, { "epoch": 0.39748699011614447, "grad_norm": 2.5492663383483887, "learning_rate": 3.1711994868505454e-06, "loss": 0.8404, "step": 32615 }, { "epoch": 0.39754792634029223, "grad_norm": 1.8463809490203857, "learning_rate": 3.170878768441309e-06, "loss": 0.8496, "step": 32620 }, { "epoch": 0.39760886256444006, "grad_norm": 2.78855299949646, "learning_rate": 3.170558050032072e-06, "loss": 0.7997, "step": 32625 }, { "epoch": 0.3976697987885879, "grad_norm": 2.193514108657837, "learning_rate": 3.1702373316228353e-06, "loss": 0.8023, "step": 32630 }, { "epoch": 0.39773073501273565, "grad_norm": 1.981448769569397, "learning_rate": 3.1699166132135988e-06, "loss": 0.8986, "step": 32635 }, { "epoch": 0.39779167123688347, "grad_norm": 2.3749232292175293, "learning_rate": 3.1695958948043622e-06, "loss": 0.9289, "step": 32640 }, { "epoch": 0.3978526074610313, "grad_norm": 2.017172336578369, "learning_rate": 3.1692751763951252e-06, "loss": 0.7913, "step": 32645 }, { "epoch": 0.3979135436851791, "grad_norm": 1.75872802734375, "learning_rate": 3.1689544579858887e-06, "loss": 0.8179, "step": 32650 }, { "epoch": 0.3979744799093269, "grad_norm": 1.7038509845733643, "learning_rate": 3.168633739576652e-06, "loss": 0.7911, "step": 32655 }, { "epoch": 0.3980354161334747, "grad_norm": 1.664465308189392, "learning_rate": 3.168313021167415e-06, "loss": 0.813, "step": 32660 }, { "epoch": 0.39809635235762253, "grad_norm": 1.9257099628448486, "learning_rate": 3.167992302758179e-06, "loss": 0.8663, "step": 32665 }, { "epoch": 0.3981572885817703, "grad_norm": 1.883957862854004, "learning_rate": 3.167671584348942e-06, "loss": 0.9133, "step": 32670 }, { "epoch": 0.3982182248059181, "grad_norm": 2.3009815216064453, "learning_rate": 3.167350865939705e-06, "loss": 0.9178, "step": 32675 }, { "epoch": 0.39827916103006594, "grad_norm": 1.8565102815628052, "learning_rate": 3.167030147530469e-06, "loss": 0.8863, "step": 32680 }, { "epoch": 0.39834009725421377, "grad_norm": 2.2140185832977295, "learning_rate": 3.166709429121232e-06, "loss": 0.9147, "step": 32685 }, { "epoch": 0.39840103347836153, "grad_norm": 2.2162351608276367, "learning_rate": 3.166388710711995e-06, "loss": 0.8651, "step": 32690 }, { "epoch": 0.39846196970250936, "grad_norm": 1.967036485671997, "learning_rate": 3.1660679923027584e-06, "loss": 0.8267, "step": 32695 }, { "epoch": 0.3985229059266572, "grad_norm": 2.163370370864868, "learning_rate": 3.165747273893522e-06, "loss": 0.8832, "step": 32700 }, { "epoch": 0.39858384215080495, "grad_norm": 2.355865240097046, "learning_rate": 3.165426555484285e-06, "loss": 0.8327, "step": 32705 }, { "epoch": 0.39864477837495277, "grad_norm": 2.0629329681396484, "learning_rate": 3.1651058370750483e-06, "loss": 0.8784, "step": 32710 }, { "epoch": 0.3987057145991006, "grad_norm": 2.047525644302368, "learning_rate": 3.1647851186658117e-06, "loss": 0.9046, "step": 32715 }, { "epoch": 0.3987666508232484, "grad_norm": 1.7030706405639648, "learning_rate": 3.164464400256575e-06, "loss": 0.8829, "step": 32720 }, { "epoch": 0.3988275870473962, "grad_norm": 1.8888577222824097, "learning_rate": 3.164143681847338e-06, "loss": 0.8799, "step": 32725 }, { "epoch": 0.398888523271544, "grad_norm": 1.9671010971069336, "learning_rate": 3.1638229634381016e-06, "loss": 0.8918, "step": 32730 }, { "epoch": 0.3989494594956918, "grad_norm": 2.56339430809021, "learning_rate": 3.163502245028865e-06, "loss": 0.8798, "step": 32735 }, { "epoch": 0.3990103957198396, "grad_norm": 1.887269139289856, "learning_rate": 3.163181526619628e-06, "loss": 0.8454, "step": 32740 }, { "epoch": 0.3990713319439874, "grad_norm": 1.8605337142944336, "learning_rate": 3.162860808210392e-06, "loss": 0.8387, "step": 32745 }, { "epoch": 0.39913226816813524, "grad_norm": 2.1519033908843994, "learning_rate": 3.162540089801155e-06, "loss": 0.928, "step": 32750 }, { "epoch": 0.39919320439228306, "grad_norm": 1.9856458902359009, "learning_rate": 3.162219371391918e-06, "loss": 0.8308, "step": 32755 }, { "epoch": 0.39925414061643083, "grad_norm": 1.926356554031372, "learning_rate": 3.161898652982682e-06, "loss": 0.8994, "step": 32760 }, { "epoch": 0.39931507684057865, "grad_norm": 2.105818510055542, "learning_rate": 3.161577934573445e-06, "loss": 0.8755, "step": 32765 }, { "epoch": 0.3993760130647265, "grad_norm": 2.180320978164673, "learning_rate": 3.161257216164208e-06, "loss": 0.8748, "step": 32770 }, { "epoch": 0.39943694928887424, "grad_norm": 2.1327197551727295, "learning_rate": 3.1609364977549713e-06, "loss": 0.8911, "step": 32775 }, { "epoch": 0.39949788551302207, "grad_norm": 2.348499298095703, "learning_rate": 3.1606157793457347e-06, "loss": 0.8684, "step": 32780 }, { "epoch": 0.3995588217371699, "grad_norm": 1.8355023860931396, "learning_rate": 3.1602950609364978e-06, "loss": 0.8318, "step": 32785 }, { "epoch": 0.3996197579613177, "grad_norm": 2.2165744304656982, "learning_rate": 3.159974342527261e-06, "loss": 0.7918, "step": 32790 }, { "epoch": 0.3996806941854655, "grad_norm": 1.9135653972625732, "learning_rate": 3.1596536241180246e-06, "loss": 0.8151, "step": 32795 }, { "epoch": 0.3997416304096133, "grad_norm": 1.8182843923568726, "learning_rate": 3.159332905708788e-06, "loss": 0.8842, "step": 32800 }, { "epoch": 0.3998025666337611, "grad_norm": 1.8206523656845093, "learning_rate": 3.159012187299551e-06, "loss": 0.8027, "step": 32805 }, { "epoch": 0.3998635028579089, "grad_norm": 2.0197644233703613, "learning_rate": 3.158691468890315e-06, "loss": 0.8102, "step": 32810 }, { "epoch": 0.3999244390820567, "grad_norm": 2.0936572551727295, "learning_rate": 3.158370750481078e-06, "loss": 0.8582, "step": 32815 }, { "epoch": 0.39998537530620454, "grad_norm": 1.7979633808135986, "learning_rate": 3.158050032071841e-06, "loss": 0.8743, "step": 32820 }, { "epoch": 0.40004631153035236, "grad_norm": 2.1636412143707275, "learning_rate": 3.157729313662605e-06, "loss": 0.8417, "step": 32825 }, { "epoch": 0.40010724775450013, "grad_norm": 1.7995339632034302, "learning_rate": 3.157408595253368e-06, "loss": 0.8385, "step": 32830 }, { "epoch": 0.40016818397864795, "grad_norm": 2.173297882080078, "learning_rate": 3.157087876844131e-06, "loss": 0.8086, "step": 32835 }, { "epoch": 0.4002291202027958, "grad_norm": 1.9066712856292725, "learning_rate": 3.1567671584348948e-06, "loss": 0.8618, "step": 32840 }, { "epoch": 0.40029005642694354, "grad_norm": 1.7941769361495972, "learning_rate": 3.1564464400256578e-06, "loss": 0.8607, "step": 32845 }, { "epoch": 0.40035099265109136, "grad_norm": 2.4594638347625732, "learning_rate": 3.1561257216164208e-06, "loss": 0.8401, "step": 32850 }, { "epoch": 0.4004119288752392, "grad_norm": 1.9261910915374756, "learning_rate": 3.1558050032071842e-06, "loss": 0.8474, "step": 32855 }, { "epoch": 0.40047286509938695, "grad_norm": 2.1530508995056152, "learning_rate": 3.1554842847979477e-06, "loss": 0.8726, "step": 32860 }, { "epoch": 0.4005338013235348, "grad_norm": 2.0904886722564697, "learning_rate": 3.155163566388711e-06, "loss": 0.8629, "step": 32865 }, { "epoch": 0.4005947375476826, "grad_norm": 1.87078857421875, "learning_rate": 3.154842847979474e-06, "loss": 0.8188, "step": 32870 }, { "epoch": 0.4006556737718304, "grad_norm": 1.9496653079986572, "learning_rate": 3.1545221295702376e-06, "loss": 0.8565, "step": 32875 }, { "epoch": 0.4007166099959782, "grad_norm": 2.0029258728027344, "learning_rate": 3.154201411161001e-06, "loss": 0.9163, "step": 32880 }, { "epoch": 0.400777546220126, "grad_norm": 1.9867371320724487, "learning_rate": 3.153880692751764e-06, "loss": 0.9244, "step": 32885 }, { "epoch": 0.40083848244427384, "grad_norm": 1.7934203147888184, "learning_rate": 3.153559974342528e-06, "loss": 0.8202, "step": 32890 }, { "epoch": 0.4008994186684216, "grad_norm": 1.9606703519821167, "learning_rate": 3.153239255933291e-06, "loss": 0.8505, "step": 32895 }, { "epoch": 0.4009603548925694, "grad_norm": 2.2249839305877686, "learning_rate": 3.152918537524054e-06, "loss": 0.8264, "step": 32900 }, { "epoch": 0.40102129111671725, "grad_norm": 1.805864691734314, "learning_rate": 3.1525978191148178e-06, "loss": 0.8616, "step": 32905 }, { "epoch": 0.40108222734086507, "grad_norm": 2.0729877948760986, "learning_rate": 3.152277100705581e-06, "loss": 0.8367, "step": 32910 }, { "epoch": 0.40114316356501284, "grad_norm": 1.8819502592086792, "learning_rate": 3.151956382296344e-06, "loss": 0.903, "step": 32915 }, { "epoch": 0.40120409978916066, "grad_norm": 2.0708425045013428, "learning_rate": 3.1516356638871077e-06, "loss": 0.872, "step": 32920 }, { "epoch": 0.4012650360133085, "grad_norm": 1.8203651905059814, "learning_rate": 3.1513149454778707e-06, "loss": 0.8319, "step": 32925 }, { "epoch": 0.40132597223745625, "grad_norm": 2.3138222694396973, "learning_rate": 3.1509942270686337e-06, "loss": 0.8845, "step": 32930 }, { "epoch": 0.4013869084616041, "grad_norm": 2.1070752143859863, "learning_rate": 3.1506735086593976e-06, "loss": 0.8136, "step": 32935 }, { "epoch": 0.4014478446857519, "grad_norm": 2.41005802154541, "learning_rate": 3.1503527902501606e-06, "loss": 0.8839, "step": 32940 }, { "epoch": 0.4015087809098997, "grad_norm": 2.037858009338379, "learning_rate": 3.150032071840924e-06, "loss": 0.8529, "step": 32945 }, { "epoch": 0.4015697171340475, "grad_norm": 2.700402021408081, "learning_rate": 3.149711353431687e-06, "loss": 0.8927, "step": 32950 }, { "epoch": 0.4016306533581953, "grad_norm": 1.7431998252868652, "learning_rate": 3.1493906350224505e-06, "loss": 0.8387, "step": 32955 }, { "epoch": 0.40169158958234313, "grad_norm": 2.1591711044311523, "learning_rate": 3.149069916613214e-06, "loss": 0.8368, "step": 32960 }, { "epoch": 0.4017525258064909, "grad_norm": 1.8841596841812134, "learning_rate": 3.148749198203977e-06, "loss": 0.8232, "step": 32965 }, { "epoch": 0.4018134620306387, "grad_norm": 2.244626045227051, "learning_rate": 3.148428479794741e-06, "loss": 0.7643, "step": 32970 }, { "epoch": 0.40187439825478655, "grad_norm": 2.121823310852051, "learning_rate": 3.148107761385504e-06, "loss": 0.8401, "step": 32975 }, { "epoch": 0.40193533447893437, "grad_norm": 1.9116184711456299, "learning_rate": 3.147787042976267e-06, "loss": 0.8518, "step": 32980 }, { "epoch": 0.40199627070308214, "grad_norm": 2.0126891136169434, "learning_rate": 3.1474663245670307e-06, "loss": 0.7714, "step": 32985 }, { "epoch": 0.40205720692722996, "grad_norm": 2.045762777328491, "learning_rate": 3.1471456061577937e-06, "loss": 0.7889, "step": 32990 }, { "epoch": 0.4021181431513778, "grad_norm": 1.6821588277816772, "learning_rate": 3.1468248877485567e-06, "loss": 0.8103, "step": 32995 }, { "epoch": 0.40217907937552555, "grad_norm": 2.0845727920532227, "learning_rate": 3.1465041693393206e-06, "loss": 0.7716, "step": 33000 }, { "epoch": 0.4022400155996734, "grad_norm": 2.014310121536255, "learning_rate": 3.1461834509300836e-06, "loss": 0.8094, "step": 33005 }, { "epoch": 0.4023009518238212, "grad_norm": 1.8702772855758667, "learning_rate": 3.1458627325208466e-06, "loss": 0.8229, "step": 33010 }, { "epoch": 0.402361888047969, "grad_norm": 1.9999006986618042, "learning_rate": 3.1455420141116105e-06, "loss": 0.8729, "step": 33015 }, { "epoch": 0.4024228242721168, "grad_norm": 1.7770230770111084, "learning_rate": 3.1452212957023735e-06, "loss": 0.8654, "step": 33020 }, { "epoch": 0.4024837604962646, "grad_norm": 1.97848641872406, "learning_rate": 3.144900577293137e-06, "loss": 0.8711, "step": 33025 }, { "epoch": 0.40254469672041243, "grad_norm": 2.420856475830078, "learning_rate": 3.1445798588839e-06, "loss": 0.8485, "step": 33030 }, { "epoch": 0.4026056329445602, "grad_norm": 2.0096547603607178, "learning_rate": 3.144259140474664e-06, "loss": 0.8434, "step": 33035 }, { "epoch": 0.402666569168708, "grad_norm": 2.136103868484497, "learning_rate": 3.143938422065427e-06, "loss": 0.9348, "step": 33040 }, { "epoch": 0.40272750539285584, "grad_norm": 2.3673110008239746, "learning_rate": 3.14361770365619e-06, "loss": 0.8196, "step": 33045 }, { "epoch": 0.40278844161700367, "grad_norm": 1.9030057191848755, "learning_rate": 3.1432969852469537e-06, "loss": 0.8558, "step": 33050 }, { "epoch": 0.40284937784115143, "grad_norm": 1.8645806312561035, "learning_rate": 3.1429762668377168e-06, "loss": 0.7975, "step": 33055 }, { "epoch": 0.40291031406529926, "grad_norm": 1.721824288368225, "learning_rate": 3.1426555484284798e-06, "loss": 0.8617, "step": 33060 }, { "epoch": 0.4029712502894471, "grad_norm": 2.0257256031036377, "learning_rate": 3.1423348300192436e-06, "loss": 0.8263, "step": 33065 }, { "epoch": 0.40303218651359485, "grad_norm": 2.183244466781616, "learning_rate": 3.1420141116100067e-06, "loss": 0.8705, "step": 33070 }, { "epoch": 0.40309312273774267, "grad_norm": 1.8189448118209839, "learning_rate": 3.1416933932007697e-06, "loss": 0.8379, "step": 33075 }, { "epoch": 0.4031540589618905, "grad_norm": 1.83244788646698, "learning_rate": 3.1413726747915335e-06, "loss": 0.7971, "step": 33080 }, { "epoch": 0.4032149951860383, "grad_norm": 1.9937113523483276, "learning_rate": 3.1410519563822965e-06, "loss": 0.8043, "step": 33085 }, { "epoch": 0.4032759314101861, "grad_norm": 1.9217420816421509, "learning_rate": 3.14073123797306e-06, "loss": 0.8576, "step": 33090 }, { "epoch": 0.4033368676343339, "grad_norm": 2.08650803565979, "learning_rate": 3.1404105195638234e-06, "loss": 0.8242, "step": 33095 }, { "epoch": 0.40339780385848173, "grad_norm": 1.7222439050674438, "learning_rate": 3.1400898011545864e-06, "loss": 0.8012, "step": 33100 }, { "epoch": 0.4034587400826295, "grad_norm": 2.136270046234131, "learning_rate": 3.13976908274535e-06, "loss": 0.9049, "step": 33105 }, { "epoch": 0.4035196763067773, "grad_norm": 1.8837796449661255, "learning_rate": 3.139448364336113e-06, "loss": 0.8073, "step": 33110 }, { "epoch": 0.40358061253092514, "grad_norm": 1.666785478591919, "learning_rate": 3.1391276459268768e-06, "loss": 0.9019, "step": 33115 }, { "epoch": 0.40364154875507297, "grad_norm": 2.0180411338806152, "learning_rate": 3.1388069275176398e-06, "loss": 0.8244, "step": 33120 }, { "epoch": 0.40370248497922073, "grad_norm": 1.8616819381713867, "learning_rate": 3.138486209108403e-06, "loss": 0.8468, "step": 33125 }, { "epoch": 0.40376342120336856, "grad_norm": 1.9376198053359985, "learning_rate": 3.1381654906991667e-06, "loss": 0.8375, "step": 33130 }, { "epoch": 0.4038243574275164, "grad_norm": 1.6670693159103394, "learning_rate": 3.1378447722899297e-06, "loss": 0.7528, "step": 33135 }, { "epoch": 0.40388529365166415, "grad_norm": 1.813995599746704, "learning_rate": 3.1375240538806927e-06, "loss": 0.8264, "step": 33140 }, { "epoch": 0.40394622987581197, "grad_norm": 1.850795030593872, "learning_rate": 3.1372033354714566e-06, "loss": 0.9078, "step": 33145 }, { "epoch": 0.4040071660999598, "grad_norm": 2.0, "learning_rate": 3.1368826170622196e-06, "loss": 0.9337, "step": 33150 }, { "epoch": 0.4040681023241076, "grad_norm": 1.8892978429794312, "learning_rate": 3.1365618986529826e-06, "loss": 0.8544, "step": 33155 }, { "epoch": 0.4041290385482554, "grad_norm": 1.8561151027679443, "learning_rate": 3.1362411802437465e-06, "loss": 0.8099, "step": 33160 }, { "epoch": 0.4041899747724032, "grad_norm": 1.9234610795974731, "learning_rate": 3.1359204618345095e-06, "loss": 0.8395, "step": 33165 }, { "epoch": 0.404250910996551, "grad_norm": 1.9234548807144165, "learning_rate": 3.135599743425273e-06, "loss": 0.9138, "step": 33170 }, { "epoch": 0.4043118472206988, "grad_norm": 2.2419862747192383, "learning_rate": 3.1352790250160364e-06, "loss": 0.8668, "step": 33175 }, { "epoch": 0.4043727834448466, "grad_norm": 1.8771857023239136, "learning_rate": 3.1349583066067994e-06, "loss": 0.8295, "step": 33180 }, { "epoch": 0.40443371966899444, "grad_norm": 1.7075914144515991, "learning_rate": 3.134637588197563e-06, "loss": 0.811, "step": 33185 }, { "epoch": 0.40449465589314226, "grad_norm": 1.961753010749817, "learning_rate": 3.134316869788326e-06, "loss": 0.969, "step": 33190 }, { "epoch": 0.40455559211729003, "grad_norm": 2.0065410137176514, "learning_rate": 3.1339961513790897e-06, "loss": 0.8675, "step": 33195 }, { "epoch": 0.40461652834143785, "grad_norm": 2.025682210922241, "learning_rate": 3.1336754329698527e-06, "loss": 0.8396, "step": 33200 }, { "epoch": 0.4046774645655857, "grad_norm": 1.7527308464050293, "learning_rate": 3.1333547145606157e-06, "loss": 0.7866, "step": 33205 }, { "epoch": 0.40473840078973344, "grad_norm": 2.2192063331604004, "learning_rate": 3.1330339961513796e-06, "loss": 0.7787, "step": 33210 }, { "epoch": 0.40479933701388127, "grad_norm": 1.925160527229309, "learning_rate": 3.1327132777421426e-06, "loss": 0.7631, "step": 33215 }, { "epoch": 0.4048602732380291, "grad_norm": 2.1955432891845703, "learning_rate": 3.1323925593329056e-06, "loss": 0.853, "step": 33220 }, { "epoch": 0.4049212094621769, "grad_norm": 1.810218334197998, "learning_rate": 3.1320718409236695e-06, "loss": 0.7929, "step": 33225 }, { "epoch": 0.4049821456863247, "grad_norm": 1.8295435905456543, "learning_rate": 3.1317511225144325e-06, "loss": 0.8884, "step": 33230 }, { "epoch": 0.4050430819104725, "grad_norm": 1.6941944360733032, "learning_rate": 3.1314304041051955e-06, "loss": 0.8862, "step": 33235 }, { "epoch": 0.4051040181346203, "grad_norm": 2.2430219650268555, "learning_rate": 3.1311096856959594e-06, "loss": 0.8285, "step": 33240 }, { "epoch": 0.4051649543587681, "grad_norm": 1.976181983947754, "learning_rate": 3.1307889672867224e-06, "loss": 0.8932, "step": 33245 }, { "epoch": 0.4052258905829159, "grad_norm": 1.7266255617141724, "learning_rate": 3.130468248877486e-06, "loss": 0.8523, "step": 33250 }, { "epoch": 0.40528682680706374, "grad_norm": 1.7598316669464111, "learning_rate": 3.1301475304682493e-06, "loss": 0.84, "step": 33255 }, { "epoch": 0.40534776303121156, "grad_norm": 1.9002611637115479, "learning_rate": 3.1298268120590123e-06, "loss": 0.8376, "step": 33260 }, { "epoch": 0.40540869925535933, "grad_norm": 2.607518196105957, "learning_rate": 3.1295060936497757e-06, "loss": 0.8819, "step": 33265 }, { "epoch": 0.40546963547950715, "grad_norm": 2.028109073638916, "learning_rate": 3.129185375240539e-06, "loss": 0.8142, "step": 33270 }, { "epoch": 0.405530571703655, "grad_norm": 2.01835036277771, "learning_rate": 3.1288646568313026e-06, "loss": 0.8628, "step": 33275 }, { "epoch": 0.40559150792780274, "grad_norm": 1.9023038148880005, "learning_rate": 3.1285439384220656e-06, "loss": 0.7833, "step": 33280 }, { "epoch": 0.40565244415195056, "grad_norm": 2.2113759517669678, "learning_rate": 3.1282232200128287e-06, "loss": 0.8439, "step": 33285 }, { "epoch": 0.4057133803760984, "grad_norm": 2.3245906829833984, "learning_rate": 3.1279025016035925e-06, "loss": 0.8893, "step": 33290 }, { "epoch": 0.4057743166002462, "grad_norm": 1.8262207508087158, "learning_rate": 3.1275817831943555e-06, "loss": 0.7982, "step": 33295 }, { "epoch": 0.405835252824394, "grad_norm": 2.1539735794067383, "learning_rate": 3.1272610647851186e-06, "loss": 0.8631, "step": 33300 }, { "epoch": 0.4058961890485418, "grad_norm": 1.8368899822235107, "learning_rate": 3.1269403463758824e-06, "loss": 0.8143, "step": 33305 }, { "epoch": 0.4059571252726896, "grad_norm": 1.836957335472107, "learning_rate": 3.1266196279666454e-06, "loss": 0.8812, "step": 33310 }, { "epoch": 0.4060180614968374, "grad_norm": 2.1483852863311768, "learning_rate": 3.126298909557409e-06, "loss": 0.9051, "step": 33315 }, { "epoch": 0.4060789977209852, "grad_norm": 2.2009809017181396, "learning_rate": 3.1259781911481723e-06, "loss": 0.8648, "step": 33320 }, { "epoch": 0.40613993394513304, "grad_norm": 1.6970012187957764, "learning_rate": 3.1256574727389353e-06, "loss": 0.8097, "step": 33325 }, { "epoch": 0.4062008701692808, "grad_norm": 1.630989670753479, "learning_rate": 3.1253367543296988e-06, "loss": 0.8089, "step": 33330 }, { "epoch": 0.4062618063934286, "grad_norm": 1.8544032573699951, "learning_rate": 3.125016035920462e-06, "loss": 0.8952, "step": 33335 }, { "epoch": 0.40632274261757645, "grad_norm": 2.136957883834839, "learning_rate": 3.1246953175112257e-06, "loss": 0.798, "step": 33340 }, { "epoch": 0.40638367884172427, "grad_norm": 1.8831449747085571, "learning_rate": 3.1243745991019887e-06, "loss": 0.8838, "step": 33345 }, { "epoch": 0.40644461506587204, "grad_norm": 2.1187140941619873, "learning_rate": 3.124053880692752e-06, "loss": 0.8165, "step": 33350 }, { "epoch": 0.40650555129001986, "grad_norm": 1.981742024421692, "learning_rate": 3.1237331622835155e-06, "loss": 0.8362, "step": 33355 }, { "epoch": 0.4065664875141677, "grad_norm": 1.9741840362548828, "learning_rate": 3.1234124438742786e-06, "loss": 0.8878, "step": 33360 }, { "epoch": 0.40662742373831545, "grad_norm": 2.0710031986236572, "learning_rate": 3.1230917254650416e-06, "loss": 0.8407, "step": 33365 }, { "epoch": 0.4066883599624633, "grad_norm": 2.5603115558624268, "learning_rate": 3.1227710070558054e-06, "loss": 0.8639, "step": 33370 }, { "epoch": 0.4067492961866111, "grad_norm": 2.12673282623291, "learning_rate": 3.1224502886465685e-06, "loss": 0.8393, "step": 33375 }, { "epoch": 0.4068102324107589, "grad_norm": 2.197174072265625, "learning_rate": 3.1221295702373315e-06, "loss": 0.8668, "step": 33380 }, { "epoch": 0.4068711686349067, "grad_norm": 1.9419513940811157, "learning_rate": 3.1218088518280953e-06, "loss": 0.8324, "step": 33385 }, { "epoch": 0.4069321048590545, "grad_norm": 1.7375913858413696, "learning_rate": 3.1214881334188584e-06, "loss": 0.8361, "step": 33390 }, { "epoch": 0.40699304108320233, "grad_norm": 2.2571067810058594, "learning_rate": 3.121167415009622e-06, "loss": 0.8808, "step": 33395 }, { "epoch": 0.4070539773073501, "grad_norm": 2.111224889755249, "learning_rate": 3.1208466966003852e-06, "loss": 0.7945, "step": 33400 }, { "epoch": 0.4071149135314979, "grad_norm": 2.6266162395477295, "learning_rate": 3.1205259781911483e-06, "loss": 0.8899, "step": 33405 }, { "epoch": 0.40717584975564575, "grad_norm": 3.3425827026367188, "learning_rate": 3.1202052597819117e-06, "loss": 0.8125, "step": 33410 }, { "epoch": 0.40723678597979357, "grad_norm": 1.7936780452728271, "learning_rate": 3.119884541372675e-06, "loss": 0.8293, "step": 33415 }, { "epoch": 0.40729772220394134, "grad_norm": 2.1522607803344727, "learning_rate": 3.1195638229634386e-06, "loss": 0.9063, "step": 33420 }, { "epoch": 0.40735865842808916, "grad_norm": 1.8655773401260376, "learning_rate": 3.1192431045542016e-06, "loss": 0.8171, "step": 33425 }, { "epoch": 0.407419594652237, "grad_norm": 1.9205747842788696, "learning_rate": 3.118922386144965e-06, "loss": 0.9242, "step": 33430 }, { "epoch": 0.40748053087638475, "grad_norm": 2.0862858295440674, "learning_rate": 3.1186016677357285e-06, "loss": 0.8535, "step": 33435 }, { "epoch": 0.4075414671005326, "grad_norm": 2.2739124298095703, "learning_rate": 3.1182809493264915e-06, "loss": 0.8096, "step": 33440 }, { "epoch": 0.4076024033246804, "grad_norm": 2.2454445362091064, "learning_rate": 3.1179602309172545e-06, "loss": 0.8553, "step": 33445 }, { "epoch": 0.4076633395488282, "grad_norm": 1.9115287065505981, "learning_rate": 3.1176395125080184e-06, "loss": 0.8609, "step": 33450 }, { "epoch": 0.407724275772976, "grad_norm": 2.185356616973877, "learning_rate": 3.1173187940987814e-06, "loss": 0.8632, "step": 33455 }, { "epoch": 0.4077852119971238, "grad_norm": 1.619816780090332, "learning_rate": 3.1169980756895444e-06, "loss": 0.8241, "step": 33460 }, { "epoch": 0.40784614822127163, "grad_norm": 2.1748316287994385, "learning_rate": 3.1166773572803083e-06, "loss": 0.823, "step": 33465 }, { "epoch": 0.4079070844454194, "grad_norm": 1.772461175918579, "learning_rate": 3.1163566388710713e-06, "loss": 0.9042, "step": 33470 }, { "epoch": 0.4079680206695672, "grad_norm": 2.1380691528320312, "learning_rate": 3.1160359204618347e-06, "loss": 0.7786, "step": 33475 }, { "epoch": 0.40802895689371504, "grad_norm": 1.864518404006958, "learning_rate": 3.115715202052598e-06, "loss": 0.9344, "step": 33480 }, { "epoch": 0.40808989311786287, "grad_norm": 2.219167947769165, "learning_rate": 3.115394483643361e-06, "loss": 0.8505, "step": 33485 }, { "epoch": 0.40815082934201063, "grad_norm": 2.089390754699707, "learning_rate": 3.1150737652341246e-06, "loss": 0.8288, "step": 33490 }, { "epoch": 0.40821176556615846, "grad_norm": 2.0049448013305664, "learning_rate": 3.114753046824888e-06, "loss": 0.7882, "step": 33495 }, { "epoch": 0.4082727017903063, "grad_norm": 2.030972480773926, "learning_rate": 3.1144323284156515e-06, "loss": 0.8872, "step": 33500 }, { "epoch": 0.40833363801445405, "grad_norm": 1.6736009120941162, "learning_rate": 3.1141116100064145e-06, "loss": 0.8566, "step": 33505 }, { "epoch": 0.40839457423860187, "grad_norm": 1.8984787464141846, "learning_rate": 3.1137908915971784e-06, "loss": 0.7414, "step": 33510 }, { "epoch": 0.4084555104627497, "grad_norm": 2.025869846343994, "learning_rate": 3.1134701731879414e-06, "loss": 0.8267, "step": 33515 }, { "epoch": 0.4085164466868975, "grad_norm": 2.006235122680664, "learning_rate": 3.1131494547787044e-06, "loss": 0.8259, "step": 33520 }, { "epoch": 0.4085773829110453, "grad_norm": 2.1504859924316406, "learning_rate": 3.1128287363694674e-06, "loss": 0.8841, "step": 33525 }, { "epoch": 0.4086383191351931, "grad_norm": 2.2858245372772217, "learning_rate": 3.1125080179602313e-06, "loss": 0.8225, "step": 33530 }, { "epoch": 0.40869925535934093, "grad_norm": 1.7934343814849854, "learning_rate": 3.1121872995509943e-06, "loss": 0.9059, "step": 33535 }, { "epoch": 0.4087601915834887, "grad_norm": 1.9984863996505737, "learning_rate": 3.1118665811417578e-06, "loss": 0.853, "step": 33540 }, { "epoch": 0.4088211278076365, "grad_norm": 2.006117582321167, "learning_rate": 3.111545862732521e-06, "loss": 0.7726, "step": 33545 }, { "epoch": 0.40888206403178434, "grad_norm": 1.7384121417999268, "learning_rate": 3.1112251443232842e-06, "loss": 0.7978, "step": 33550 }, { "epoch": 0.40894300025593217, "grad_norm": 2.0961294174194336, "learning_rate": 3.1109044259140477e-06, "loss": 0.8041, "step": 33555 }, { "epoch": 0.40900393648007993, "grad_norm": 2.1981732845306396, "learning_rate": 3.110583707504811e-06, "loss": 0.9037, "step": 33560 }, { "epoch": 0.40906487270422776, "grad_norm": 2.064579486846924, "learning_rate": 3.1102629890955745e-06, "loss": 0.827, "step": 33565 }, { "epoch": 0.4091258089283756, "grad_norm": 1.9175306558609009, "learning_rate": 3.1099422706863376e-06, "loss": 0.8537, "step": 33570 }, { "epoch": 0.40918674515252335, "grad_norm": 1.866649866104126, "learning_rate": 3.109621552277101e-06, "loss": 0.9163, "step": 33575 }, { "epoch": 0.40924768137667117, "grad_norm": 1.6922215223312378, "learning_rate": 3.1093008338678644e-06, "loss": 0.8204, "step": 33580 }, { "epoch": 0.409308617600819, "grad_norm": 2.0109493732452393, "learning_rate": 3.1089801154586274e-06, "loss": 0.8061, "step": 33585 }, { "epoch": 0.4093695538249668, "grad_norm": 2.115628480911255, "learning_rate": 3.1086593970493913e-06, "loss": 0.8586, "step": 33590 }, { "epoch": 0.4094304900491146, "grad_norm": 2.0643601417541504, "learning_rate": 3.1083386786401543e-06, "loss": 0.8813, "step": 33595 }, { "epoch": 0.4094914262732624, "grad_norm": 1.7905491590499878, "learning_rate": 3.1080179602309173e-06, "loss": 0.887, "step": 33600 }, { "epoch": 0.4095523624974102, "grad_norm": 1.7286717891693115, "learning_rate": 3.107697241821681e-06, "loss": 0.8529, "step": 33605 }, { "epoch": 0.409613298721558, "grad_norm": 2.0773520469665527, "learning_rate": 3.1073765234124442e-06, "loss": 0.7904, "step": 33610 }, { "epoch": 0.4096742349457058, "grad_norm": 2.018660068511963, "learning_rate": 3.1070558050032072e-06, "loss": 0.8768, "step": 33615 }, { "epoch": 0.40973517116985364, "grad_norm": 2.0136098861694336, "learning_rate": 3.1067350865939707e-06, "loss": 0.843, "step": 33620 }, { "epoch": 0.40979610739400146, "grad_norm": 1.7979663610458374, "learning_rate": 3.106414368184734e-06, "loss": 0.8013, "step": 33625 }, { "epoch": 0.40985704361814923, "grad_norm": 2.1255061626434326, "learning_rate": 3.106093649775497e-06, "loss": 0.8793, "step": 33630 }, { "epoch": 0.40991797984229705, "grad_norm": 1.879086971282959, "learning_rate": 3.1057729313662606e-06, "loss": 0.844, "step": 33635 }, { "epoch": 0.4099789160664449, "grad_norm": 2.0140953063964844, "learning_rate": 3.105452212957024e-06, "loss": 0.8648, "step": 33640 }, { "epoch": 0.41003985229059264, "grad_norm": 2.029771089553833, "learning_rate": 3.1051314945477875e-06, "loss": 0.801, "step": 33645 }, { "epoch": 0.41010078851474047, "grad_norm": 1.9610645771026611, "learning_rate": 3.1048107761385505e-06, "loss": 0.7595, "step": 33650 }, { "epoch": 0.4101617247388883, "grad_norm": 1.6782350540161133, "learning_rate": 3.104490057729314e-06, "loss": 0.85, "step": 33655 }, { "epoch": 0.4102226609630361, "grad_norm": 2.29622745513916, "learning_rate": 3.1041693393200774e-06, "loss": 0.8209, "step": 33660 }, { "epoch": 0.4102835971871839, "grad_norm": 2.2543282508850098, "learning_rate": 3.1038486209108404e-06, "loss": 0.8844, "step": 33665 }, { "epoch": 0.4103445334113317, "grad_norm": 1.857961654663086, "learning_rate": 3.1035279025016042e-06, "loss": 0.8278, "step": 33670 }, { "epoch": 0.4104054696354795, "grad_norm": 2.1544458866119385, "learning_rate": 3.1032071840923673e-06, "loss": 0.8677, "step": 33675 }, { "epoch": 0.4104664058596273, "grad_norm": 2.4100394248962402, "learning_rate": 3.1028864656831303e-06, "loss": 0.866, "step": 33680 }, { "epoch": 0.4105273420837751, "grad_norm": 2.1605684757232666, "learning_rate": 3.102565747273894e-06, "loss": 0.9309, "step": 33685 }, { "epoch": 0.41058827830792294, "grad_norm": 1.8088001012802124, "learning_rate": 3.102245028864657e-06, "loss": 0.7718, "step": 33690 }, { "epoch": 0.41064921453207076, "grad_norm": 2.0586185455322266, "learning_rate": 3.10192431045542e-06, "loss": 0.8542, "step": 33695 }, { "epoch": 0.41071015075621853, "grad_norm": 1.7741608619689941, "learning_rate": 3.1016035920461836e-06, "loss": 0.8055, "step": 33700 }, { "epoch": 0.41077108698036635, "grad_norm": 2.004201650619507, "learning_rate": 3.101282873636947e-06, "loss": 0.8044, "step": 33705 }, { "epoch": 0.4108320232045142, "grad_norm": 2.5584189891815186, "learning_rate": 3.10096215522771e-06, "loss": 0.849, "step": 33710 }, { "epoch": 0.41089295942866194, "grad_norm": 2.227398157119751, "learning_rate": 3.1006414368184735e-06, "loss": 0.9003, "step": 33715 }, { "epoch": 0.41095389565280976, "grad_norm": 2.023385524749756, "learning_rate": 3.100320718409237e-06, "loss": 0.8511, "step": 33720 }, { "epoch": 0.4110148318769576, "grad_norm": 2.3448798656463623, "learning_rate": 3.1000000000000004e-06, "loss": 0.8119, "step": 33725 }, { "epoch": 0.4110757681011054, "grad_norm": 2.2188045978546143, "learning_rate": 3.0996792815907634e-06, "loss": 0.8397, "step": 33730 }, { "epoch": 0.4111367043252532, "grad_norm": 2.186652421951294, "learning_rate": 3.0993585631815273e-06, "loss": 0.8677, "step": 33735 }, { "epoch": 0.411197640549401, "grad_norm": 2.0898473262786865, "learning_rate": 3.0990378447722903e-06, "loss": 0.8494, "step": 33740 }, { "epoch": 0.4112585767735488, "grad_norm": 1.7572994232177734, "learning_rate": 3.0987171263630533e-06, "loss": 0.8523, "step": 33745 }, { "epoch": 0.4113195129976966, "grad_norm": 1.8103375434875488, "learning_rate": 3.098396407953817e-06, "loss": 0.8546, "step": 33750 }, { "epoch": 0.4113804492218444, "grad_norm": 1.7386804819107056, "learning_rate": 3.09807568954458e-06, "loss": 0.9465, "step": 33755 }, { "epoch": 0.41144138544599224, "grad_norm": 2.017167329788208, "learning_rate": 3.097754971135343e-06, "loss": 0.8224, "step": 33760 }, { "epoch": 0.41150232167014006, "grad_norm": 2.218745231628418, "learning_rate": 3.097434252726107e-06, "loss": 0.8484, "step": 33765 }, { "epoch": 0.4115632578942878, "grad_norm": 2.067917585372925, "learning_rate": 3.09711353431687e-06, "loss": 0.8204, "step": 33770 }, { "epoch": 0.41162419411843565, "grad_norm": 1.799666166305542, "learning_rate": 3.096792815907633e-06, "loss": 0.8034, "step": 33775 }, { "epoch": 0.41168513034258347, "grad_norm": 2.04681396484375, "learning_rate": 3.0964720974983965e-06, "loss": 0.7834, "step": 33780 }, { "epoch": 0.41174606656673124, "grad_norm": 2.1256048679351807, "learning_rate": 3.09615137908916e-06, "loss": 0.771, "step": 33785 }, { "epoch": 0.41180700279087906, "grad_norm": 2.155031204223633, "learning_rate": 3.0958306606799234e-06, "loss": 0.8588, "step": 33790 }, { "epoch": 0.4118679390150269, "grad_norm": 1.7120845317840576, "learning_rate": 3.0955099422706864e-06, "loss": 0.9531, "step": 33795 }, { "epoch": 0.41192887523917465, "grad_norm": 2.0928521156311035, "learning_rate": 3.09518922386145e-06, "loss": 0.8457, "step": 33800 }, { "epoch": 0.4119898114633225, "grad_norm": 1.9337115287780762, "learning_rate": 3.0948685054522133e-06, "loss": 0.8642, "step": 33805 }, { "epoch": 0.4120507476874703, "grad_norm": 2.515716075897217, "learning_rate": 3.0945477870429763e-06, "loss": 0.8061, "step": 33810 }, { "epoch": 0.4121116839116181, "grad_norm": 1.7592681646347046, "learning_rate": 3.09422706863374e-06, "loss": 0.8958, "step": 33815 }, { "epoch": 0.4121726201357659, "grad_norm": 1.9118009805679321, "learning_rate": 3.0939063502245032e-06, "loss": 0.8357, "step": 33820 }, { "epoch": 0.4122335563599137, "grad_norm": 2.166652202606201, "learning_rate": 3.0935856318152662e-06, "loss": 0.8924, "step": 33825 }, { "epoch": 0.41229449258406153, "grad_norm": 2.0223045349121094, "learning_rate": 3.09326491340603e-06, "loss": 0.8424, "step": 33830 }, { "epoch": 0.4123554288082093, "grad_norm": 1.8751333951950073, "learning_rate": 3.092944194996793e-06, "loss": 0.8903, "step": 33835 }, { "epoch": 0.4124163650323571, "grad_norm": 1.7364767789840698, "learning_rate": 3.092623476587556e-06, "loss": 0.8077, "step": 33840 }, { "epoch": 0.41247730125650495, "grad_norm": 1.8165985345840454, "learning_rate": 3.09230275817832e-06, "loss": 0.7996, "step": 33845 }, { "epoch": 0.41253823748065277, "grad_norm": 1.90525484085083, "learning_rate": 3.091982039769083e-06, "loss": 0.7936, "step": 33850 }, { "epoch": 0.41259917370480054, "grad_norm": 2.2899010181427, "learning_rate": 3.091661321359846e-06, "loss": 0.8848, "step": 33855 }, { "epoch": 0.41266010992894836, "grad_norm": 2.3527212142944336, "learning_rate": 3.09134060295061e-06, "loss": 0.8075, "step": 33860 }, { "epoch": 0.4127210461530962, "grad_norm": 2.0610527992248535, "learning_rate": 3.091019884541373e-06, "loss": 0.8584, "step": 33865 }, { "epoch": 0.41278198237724395, "grad_norm": 1.9513607025146484, "learning_rate": 3.0906991661321363e-06, "loss": 0.8496, "step": 33870 }, { "epoch": 0.4128429186013918, "grad_norm": 1.7499520778656006, "learning_rate": 3.0903784477228994e-06, "loss": 0.8329, "step": 33875 }, { "epoch": 0.4129038548255396, "grad_norm": 2.1188037395477295, "learning_rate": 3.090057729313663e-06, "loss": 0.8926, "step": 33880 }, { "epoch": 0.4129647910496874, "grad_norm": 1.7561792135238647, "learning_rate": 3.0897370109044262e-06, "loss": 0.8315, "step": 33885 }, { "epoch": 0.4130257272738352, "grad_norm": 1.9824950695037842, "learning_rate": 3.0894162924951893e-06, "loss": 0.8426, "step": 33890 }, { "epoch": 0.413086663497983, "grad_norm": 1.8299388885498047, "learning_rate": 3.089095574085953e-06, "loss": 0.7985, "step": 33895 }, { "epoch": 0.41314759972213083, "grad_norm": 2.1995952129364014, "learning_rate": 3.088774855676716e-06, "loss": 0.8232, "step": 33900 }, { "epoch": 0.4132085359462786, "grad_norm": 2.0565297603607178, "learning_rate": 3.088454137267479e-06, "loss": 0.9039, "step": 33905 }, { "epoch": 0.4132694721704264, "grad_norm": 2.106802225112915, "learning_rate": 3.088133418858243e-06, "loss": 0.9288, "step": 33910 }, { "epoch": 0.41333040839457424, "grad_norm": 1.8465954065322876, "learning_rate": 3.087812700449006e-06, "loss": 0.8605, "step": 33915 }, { "epoch": 0.41339134461872207, "grad_norm": 1.9530497789382935, "learning_rate": 3.087491982039769e-06, "loss": 0.8979, "step": 33920 }, { "epoch": 0.41345228084286983, "grad_norm": 2.242586612701416, "learning_rate": 3.087171263630533e-06, "loss": 0.8384, "step": 33925 }, { "epoch": 0.41351321706701766, "grad_norm": 2.1019814014434814, "learning_rate": 3.086850545221296e-06, "loss": 0.8917, "step": 33930 }, { "epoch": 0.4135741532911655, "grad_norm": 1.9249818325042725, "learning_rate": 3.086529826812059e-06, "loss": 0.8141, "step": 33935 }, { "epoch": 0.41363508951531325, "grad_norm": 1.9780677556991577, "learning_rate": 3.086209108402823e-06, "loss": 0.7979, "step": 33940 }, { "epoch": 0.41369602573946107, "grad_norm": 1.8370336294174194, "learning_rate": 3.085888389993586e-06, "loss": 0.8203, "step": 33945 }, { "epoch": 0.4137569619636089, "grad_norm": 2.3505072593688965, "learning_rate": 3.0855676715843493e-06, "loss": 0.9069, "step": 33950 }, { "epoch": 0.4138178981877567, "grad_norm": 2.4621312618255615, "learning_rate": 3.0852469531751123e-06, "loss": 0.8418, "step": 33955 }, { "epoch": 0.4138788344119045, "grad_norm": 2.309861898422241, "learning_rate": 3.0849262347658757e-06, "loss": 0.7918, "step": 33960 }, { "epoch": 0.4139397706360523, "grad_norm": 2.4400124549865723, "learning_rate": 3.084605516356639e-06, "loss": 0.8026, "step": 33965 }, { "epoch": 0.41400070686020013, "grad_norm": 1.9821792840957642, "learning_rate": 3.084284797947402e-06, "loss": 0.8353, "step": 33970 }, { "epoch": 0.4140616430843479, "grad_norm": 2.0424046516418457, "learning_rate": 3.083964079538166e-06, "loss": 0.8731, "step": 33975 }, { "epoch": 0.4141225793084957, "grad_norm": 1.8943978548049927, "learning_rate": 3.083643361128929e-06, "loss": 0.8938, "step": 33980 }, { "epoch": 0.41418351553264354, "grad_norm": 1.946106195449829, "learning_rate": 3.083322642719692e-06, "loss": 0.7958, "step": 33985 }, { "epoch": 0.41424445175679137, "grad_norm": 1.9983445405960083, "learning_rate": 3.083001924310456e-06, "loss": 0.8365, "step": 33990 }, { "epoch": 0.41430538798093913, "grad_norm": 2.056198835372925, "learning_rate": 3.082681205901219e-06, "loss": 0.8392, "step": 33995 }, { "epoch": 0.41436632420508696, "grad_norm": 1.9297977685928345, "learning_rate": 3.082360487491982e-06, "loss": 0.8649, "step": 34000 }, { "epoch": 0.4144272604292348, "grad_norm": 1.680963158607483, "learning_rate": 3.082039769082746e-06, "loss": 0.8407, "step": 34005 }, { "epoch": 0.41448819665338255, "grad_norm": 2.0768582820892334, "learning_rate": 3.081719050673509e-06, "loss": 0.8922, "step": 34010 }, { "epoch": 0.41454913287753037, "grad_norm": 2.5445618629455566, "learning_rate": 3.0813983322642723e-06, "loss": 0.899, "step": 34015 }, { "epoch": 0.4146100691016782, "grad_norm": 1.948522925376892, "learning_rate": 3.0810776138550357e-06, "loss": 0.8634, "step": 34020 }, { "epoch": 0.414671005325826, "grad_norm": 1.8382995128631592, "learning_rate": 3.0807568954457988e-06, "loss": 0.7954, "step": 34025 }, { "epoch": 0.4147319415499738, "grad_norm": 1.8492263555526733, "learning_rate": 3.080436177036562e-06, "loss": 0.7507, "step": 34030 }, { "epoch": 0.4147928777741216, "grad_norm": 2.0444953441619873, "learning_rate": 3.0801154586273252e-06, "loss": 0.8359, "step": 34035 }, { "epoch": 0.4148538139982694, "grad_norm": 1.883847951889038, "learning_rate": 3.079794740218089e-06, "loss": 0.8837, "step": 34040 }, { "epoch": 0.4149147502224172, "grad_norm": 2.2625324726104736, "learning_rate": 3.079474021808852e-06, "loss": 0.7719, "step": 34045 }, { "epoch": 0.414975686446565, "grad_norm": 2.1734280586242676, "learning_rate": 3.079153303399615e-06, "loss": 0.8653, "step": 34050 }, { "epoch": 0.41503662267071284, "grad_norm": 1.708475947380066, "learning_rate": 3.078832584990379e-06, "loss": 0.8634, "step": 34055 }, { "epoch": 0.41509755889486066, "grad_norm": 2.131934404373169, "learning_rate": 3.078511866581142e-06, "loss": 0.8656, "step": 34060 }, { "epoch": 0.41515849511900843, "grad_norm": 1.85646653175354, "learning_rate": 3.078191148171905e-06, "loss": 0.8725, "step": 34065 }, { "epoch": 0.41521943134315625, "grad_norm": 1.9288976192474365, "learning_rate": 3.077870429762669e-06, "loss": 0.8523, "step": 34070 }, { "epoch": 0.4152803675673041, "grad_norm": 1.8574079275131226, "learning_rate": 3.077549711353432e-06, "loss": 0.878, "step": 34075 }, { "epoch": 0.41534130379145184, "grad_norm": 2.108588933944702, "learning_rate": 3.077228992944195e-06, "loss": 0.8541, "step": 34080 }, { "epoch": 0.41540224001559967, "grad_norm": 1.8672757148742676, "learning_rate": 3.0769082745349588e-06, "loss": 0.8282, "step": 34085 }, { "epoch": 0.4154631762397475, "grad_norm": 2.284252405166626, "learning_rate": 3.076587556125722e-06, "loss": 0.799, "step": 34090 }, { "epoch": 0.4155241124638953, "grad_norm": 2.1746010780334473, "learning_rate": 3.0762668377164852e-06, "loss": 0.858, "step": 34095 }, { "epoch": 0.4155850486880431, "grad_norm": 2.02720046043396, "learning_rate": 3.0759461193072487e-06, "loss": 0.8214, "step": 34100 }, { "epoch": 0.4156459849121909, "grad_norm": 2.2503278255462646, "learning_rate": 3.0756254008980117e-06, "loss": 0.8301, "step": 34105 }, { "epoch": 0.4157069211363387, "grad_norm": 1.7210824489593506, "learning_rate": 3.075304682488775e-06, "loss": 0.8964, "step": 34110 }, { "epoch": 0.4157678573604865, "grad_norm": 2.1330997943878174, "learning_rate": 3.074983964079538e-06, "loss": 0.8872, "step": 34115 }, { "epoch": 0.4158287935846343, "grad_norm": 1.9716739654541016, "learning_rate": 3.074663245670302e-06, "loss": 0.8468, "step": 34120 }, { "epoch": 0.41588972980878214, "grad_norm": 2.0026848316192627, "learning_rate": 3.074342527261065e-06, "loss": 0.858, "step": 34125 }, { "epoch": 0.41595066603292996, "grad_norm": 1.9580223560333252, "learning_rate": 3.074021808851828e-06, "loss": 0.8024, "step": 34130 }, { "epoch": 0.41601160225707773, "grad_norm": 2.1129651069641113, "learning_rate": 3.073701090442592e-06, "loss": 0.8361, "step": 34135 }, { "epoch": 0.41607253848122555, "grad_norm": 2.1092865467071533, "learning_rate": 3.073380372033355e-06, "loss": 0.8852, "step": 34140 }, { "epoch": 0.4161334747053734, "grad_norm": 1.9029698371887207, "learning_rate": 3.073059653624118e-06, "loss": 0.8322, "step": 34145 }, { "epoch": 0.41619441092952114, "grad_norm": 2.1777031421661377, "learning_rate": 3.072738935214882e-06, "loss": 0.8446, "step": 34150 }, { "epoch": 0.41625534715366896, "grad_norm": 1.9969253540039062, "learning_rate": 3.072418216805645e-06, "loss": 0.8537, "step": 34155 }, { "epoch": 0.4163162833778168, "grad_norm": 1.9856748580932617, "learning_rate": 3.072097498396408e-06, "loss": 0.8546, "step": 34160 }, { "epoch": 0.4163772196019646, "grad_norm": 1.9820750951766968, "learning_rate": 3.0717767799871717e-06, "loss": 0.8185, "step": 34165 }, { "epoch": 0.4164381558261124, "grad_norm": 1.9491219520568848, "learning_rate": 3.0714560615779347e-06, "loss": 0.8445, "step": 34170 }, { "epoch": 0.4164990920502602, "grad_norm": 1.7149999141693115, "learning_rate": 3.071135343168698e-06, "loss": 0.8132, "step": 34175 }, { "epoch": 0.416560028274408, "grad_norm": 1.6454709768295288, "learning_rate": 3.0708146247594616e-06, "loss": 0.7393, "step": 34180 }, { "epoch": 0.4166209644985558, "grad_norm": 2.2531042098999023, "learning_rate": 3.0704939063502246e-06, "loss": 0.8381, "step": 34185 }, { "epoch": 0.4166819007227036, "grad_norm": 1.9660295248031616, "learning_rate": 3.070173187940988e-06, "loss": 0.8155, "step": 34190 }, { "epoch": 0.41674283694685144, "grad_norm": 2.0354323387145996, "learning_rate": 3.0698524695317515e-06, "loss": 0.9091, "step": 34195 }, { "epoch": 0.41680377317099926, "grad_norm": 2.284458875656128, "learning_rate": 3.069531751122515e-06, "loss": 0.8616, "step": 34200 }, { "epoch": 0.416864709395147, "grad_norm": 1.8422759771347046, "learning_rate": 3.069211032713278e-06, "loss": 0.7952, "step": 34205 }, { "epoch": 0.41692564561929485, "grad_norm": 2.664095878601074, "learning_rate": 3.068890314304041e-06, "loss": 0.8183, "step": 34210 }, { "epoch": 0.41698658184344267, "grad_norm": 1.8922654390335083, "learning_rate": 3.068569595894805e-06, "loss": 0.8497, "step": 34215 }, { "epoch": 0.41704751806759044, "grad_norm": 1.8616540431976318, "learning_rate": 3.068248877485568e-06, "loss": 0.8353, "step": 34220 }, { "epoch": 0.41710845429173826, "grad_norm": 1.9429057836532593, "learning_rate": 3.067928159076331e-06, "loss": 0.846, "step": 34225 }, { "epoch": 0.4171693905158861, "grad_norm": 1.889351487159729, "learning_rate": 3.0676074406670947e-06, "loss": 0.8296, "step": 34230 }, { "epoch": 0.4172303267400339, "grad_norm": 1.8996398448944092, "learning_rate": 3.0672867222578577e-06, "loss": 0.8263, "step": 34235 }, { "epoch": 0.4172912629641817, "grad_norm": 2.146404266357422, "learning_rate": 3.066966003848621e-06, "loss": 0.8102, "step": 34240 }, { "epoch": 0.4173521991883295, "grad_norm": 1.8122433423995972, "learning_rate": 3.0666452854393846e-06, "loss": 0.8438, "step": 34245 }, { "epoch": 0.4174131354124773, "grad_norm": 1.763664960861206, "learning_rate": 3.0663245670301476e-06, "loss": 0.778, "step": 34250 }, { "epoch": 0.4174740716366251, "grad_norm": 2.051457405090332, "learning_rate": 3.066003848620911e-06, "loss": 0.8452, "step": 34255 }, { "epoch": 0.4175350078607729, "grad_norm": 2.362518787384033, "learning_rate": 3.0656831302116745e-06, "loss": 0.885, "step": 34260 }, { "epoch": 0.41759594408492073, "grad_norm": 1.883257269859314, "learning_rate": 3.065362411802438e-06, "loss": 0.8407, "step": 34265 }, { "epoch": 0.4176568803090685, "grad_norm": 2.0334622859954834, "learning_rate": 3.065041693393201e-06, "loss": 0.8564, "step": 34270 }, { "epoch": 0.4177178165332163, "grad_norm": 1.9419342279434204, "learning_rate": 3.0647209749839644e-06, "loss": 0.8299, "step": 34275 }, { "epoch": 0.41777875275736415, "grad_norm": 2.808041572570801, "learning_rate": 3.064400256574728e-06, "loss": 0.8452, "step": 34280 }, { "epoch": 0.41783968898151197, "grad_norm": 1.8346455097198486, "learning_rate": 3.064079538165491e-06, "loss": 0.8641, "step": 34285 }, { "epoch": 0.41790062520565974, "grad_norm": 2.071335554122925, "learning_rate": 3.063758819756254e-06, "loss": 0.8746, "step": 34290 }, { "epoch": 0.41796156142980756, "grad_norm": 2.084148645401001, "learning_rate": 3.0634381013470178e-06, "loss": 0.8517, "step": 34295 }, { "epoch": 0.4180224976539554, "grad_norm": 1.9335920810699463, "learning_rate": 3.0631173829377808e-06, "loss": 0.8353, "step": 34300 }, { "epoch": 0.41808343387810315, "grad_norm": 1.7533318996429443, "learning_rate": 3.062796664528544e-06, "loss": 0.7471, "step": 34305 }, { "epoch": 0.418144370102251, "grad_norm": 2.0209765434265137, "learning_rate": 3.0624759461193077e-06, "loss": 0.8189, "step": 34310 }, { "epoch": 0.4182053063263988, "grad_norm": 1.7408124208450317, "learning_rate": 3.0621552277100707e-06, "loss": 0.8485, "step": 34315 }, { "epoch": 0.4182662425505466, "grad_norm": 1.9814766645431519, "learning_rate": 3.061834509300834e-06, "loss": 0.8612, "step": 34320 }, { "epoch": 0.4183271787746944, "grad_norm": 1.7413301467895508, "learning_rate": 3.0615137908915976e-06, "loss": 0.8298, "step": 34325 }, { "epoch": 0.4183881149988422, "grad_norm": 1.9081180095672607, "learning_rate": 3.0611930724823606e-06, "loss": 0.8577, "step": 34330 }, { "epoch": 0.41844905122299003, "grad_norm": 2.304175853729248, "learning_rate": 3.060872354073124e-06, "loss": 0.891, "step": 34335 }, { "epoch": 0.4185099874471378, "grad_norm": 1.8503055572509766, "learning_rate": 3.0605516356638875e-06, "loss": 0.8488, "step": 34340 }, { "epoch": 0.4185709236712856, "grad_norm": 1.6122926473617554, "learning_rate": 3.060230917254651e-06, "loss": 0.8499, "step": 34345 }, { "epoch": 0.41863185989543344, "grad_norm": 1.8756080865859985, "learning_rate": 3.059910198845414e-06, "loss": 0.8812, "step": 34350 }, { "epoch": 0.41869279611958127, "grad_norm": 2.0557680130004883, "learning_rate": 3.0595894804361773e-06, "loss": 0.8751, "step": 34355 }, { "epoch": 0.41875373234372903, "grad_norm": 1.906772494316101, "learning_rate": 3.059268762026941e-06, "loss": 0.8071, "step": 34360 }, { "epoch": 0.41881466856787686, "grad_norm": 2.0431838035583496, "learning_rate": 3.058948043617704e-06, "loss": 0.7869, "step": 34365 }, { "epoch": 0.4188756047920247, "grad_norm": 1.9639006853103638, "learning_rate": 3.058627325208467e-06, "loss": 0.8898, "step": 34370 }, { "epoch": 0.41893654101617245, "grad_norm": 1.7855679988861084, "learning_rate": 3.0583066067992307e-06, "loss": 0.8493, "step": 34375 }, { "epoch": 0.41899747724032027, "grad_norm": 1.875122308731079, "learning_rate": 3.0579858883899937e-06, "loss": 0.8719, "step": 34380 }, { "epoch": 0.4190584134644681, "grad_norm": 1.9459025859832764, "learning_rate": 3.0576651699807567e-06, "loss": 0.8467, "step": 34385 }, { "epoch": 0.4191193496886159, "grad_norm": 1.9266133308410645, "learning_rate": 3.0573444515715206e-06, "loss": 0.8525, "step": 34390 }, { "epoch": 0.4191802859127637, "grad_norm": 1.6326079368591309, "learning_rate": 3.0570237331622836e-06, "loss": 0.8239, "step": 34395 }, { "epoch": 0.4192412221369115, "grad_norm": 2.398414134979248, "learning_rate": 3.056703014753047e-06, "loss": 0.9075, "step": 34400 }, { "epoch": 0.41930215836105933, "grad_norm": 2.1216087341308594, "learning_rate": 3.0563822963438105e-06, "loss": 0.8758, "step": 34405 }, { "epoch": 0.4193630945852071, "grad_norm": 1.7253409624099731, "learning_rate": 3.0560615779345735e-06, "loss": 0.8912, "step": 34410 }, { "epoch": 0.4194240308093549, "grad_norm": 1.8514842987060547, "learning_rate": 3.055740859525337e-06, "loss": 0.8592, "step": 34415 }, { "epoch": 0.41948496703350274, "grad_norm": 1.9861425161361694, "learning_rate": 3.0554201411161004e-06, "loss": 0.8883, "step": 34420 }, { "epoch": 0.41954590325765057, "grad_norm": 2.1764676570892334, "learning_rate": 3.055099422706864e-06, "loss": 0.8112, "step": 34425 }, { "epoch": 0.41960683948179833, "grad_norm": 2.1011886596679688, "learning_rate": 3.054778704297627e-06, "loss": 0.7965, "step": 34430 }, { "epoch": 0.41966777570594616, "grad_norm": 2.0223019123077393, "learning_rate": 3.0544579858883903e-06, "loss": 0.8977, "step": 34435 }, { "epoch": 0.419728711930094, "grad_norm": 2.180190086364746, "learning_rate": 3.0541372674791537e-06, "loss": 0.8348, "step": 34440 }, { "epoch": 0.41978964815424175, "grad_norm": 2.029567241668701, "learning_rate": 3.0538165490699167e-06, "loss": 0.8696, "step": 34445 }, { "epoch": 0.41985058437838957, "grad_norm": 2.0846402645111084, "learning_rate": 3.0534958306606806e-06, "loss": 0.9037, "step": 34450 }, { "epoch": 0.4199115206025374, "grad_norm": 1.795817494392395, "learning_rate": 3.0531751122514436e-06, "loss": 0.8407, "step": 34455 }, { "epoch": 0.4199724568266852, "grad_norm": 2.0180182456970215, "learning_rate": 3.0528543938422066e-06, "loss": 0.8387, "step": 34460 }, { "epoch": 0.420033393050833, "grad_norm": 1.8194663524627686, "learning_rate": 3.0525336754329696e-06, "loss": 0.8717, "step": 34465 }, { "epoch": 0.4200943292749808, "grad_norm": 1.9401929378509521, "learning_rate": 3.0522129570237335e-06, "loss": 0.8725, "step": 34470 }, { "epoch": 0.4201552654991286, "grad_norm": 1.7493213415145874, "learning_rate": 3.0518922386144965e-06, "loss": 0.8453, "step": 34475 }, { "epoch": 0.4202162017232764, "grad_norm": 1.782426118850708, "learning_rate": 3.05157152020526e-06, "loss": 0.9522, "step": 34480 }, { "epoch": 0.4202771379474242, "grad_norm": 1.8082093000411987, "learning_rate": 3.0512508017960234e-06, "loss": 0.9078, "step": 34485 }, { "epoch": 0.42033807417157204, "grad_norm": 2.1366162300109863, "learning_rate": 3.050930083386787e-06, "loss": 0.896, "step": 34490 }, { "epoch": 0.42039901039571986, "grad_norm": 1.7968355417251587, "learning_rate": 3.05060936497755e-06, "loss": 0.857, "step": 34495 }, { "epoch": 0.42045994661986763, "grad_norm": 1.6092185974121094, "learning_rate": 3.0502886465683133e-06, "loss": 0.8403, "step": 34500 }, { "epoch": 0.42052088284401545, "grad_norm": 1.923703908920288, "learning_rate": 3.0499679281590767e-06, "loss": 0.922, "step": 34505 }, { "epoch": 0.4205818190681633, "grad_norm": 1.9568291902542114, "learning_rate": 3.0496472097498398e-06, "loss": 0.8585, "step": 34510 }, { "epoch": 0.42064275529231104, "grad_norm": 2.067513942718506, "learning_rate": 3.0493264913406036e-06, "loss": 0.8572, "step": 34515 }, { "epoch": 0.42070369151645887, "grad_norm": 1.9715194702148438, "learning_rate": 3.0490057729313666e-06, "loss": 0.8428, "step": 34520 }, { "epoch": 0.4207646277406067, "grad_norm": 1.6920570135116577, "learning_rate": 3.0486850545221297e-06, "loss": 0.8776, "step": 34525 }, { "epoch": 0.4208255639647545, "grad_norm": 1.7753463983535767, "learning_rate": 3.0483643361128935e-06, "loss": 0.8531, "step": 34530 }, { "epoch": 0.4208865001889023, "grad_norm": 2.0250675678253174, "learning_rate": 3.0480436177036565e-06, "loss": 0.8829, "step": 34535 }, { "epoch": 0.4209474364130501, "grad_norm": 1.8063706159591675, "learning_rate": 3.0477228992944196e-06, "loss": 0.819, "step": 34540 }, { "epoch": 0.4210083726371979, "grad_norm": 2.0857303142547607, "learning_rate": 3.047402180885183e-06, "loss": 0.8181, "step": 34545 }, { "epoch": 0.4210693088613457, "grad_norm": 1.7604955434799194, "learning_rate": 3.0470814624759464e-06, "loss": 0.8253, "step": 34550 }, { "epoch": 0.4211302450854935, "grad_norm": 2.7981021404266357, "learning_rate": 3.0467607440667095e-06, "loss": 0.8502, "step": 34555 }, { "epoch": 0.42119118130964134, "grad_norm": 2.091189384460449, "learning_rate": 3.046440025657473e-06, "loss": 0.8516, "step": 34560 }, { "epoch": 0.42125211753378916, "grad_norm": 2.1288223266601562, "learning_rate": 3.0461193072482363e-06, "loss": 0.7938, "step": 34565 }, { "epoch": 0.42131305375793693, "grad_norm": 1.7642568349838257, "learning_rate": 3.0457985888389998e-06, "loss": 0.8506, "step": 34570 }, { "epoch": 0.42137398998208475, "grad_norm": 1.9696263074874878, "learning_rate": 3.045477870429763e-06, "loss": 0.8274, "step": 34575 }, { "epoch": 0.4214349262062326, "grad_norm": 1.9554959535598755, "learning_rate": 3.0451571520205262e-06, "loss": 0.9156, "step": 34580 }, { "epoch": 0.42149586243038034, "grad_norm": 2.4245822429656982, "learning_rate": 3.0448364336112897e-06, "loss": 0.9152, "step": 34585 }, { "epoch": 0.42155679865452816, "grad_norm": 2.0084149837493896, "learning_rate": 3.0445157152020527e-06, "loss": 0.8852, "step": 34590 }, { "epoch": 0.421617734878676, "grad_norm": 2.2659900188446045, "learning_rate": 3.0441949967928166e-06, "loss": 0.8248, "step": 34595 }, { "epoch": 0.4216786711028238, "grad_norm": 1.8895907402038574, "learning_rate": 3.0438742783835796e-06, "loss": 0.8316, "step": 34600 }, { "epoch": 0.4217396073269716, "grad_norm": 2.0171258449554443, "learning_rate": 3.0435535599743426e-06, "loss": 0.8247, "step": 34605 }, { "epoch": 0.4218005435511194, "grad_norm": 1.9976816177368164, "learning_rate": 3.0432328415651065e-06, "loss": 0.824, "step": 34610 }, { "epoch": 0.4218614797752672, "grad_norm": 1.913918137550354, "learning_rate": 3.0429121231558695e-06, "loss": 0.8523, "step": 34615 }, { "epoch": 0.421922415999415, "grad_norm": 1.9925825595855713, "learning_rate": 3.0425914047466325e-06, "loss": 0.8108, "step": 34620 }, { "epoch": 0.4219833522235628, "grad_norm": 2.0453431606292725, "learning_rate": 3.042270686337396e-06, "loss": 0.8725, "step": 34625 }, { "epoch": 0.42204428844771064, "grad_norm": 1.9881912469863892, "learning_rate": 3.0419499679281594e-06, "loss": 0.9, "step": 34630 }, { "epoch": 0.42210522467185846, "grad_norm": 2.0668883323669434, "learning_rate": 3.0416292495189224e-06, "loss": 0.8423, "step": 34635 }, { "epoch": 0.4221661608960062, "grad_norm": 2.6961939334869385, "learning_rate": 3.041308531109686e-06, "loss": 0.8825, "step": 34640 }, { "epoch": 0.42222709712015405, "grad_norm": 2.098550319671631, "learning_rate": 3.0409878127004493e-06, "loss": 0.9289, "step": 34645 }, { "epoch": 0.42228803334430187, "grad_norm": 2.02864670753479, "learning_rate": 3.0406670942912127e-06, "loss": 0.855, "step": 34650 }, { "epoch": 0.42234896956844964, "grad_norm": 1.957326054573059, "learning_rate": 3.0403463758819757e-06, "loss": 0.8899, "step": 34655 }, { "epoch": 0.42240990579259746, "grad_norm": 2.3394551277160645, "learning_rate": 3.040025657472739e-06, "loss": 0.7834, "step": 34660 }, { "epoch": 0.4224708420167453, "grad_norm": 2.012199878692627, "learning_rate": 3.0397049390635026e-06, "loss": 0.8341, "step": 34665 }, { "epoch": 0.4225317782408931, "grad_norm": 1.8743131160736084, "learning_rate": 3.0393842206542656e-06, "loss": 0.8295, "step": 34670 }, { "epoch": 0.4225927144650409, "grad_norm": 2.009448766708374, "learning_rate": 3.0390635022450295e-06, "loss": 0.8547, "step": 34675 }, { "epoch": 0.4226536506891887, "grad_norm": 1.6226965188980103, "learning_rate": 3.0387427838357925e-06, "loss": 0.7903, "step": 34680 }, { "epoch": 0.4227145869133365, "grad_norm": 2.1439056396484375, "learning_rate": 3.0384220654265555e-06, "loss": 0.855, "step": 34685 }, { "epoch": 0.4227755231374843, "grad_norm": 1.8627113103866577, "learning_rate": 3.0381013470173194e-06, "loss": 0.8924, "step": 34690 }, { "epoch": 0.4228364593616321, "grad_norm": 1.7858272790908813, "learning_rate": 3.0377806286080824e-06, "loss": 0.9187, "step": 34695 }, { "epoch": 0.42289739558577993, "grad_norm": 2.4000420570373535, "learning_rate": 3.0374599101988454e-06, "loss": 0.9186, "step": 34700 }, { "epoch": 0.42295833180992776, "grad_norm": 1.6152210235595703, "learning_rate": 3.037139191789609e-06, "loss": 0.8513, "step": 34705 }, { "epoch": 0.4230192680340755, "grad_norm": 1.919111967086792, "learning_rate": 3.0368184733803723e-06, "loss": 0.7921, "step": 34710 }, { "epoch": 0.42308020425822335, "grad_norm": 2.056588649749756, "learning_rate": 3.0364977549711357e-06, "loss": 0.9068, "step": 34715 }, { "epoch": 0.42314114048237117, "grad_norm": 1.8509117364883423, "learning_rate": 3.0361770365618987e-06, "loss": 0.8321, "step": 34720 }, { "epoch": 0.42320207670651894, "grad_norm": 1.6541450023651123, "learning_rate": 3.035856318152662e-06, "loss": 0.8649, "step": 34725 }, { "epoch": 0.42326301293066676, "grad_norm": 2.0707814693450928, "learning_rate": 3.0355355997434256e-06, "loss": 0.8142, "step": 34730 }, { "epoch": 0.4233239491548146, "grad_norm": 2.074657678604126, "learning_rate": 3.0352148813341886e-06, "loss": 0.9288, "step": 34735 }, { "epoch": 0.4233848853789624, "grad_norm": 2.051236391067505, "learning_rate": 3.0348941629249525e-06, "loss": 0.8682, "step": 34740 }, { "epoch": 0.4234458216031102, "grad_norm": 2.0955450534820557, "learning_rate": 3.0345734445157155e-06, "loss": 0.8862, "step": 34745 }, { "epoch": 0.423506757827258, "grad_norm": 1.9925963878631592, "learning_rate": 3.0342527261064785e-06, "loss": 0.8371, "step": 34750 }, { "epoch": 0.4235676940514058, "grad_norm": 2.0020716190338135, "learning_rate": 3.0339320076972424e-06, "loss": 0.7817, "step": 34755 }, { "epoch": 0.4236286302755536, "grad_norm": 1.9074870347976685, "learning_rate": 3.0336112892880054e-06, "loss": 0.8206, "step": 34760 }, { "epoch": 0.4236895664997014, "grad_norm": 2.445516347885132, "learning_rate": 3.0332905708787684e-06, "loss": 0.9325, "step": 34765 }, { "epoch": 0.42375050272384923, "grad_norm": 1.8392032384872437, "learning_rate": 3.0329698524695323e-06, "loss": 0.8444, "step": 34770 }, { "epoch": 0.423811438947997, "grad_norm": 2.1093571186065674, "learning_rate": 3.0326491340602953e-06, "loss": 0.756, "step": 34775 }, { "epoch": 0.4238723751721448, "grad_norm": 2.1000800132751465, "learning_rate": 3.0323284156510583e-06, "loss": 0.8433, "step": 34780 }, { "epoch": 0.42393331139629264, "grad_norm": 2.0618536472320557, "learning_rate": 3.032007697241822e-06, "loss": 0.8063, "step": 34785 }, { "epoch": 0.42399424762044047, "grad_norm": 2.017141819000244, "learning_rate": 3.0316869788325852e-06, "loss": 0.8634, "step": 34790 }, { "epoch": 0.42405518384458823, "grad_norm": 2.011598825454712, "learning_rate": 3.0313662604233487e-06, "loss": 0.9123, "step": 34795 }, { "epoch": 0.42411612006873606, "grad_norm": 1.8545407056808472, "learning_rate": 3.0310455420141117e-06, "loss": 0.8273, "step": 34800 }, { "epoch": 0.4241770562928839, "grad_norm": 2.133140802383423, "learning_rate": 3.030724823604875e-06, "loss": 0.8421, "step": 34805 }, { "epoch": 0.42423799251703165, "grad_norm": 2.112703800201416, "learning_rate": 3.0304041051956386e-06, "loss": 0.817, "step": 34810 }, { "epoch": 0.42429892874117947, "grad_norm": 2.2525858879089355, "learning_rate": 3.0300833867864016e-06, "loss": 0.8668, "step": 34815 }, { "epoch": 0.4243598649653273, "grad_norm": 2.0456442832946777, "learning_rate": 3.0297626683771654e-06, "loss": 0.8892, "step": 34820 }, { "epoch": 0.4244208011894751, "grad_norm": 2.034263849258423, "learning_rate": 3.0294419499679285e-06, "loss": 0.8293, "step": 34825 }, { "epoch": 0.4244817374136229, "grad_norm": 1.7971152067184448, "learning_rate": 3.0291212315586915e-06, "loss": 0.843, "step": 34830 }, { "epoch": 0.4245426736377707, "grad_norm": 1.9330731630325317, "learning_rate": 3.0288005131494553e-06, "loss": 0.8838, "step": 34835 }, { "epoch": 0.42460360986191853, "grad_norm": 2.1175174713134766, "learning_rate": 3.0284797947402184e-06, "loss": 0.8384, "step": 34840 }, { "epoch": 0.4246645460860663, "grad_norm": 1.6148998737335205, "learning_rate": 3.0281590763309814e-06, "loss": 0.8172, "step": 34845 }, { "epoch": 0.4247254823102141, "grad_norm": 1.7704728841781616, "learning_rate": 3.0278383579217452e-06, "loss": 0.8825, "step": 34850 }, { "epoch": 0.42478641853436194, "grad_norm": 2.069131374359131, "learning_rate": 3.0275176395125082e-06, "loss": 0.7777, "step": 34855 }, { "epoch": 0.42484735475850977, "grad_norm": 1.8831712007522583, "learning_rate": 3.0271969211032713e-06, "loss": 0.8546, "step": 34860 }, { "epoch": 0.42490829098265753, "grad_norm": 1.967067003250122, "learning_rate": 3.026876202694035e-06, "loss": 0.9393, "step": 34865 }, { "epoch": 0.42496922720680536, "grad_norm": 2.340771198272705, "learning_rate": 3.026555484284798e-06, "loss": 0.8173, "step": 34870 }, { "epoch": 0.4250301634309532, "grad_norm": 1.8147883415222168, "learning_rate": 3.0262347658755616e-06, "loss": 0.8845, "step": 34875 }, { "epoch": 0.42509109965510095, "grad_norm": 2.0111842155456543, "learning_rate": 3.0259140474663246e-06, "loss": 0.8156, "step": 34880 }, { "epoch": 0.42515203587924877, "grad_norm": 1.799464464187622, "learning_rate": 3.025593329057088e-06, "loss": 0.8963, "step": 34885 }, { "epoch": 0.4252129721033966, "grad_norm": 2.3646674156188965, "learning_rate": 3.0252726106478515e-06, "loss": 0.941, "step": 34890 }, { "epoch": 0.4252739083275444, "grad_norm": 1.9763877391815186, "learning_rate": 3.0249518922386145e-06, "loss": 0.8716, "step": 34895 }, { "epoch": 0.4253348445516922, "grad_norm": 1.6746872663497925, "learning_rate": 3.0246311738293784e-06, "loss": 0.8933, "step": 34900 }, { "epoch": 0.42539578077584, "grad_norm": 1.9520343542099, "learning_rate": 3.0243104554201414e-06, "loss": 0.8364, "step": 34905 }, { "epoch": 0.4254567169999878, "grad_norm": 1.7354085445404053, "learning_rate": 3.0239897370109044e-06, "loss": 0.9574, "step": 34910 }, { "epoch": 0.4255176532241356, "grad_norm": 1.8482871055603027, "learning_rate": 3.0236690186016683e-06, "loss": 0.8424, "step": 34915 }, { "epoch": 0.4255785894482834, "grad_norm": 1.8391783237457275, "learning_rate": 3.0233483001924313e-06, "loss": 0.7773, "step": 34920 }, { "epoch": 0.42563952567243124, "grad_norm": 1.7442457675933838, "learning_rate": 3.0230275817831943e-06, "loss": 0.8405, "step": 34925 }, { "epoch": 0.42570046189657906, "grad_norm": 2.1378602981567383, "learning_rate": 3.022706863373958e-06, "loss": 0.9294, "step": 34930 }, { "epoch": 0.42576139812072683, "grad_norm": 1.8463609218597412, "learning_rate": 3.022386144964721e-06, "loss": 0.8624, "step": 34935 }, { "epoch": 0.42582233434487465, "grad_norm": 1.917428970336914, "learning_rate": 3.0220654265554846e-06, "loss": 0.9368, "step": 34940 }, { "epoch": 0.4258832705690225, "grad_norm": 2.0616719722747803, "learning_rate": 3.021744708146248e-06, "loss": 0.7472, "step": 34945 }, { "epoch": 0.42594420679317024, "grad_norm": 1.7811261415481567, "learning_rate": 3.021423989737011e-06, "loss": 0.8674, "step": 34950 }, { "epoch": 0.42600514301731807, "grad_norm": 1.824055790901184, "learning_rate": 3.0211032713277745e-06, "loss": 0.7973, "step": 34955 }, { "epoch": 0.4260660792414659, "grad_norm": 1.6401335000991821, "learning_rate": 3.0207825529185375e-06, "loss": 0.8872, "step": 34960 }, { "epoch": 0.4261270154656137, "grad_norm": 1.8605221509933472, "learning_rate": 3.0204618345093014e-06, "loss": 0.876, "step": 34965 }, { "epoch": 0.4261879516897615, "grad_norm": 2.1520867347717285, "learning_rate": 3.0201411161000644e-06, "loss": 0.8469, "step": 34970 }, { "epoch": 0.4262488879139093, "grad_norm": 2.1795544624328613, "learning_rate": 3.0198203976908274e-06, "loss": 0.8277, "step": 34975 }, { "epoch": 0.4263098241380571, "grad_norm": 1.823663592338562, "learning_rate": 3.0194996792815913e-06, "loss": 0.8279, "step": 34980 }, { "epoch": 0.4263707603622049, "grad_norm": 1.824204444885254, "learning_rate": 3.0191789608723543e-06, "loss": 0.8377, "step": 34985 }, { "epoch": 0.4264316965863527, "grad_norm": 2.2877159118652344, "learning_rate": 3.0188582424631173e-06, "loss": 0.8758, "step": 34990 }, { "epoch": 0.42649263281050054, "grad_norm": 1.900290608406067, "learning_rate": 3.018537524053881e-06, "loss": 0.8449, "step": 34995 }, { "epoch": 0.42655356903464836, "grad_norm": 2.105480670928955, "learning_rate": 3.018216805644644e-06, "loss": 0.8464, "step": 35000 }, { "epoch": 0.42661450525879613, "grad_norm": 1.9395949840545654, "learning_rate": 3.0178960872354072e-06, "loss": 0.815, "step": 35005 }, { "epoch": 0.42667544148294395, "grad_norm": 2.040468215942383, "learning_rate": 3.017575368826171e-06, "loss": 0.9265, "step": 35010 }, { "epoch": 0.4267363777070918, "grad_norm": 1.9809613227844238, "learning_rate": 3.017254650416934e-06, "loss": 0.8408, "step": 35015 }, { "epoch": 0.42679731393123954, "grad_norm": 2.4834704399108887, "learning_rate": 3.0169339320076975e-06, "loss": 0.885, "step": 35020 }, { "epoch": 0.42685825015538736, "grad_norm": 1.8308268785476685, "learning_rate": 3.016613213598461e-06, "loss": 0.7934, "step": 35025 }, { "epoch": 0.4269191863795352, "grad_norm": 1.9200525283813477, "learning_rate": 3.016292495189224e-06, "loss": 0.7448, "step": 35030 }, { "epoch": 0.426980122603683, "grad_norm": 2.248523473739624, "learning_rate": 3.0159717767799874e-06, "loss": 0.8467, "step": 35035 }, { "epoch": 0.4270410588278308, "grad_norm": 1.7878966331481934, "learning_rate": 3.0156510583707505e-06, "loss": 0.8706, "step": 35040 }, { "epoch": 0.4271019950519786, "grad_norm": 2.415555715560913, "learning_rate": 3.0153303399615143e-06, "loss": 0.8851, "step": 35045 }, { "epoch": 0.4271629312761264, "grad_norm": 1.8682003021240234, "learning_rate": 3.0150096215522773e-06, "loss": 0.9225, "step": 35050 }, { "epoch": 0.4272238675002742, "grad_norm": 1.823752999305725, "learning_rate": 3.0146889031430404e-06, "loss": 0.8572, "step": 35055 }, { "epoch": 0.427284803724422, "grad_norm": 1.7124028205871582, "learning_rate": 3.0143681847338042e-06, "loss": 0.8705, "step": 35060 }, { "epoch": 0.42734573994856984, "grad_norm": 1.6851435899734497, "learning_rate": 3.0140474663245672e-06, "loss": 0.8795, "step": 35065 }, { "epoch": 0.42740667617271766, "grad_norm": 1.7731784582138062, "learning_rate": 3.0137267479153303e-06, "loss": 0.8311, "step": 35070 }, { "epoch": 0.4274676123968654, "grad_norm": 2.091540575027466, "learning_rate": 3.013406029506094e-06, "loss": 0.8129, "step": 35075 }, { "epoch": 0.42752854862101325, "grad_norm": 1.891697883605957, "learning_rate": 3.013085311096857e-06, "loss": 0.7916, "step": 35080 }, { "epoch": 0.42758948484516107, "grad_norm": 1.8777456283569336, "learning_rate": 3.01276459268762e-06, "loss": 0.9708, "step": 35085 }, { "epoch": 0.42765042106930884, "grad_norm": 2.0186781883239746, "learning_rate": 3.012443874278384e-06, "loss": 0.8612, "step": 35090 }, { "epoch": 0.42771135729345666, "grad_norm": 1.8039456605911255, "learning_rate": 3.012123155869147e-06, "loss": 0.8445, "step": 35095 }, { "epoch": 0.4277722935176045, "grad_norm": 1.8665069341659546, "learning_rate": 3.0118024374599105e-06, "loss": 0.8122, "step": 35100 }, { "epoch": 0.4278332297417523, "grad_norm": 2.0261969566345215, "learning_rate": 3.011481719050674e-06, "loss": 0.8573, "step": 35105 }, { "epoch": 0.4278941659659001, "grad_norm": 1.9443708658218384, "learning_rate": 3.011161000641437e-06, "loss": 0.8122, "step": 35110 }, { "epoch": 0.4279551021900479, "grad_norm": 1.9385428428649902, "learning_rate": 3.0108402822322004e-06, "loss": 0.8162, "step": 35115 }, { "epoch": 0.4280160384141957, "grad_norm": 1.8351632356643677, "learning_rate": 3.010519563822964e-06, "loss": 0.8198, "step": 35120 }, { "epoch": 0.4280769746383435, "grad_norm": 2.194816827774048, "learning_rate": 3.0101988454137272e-06, "loss": 0.7927, "step": 35125 }, { "epoch": 0.4281379108624913, "grad_norm": 1.9374415874481201, "learning_rate": 3.0098781270044903e-06, "loss": 0.7932, "step": 35130 }, { "epoch": 0.42819884708663913, "grad_norm": 1.9894044399261475, "learning_rate": 3.0095574085952533e-06, "loss": 0.8107, "step": 35135 }, { "epoch": 0.42825978331078696, "grad_norm": 1.825574278831482, "learning_rate": 3.009236690186017e-06, "loss": 0.8991, "step": 35140 }, { "epoch": 0.4283207195349347, "grad_norm": 1.8300666809082031, "learning_rate": 3.00891597177678e-06, "loss": 0.8557, "step": 35145 }, { "epoch": 0.42838165575908255, "grad_norm": 2.40681529045105, "learning_rate": 3.008595253367543e-06, "loss": 0.8582, "step": 35150 }, { "epoch": 0.42844259198323037, "grad_norm": 1.7431262731552124, "learning_rate": 3.008274534958307e-06, "loss": 0.8387, "step": 35155 }, { "epoch": 0.42850352820737814, "grad_norm": 1.9463016986846924, "learning_rate": 3.00795381654907e-06, "loss": 0.8161, "step": 35160 }, { "epoch": 0.42856446443152596, "grad_norm": 1.772004246711731, "learning_rate": 3.007633098139833e-06, "loss": 0.8952, "step": 35165 }, { "epoch": 0.4286254006556738, "grad_norm": 1.8591254949569702, "learning_rate": 3.007312379730597e-06, "loss": 0.8528, "step": 35170 }, { "epoch": 0.4286863368798216, "grad_norm": 2.112290859222412, "learning_rate": 3.00699166132136e-06, "loss": 0.8867, "step": 35175 }, { "epoch": 0.4287472731039694, "grad_norm": 1.913154125213623, "learning_rate": 3.0066709429121234e-06, "loss": 0.8941, "step": 35180 }, { "epoch": 0.4288082093281172, "grad_norm": 2.1001386642456055, "learning_rate": 3.006350224502887e-06, "loss": 0.879, "step": 35185 }, { "epoch": 0.428869145552265, "grad_norm": 2.1185216903686523, "learning_rate": 3.0060295060936503e-06, "loss": 0.842, "step": 35190 }, { "epoch": 0.4289300817764128, "grad_norm": 1.8886778354644775, "learning_rate": 3.0057087876844133e-06, "loss": 0.8648, "step": 35195 }, { "epoch": 0.4289910180005606, "grad_norm": 1.7109298706054688, "learning_rate": 3.0053880692751767e-06, "loss": 0.8817, "step": 35200 }, { "epoch": 0.42905195422470843, "grad_norm": 1.9474726915359497, "learning_rate": 3.00506735086594e-06, "loss": 0.7917, "step": 35205 }, { "epoch": 0.42911289044885625, "grad_norm": 2.471132516860962, "learning_rate": 3.004746632456703e-06, "loss": 0.858, "step": 35210 }, { "epoch": 0.429173826673004, "grad_norm": 1.7686076164245605, "learning_rate": 3.004425914047466e-06, "loss": 0.8007, "step": 35215 }, { "epoch": 0.42923476289715184, "grad_norm": 2.1542890071868896, "learning_rate": 3.00410519563823e-06, "loss": 0.8682, "step": 35220 }, { "epoch": 0.42929569912129967, "grad_norm": 1.7449568510055542, "learning_rate": 3.003784477228993e-06, "loss": 0.8117, "step": 35225 }, { "epoch": 0.42935663534544743, "grad_norm": 2.1194968223571777, "learning_rate": 3.003463758819756e-06, "loss": 0.8579, "step": 35230 }, { "epoch": 0.42941757156959526, "grad_norm": 2.4138715267181396, "learning_rate": 3.00314304041052e-06, "loss": 0.8232, "step": 35235 }, { "epoch": 0.4294785077937431, "grad_norm": 2.2922208309173584, "learning_rate": 3.002822322001283e-06, "loss": 0.8694, "step": 35240 }, { "epoch": 0.42953944401789085, "grad_norm": 1.7806830406188965, "learning_rate": 3.0025016035920464e-06, "loss": 0.8669, "step": 35245 }, { "epoch": 0.42960038024203867, "grad_norm": 1.775052547454834, "learning_rate": 3.00218088518281e-06, "loss": 0.8609, "step": 35250 }, { "epoch": 0.4296613164661865, "grad_norm": 1.7954972982406616, "learning_rate": 3.001860166773573e-06, "loss": 0.8464, "step": 35255 }, { "epoch": 0.4297222526903343, "grad_norm": 2.043856620788574, "learning_rate": 3.0015394483643363e-06, "loss": 0.8414, "step": 35260 }, { "epoch": 0.4297831889144821, "grad_norm": 1.845327377319336, "learning_rate": 3.0012187299550998e-06, "loss": 0.8436, "step": 35265 }, { "epoch": 0.4298441251386299, "grad_norm": 1.6376858949661255, "learning_rate": 3.000898011545863e-06, "loss": 0.812, "step": 35270 }, { "epoch": 0.42990506136277773, "grad_norm": 1.8603029251098633, "learning_rate": 3.0005772931366262e-06, "loss": 0.8851, "step": 35275 }, { "epoch": 0.4299659975869255, "grad_norm": 1.8054569959640503, "learning_rate": 3.0002565747273897e-06, "loss": 0.7972, "step": 35280 }, { "epoch": 0.4300269338110733, "grad_norm": 1.9372564554214478, "learning_rate": 2.999935856318153e-06, "loss": 0.8315, "step": 35285 }, { "epoch": 0.43008787003522114, "grad_norm": 1.8627780675888062, "learning_rate": 2.999615137908916e-06, "loss": 0.8187, "step": 35290 }, { "epoch": 0.43014880625936897, "grad_norm": 2.1684110164642334, "learning_rate": 2.999294419499679e-06, "loss": 0.9701, "step": 35295 }, { "epoch": 0.43020974248351673, "grad_norm": 2.5353758335113525, "learning_rate": 2.998973701090443e-06, "loss": 0.7103, "step": 35300 }, { "epoch": 0.43027067870766456, "grad_norm": 1.8729889392852783, "learning_rate": 2.998652982681206e-06, "loss": 0.8401, "step": 35305 }, { "epoch": 0.4303316149318124, "grad_norm": 1.8447363376617432, "learning_rate": 2.998332264271969e-06, "loss": 0.8435, "step": 35310 }, { "epoch": 0.43039255115596015, "grad_norm": 1.8923618793487549, "learning_rate": 2.998011545862733e-06, "loss": 0.8121, "step": 35315 }, { "epoch": 0.43045348738010797, "grad_norm": 2.323284864425659, "learning_rate": 2.997690827453496e-06, "loss": 0.8245, "step": 35320 }, { "epoch": 0.4305144236042558, "grad_norm": 1.9138222932815552, "learning_rate": 2.9973701090442594e-06, "loss": 0.8238, "step": 35325 }, { "epoch": 0.4305753598284036, "grad_norm": 1.7309703826904297, "learning_rate": 2.997049390635023e-06, "loss": 0.9049, "step": 35330 }, { "epoch": 0.4306362960525514, "grad_norm": 2.1089093685150146, "learning_rate": 2.996728672225786e-06, "loss": 0.9001, "step": 35335 }, { "epoch": 0.4306972322766992, "grad_norm": 1.9621626138687134, "learning_rate": 2.9964079538165493e-06, "loss": 0.8206, "step": 35340 }, { "epoch": 0.430758168500847, "grad_norm": 1.6983230113983154, "learning_rate": 2.9960872354073127e-06, "loss": 0.7644, "step": 35345 }, { "epoch": 0.4308191047249948, "grad_norm": 1.721256971359253, "learning_rate": 2.995766516998076e-06, "loss": 0.8217, "step": 35350 }, { "epoch": 0.4308800409491426, "grad_norm": 1.9242773056030273, "learning_rate": 2.995445798588839e-06, "loss": 0.8792, "step": 35355 }, { "epoch": 0.43094097717329044, "grad_norm": 2.0485458374023438, "learning_rate": 2.9951250801796026e-06, "loss": 0.8875, "step": 35360 }, { "epoch": 0.43100191339743826, "grad_norm": 1.793670892715454, "learning_rate": 2.994804361770366e-06, "loss": 0.8225, "step": 35365 }, { "epoch": 0.43106284962158603, "grad_norm": 1.8358486890792847, "learning_rate": 2.994483643361129e-06, "loss": 0.8377, "step": 35370 }, { "epoch": 0.43112378584573385, "grad_norm": 1.7492398023605347, "learning_rate": 2.994162924951893e-06, "loss": 0.8346, "step": 35375 }, { "epoch": 0.4311847220698817, "grad_norm": 1.8819862604141235, "learning_rate": 2.993842206542656e-06, "loss": 0.902, "step": 35380 }, { "epoch": 0.43124565829402944, "grad_norm": 1.9258686304092407, "learning_rate": 2.993521488133419e-06, "loss": 0.795, "step": 35385 }, { "epoch": 0.43130659451817727, "grad_norm": 2.1529765129089355, "learning_rate": 2.993200769724182e-06, "loss": 0.8504, "step": 35390 }, { "epoch": 0.4313675307423251, "grad_norm": 2.079080104827881, "learning_rate": 2.992880051314946e-06, "loss": 0.8267, "step": 35395 }, { "epoch": 0.4314284669664729, "grad_norm": 1.6021560430526733, "learning_rate": 2.992559332905709e-06, "loss": 0.8213, "step": 35400 }, { "epoch": 0.4314894031906207, "grad_norm": 2.1287193298339844, "learning_rate": 2.9922386144964723e-06, "loss": 0.8252, "step": 35405 }, { "epoch": 0.4315503394147685, "grad_norm": 2.1213009357452393, "learning_rate": 2.9919178960872357e-06, "loss": 0.8854, "step": 35410 }, { "epoch": 0.4316112756389163, "grad_norm": 1.6531974077224731, "learning_rate": 2.991597177677999e-06, "loss": 0.8797, "step": 35415 }, { "epoch": 0.4316722118630641, "grad_norm": 2.3426554203033447, "learning_rate": 2.991276459268762e-06, "loss": 0.8333, "step": 35420 }, { "epoch": 0.4317331480872119, "grad_norm": 1.8037140369415283, "learning_rate": 2.9909557408595256e-06, "loss": 0.8613, "step": 35425 }, { "epoch": 0.43179408431135974, "grad_norm": 2.205167531967163, "learning_rate": 2.990635022450289e-06, "loss": 0.8615, "step": 35430 }, { "epoch": 0.43185502053550756, "grad_norm": 2.1060938835144043, "learning_rate": 2.990314304041052e-06, "loss": 0.9172, "step": 35435 }, { "epoch": 0.43191595675965533, "grad_norm": 2.1252102851867676, "learning_rate": 2.989993585631816e-06, "loss": 0.8933, "step": 35440 }, { "epoch": 0.43197689298380315, "grad_norm": 1.8521524667739868, "learning_rate": 2.989672867222579e-06, "loss": 0.8388, "step": 35445 }, { "epoch": 0.432037829207951, "grad_norm": 2.132460594177246, "learning_rate": 2.989352148813342e-06, "loss": 0.7853, "step": 35450 }, { "epoch": 0.43209876543209874, "grad_norm": 1.9930739402770996, "learning_rate": 2.989031430404106e-06, "loss": 0.8457, "step": 35455 }, { "epoch": 0.43215970165624656, "grad_norm": 1.792519450187683, "learning_rate": 2.988710711994869e-06, "loss": 0.8493, "step": 35460 }, { "epoch": 0.4322206378803944, "grad_norm": 1.6325297355651855, "learning_rate": 2.988389993585632e-06, "loss": 0.9004, "step": 35465 }, { "epoch": 0.4322815741045422, "grad_norm": 1.6297932863235474, "learning_rate": 2.9880692751763953e-06, "loss": 0.8381, "step": 35470 }, { "epoch": 0.43234251032869, "grad_norm": 1.9272172451019287, "learning_rate": 2.9877485567671588e-06, "loss": 0.8855, "step": 35475 }, { "epoch": 0.4324034465528378, "grad_norm": 2.0236616134643555, "learning_rate": 2.9874278383579218e-06, "loss": 0.8713, "step": 35480 }, { "epoch": 0.4324643827769856, "grad_norm": 2.1144251823425293, "learning_rate": 2.987107119948685e-06, "loss": 0.8837, "step": 35485 }, { "epoch": 0.4325253190011334, "grad_norm": 1.8892436027526855, "learning_rate": 2.9867864015394486e-06, "loss": 0.8734, "step": 35490 }, { "epoch": 0.4325862552252812, "grad_norm": 2.0610733032226562, "learning_rate": 2.986465683130212e-06, "loss": 0.9011, "step": 35495 }, { "epoch": 0.43264719144942904, "grad_norm": 1.9195882081985474, "learning_rate": 2.986144964720975e-06, "loss": 0.8459, "step": 35500 }, { "epoch": 0.43270812767357686, "grad_norm": 2.7486283779144287, "learning_rate": 2.9858242463117385e-06, "loss": 0.8219, "step": 35505 }, { "epoch": 0.4327690638977246, "grad_norm": 2.3247528076171875, "learning_rate": 2.985503527902502e-06, "loss": 0.7813, "step": 35510 }, { "epoch": 0.43283000012187245, "grad_norm": 2.1004650592803955, "learning_rate": 2.985182809493265e-06, "loss": 0.9367, "step": 35515 }, { "epoch": 0.43289093634602027, "grad_norm": 1.9451673030853271, "learning_rate": 2.984862091084029e-06, "loss": 0.8695, "step": 35520 }, { "epoch": 0.43295187257016804, "grad_norm": 2.0061659812927246, "learning_rate": 2.984541372674792e-06, "loss": 0.8369, "step": 35525 }, { "epoch": 0.43301280879431586, "grad_norm": 2.0384914875030518, "learning_rate": 2.984220654265555e-06, "loss": 0.8921, "step": 35530 }, { "epoch": 0.4330737450184637, "grad_norm": 2.2516157627105713, "learning_rate": 2.9838999358563188e-06, "loss": 0.7859, "step": 35535 }, { "epoch": 0.4331346812426115, "grad_norm": 2.055418014526367, "learning_rate": 2.9835792174470818e-06, "loss": 0.8203, "step": 35540 }, { "epoch": 0.4331956174667593, "grad_norm": 1.783906102180481, "learning_rate": 2.983258499037845e-06, "loss": 0.8584, "step": 35545 }, { "epoch": 0.4332565536909071, "grad_norm": 1.9194668531417847, "learning_rate": 2.9829377806286082e-06, "loss": 0.8843, "step": 35550 }, { "epoch": 0.4333174899150549, "grad_norm": 1.9866914749145508, "learning_rate": 2.9826170622193717e-06, "loss": 0.8333, "step": 35555 }, { "epoch": 0.4333784261392027, "grad_norm": 2.041564464569092, "learning_rate": 2.9822963438101347e-06, "loss": 0.8205, "step": 35560 }, { "epoch": 0.4334393623633505, "grad_norm": 1.8229727745056152, "learning_rate": 2.981975625400898e-06, "loss": 0.855, "step": 35565 }, { "epoch": 0.43350029858749833, "grad_norm": 1.877631664276123, "learning_rate": 2.9816549069916616e-06, "loss": 0.8874, "step": 35570 }, { "epoch": 0.43356123481164616, "grad_norm": 1.5674262046813965, "learning_rate": 2.981334188582425e-06, "loss": 0.8071, "step": 35575 }, { "epoch": 0.4336221710357939, "grad_norm": 1.8238335847854614, "learning_rate": 2.981013470173188e-06, "loss": 0.9004, "step": 35580 }, { "epoch": 0.43368310725994175, "grad_norm": 1.8686658143997192, "learning_rate": 2.9806927517639515e-06, "loss": 0.8553, "step": 35585 }, { "epoch": 0.43374404348408957, "grad_norm": 2.047395944595337, "learning_rate": 2.980372033354715e-06, "loss": 0.852, "step": 35590 }, { "epoch": 0.43380497970823734, "grad_norm": 2.1578047275543213, "learning_rate": 2.980051314945478e-06, "loss": 0.807, "step": 35595 }, { "epoch": 0.43386591593238516, "grad_norm": 2.0612754821777344, "learning_rate": 2.979730596536242e-06, "loss": 0.8507, "step": 35600 }, { "epoch": 0.433926852156533, "grad_norm": 2.525763988494873, "learning_rate": 2.979409878127005e-06, "loss": 0.8525, "step": 35605 }, { "epoch": 0.4339877883806808, "grad_norm": 2.2121059894561768, "learning_rate": 2.979089159717768e-06, "loss": 0.8326, "step": 35610 }, { "epoch": 0.4340487246048286, "grad_norm": 1.873390793800354, "learning_rate": 2.9787684413085317e-06, "loss": 0.8706, "step": 35615 }, { "epoch": 0.4341096608289764, "grad_norm": 2.1642565727233887, "learning_rate": 2.9784477228992947e-06, "loss": 0.8689, "step": 35620 }, { "epoch": 0.4341705970531242, "grad_norm": 1.731722116470337, "learning_rate": 2.9781270044900577e-06, "loss": 0.7719, "step": 35625 }, { "epoch": 0.434231533277272, "grad_norm": 1.7879583835601807, "learning_rate": 2.977806286080821e-06, "loss": 0.7931, "step": 35630 }, { "epoch": 0.4342924695014198, "grad_norm": 1.9098734855651855, "learning_rate": 2.9774855676715846e-06, "loss": 0.8596, "step": 35635 }, { "epoch": 0.43435340572556763, "grad_norm": 2.387258529663086, "learning_rate": 2.9771648492623476e-06, "loss": 0.8677, "step": 35640 }, { "epoch": 0.43441434194971545, "grad_norm": 1.7079371213912964, "learning_rate": 2.976844130853111e-06, "loss": 0.801, "step": 35645 }, { "epoch": 0.4344752781738632, "grad_norm": 1.8964030742645264, "learning_rate": 2.9765234124438745e-06, "loss": 0.8784, "step": 35650 }, { "epoch": 0.43453621439801104, "grad_norm": 1.6166248321533203, "learning_rate": 2.976202694034638e-06, "loss": 0.8527, "step": 35655 }, { "epoch": 0.43459715062215887, "grad_norm": 1.8456941843032837, "learning_rate": 2.975881975625401e-06, "loss": 0.7915, "step": 35660 }, { "epoch": 0.43465808684630663, "grad_norm": 1.7703936100006104, "learning_rate": 2.975561257216165e-06, "loss": 0.7761, "step": 35665 }, { "epoch": 0.43471902307045446, "grad_norm": 1.806633710861206, "learning_rate": 2.975240538806928e-06, "loss": 0.9325, "step": 35670 }, { "epoch": 0.4347799592946023, "grad_norm": 1.9140263795852661, "learning_rate": 2.974919820397691e-06, "loss": 0.8907, "step": 35675 }, { "epoch": 0.4348408955187501, "grad_norm": 1.9859904050827026, "learning_rate": 2.9745991019884547e-06, "loss": 0.8058, "step": 35680 }, { "epoch": 0.43490183174289787, "grad_norm": 2.5524775981903076, "learning_rate": 2.9742783835792177e-06, "loss": 0.8476, "step": 35685 }, { "epoch": 0.4349627679670457, "grad_norm": 2.1179721355438232, "learning_rate": 2.9739576651699808e-06, "loss": 0.8565, "step": 35690 }, { "epoch": 0.4350237041911935, "grad_norm": 1.9042565822601318, "learning_rate": 2.9736369467607446e-06, "loss": 0.8, "step": 35695 }, { "epoch": 0.4350846404153413, "grad_norm": 1.860410451889038, "learning_rate": 2.9733162283515076e-06, "loss": 0.8954, "step": 35700 }, { "epoch": 0.4351455766394891, "grad_norm": 2.156912326812744, "learning_rate": 2.9729955099422707e-06, "loss": 0.8348, "step": 35705 }, { "epoch": 0.43520651286363693, "grad_norm": 2.272204875946045, "learning_rate": 2.9726747915330345e-06, "loss": 0.8112, "step": 35710 }, { "epoch": 0.4352674490877847, "grad_norm": 1.8466886281967163, "learning_rate": 2.9723540731237975e-06, "loss": 0.844, "step": 35715 }, { "epoch": 0.4353283853119325, "grad_norm": 1.817553162574768, "learning_rate": 2.972033354714561e-06, "loss": 0.818, "step": 35720 }, { "epoch": 0.43538932153608034, "grad_norm": 1.7606836557388306, "learning_rate": 2.971712636305324e-06, "loss": 0.8082, "step": 35725 }, { "epoch": 0.43545025776022817, "grad_norm": 2.1306469440460205, "learning_rate": 2.9713919178960874e-06, "loss": 0.8646, "step": 35730 }, { "epoch": 0.43551119398437593, "grad_norm": 2.2779910564422607, "learning_rate": 2.971071199486851e-06, "loss": 0.8715, "step": 35735 }, { "epoch": 0.43557213020852376, "grad_norm": 1.807648777961731, "learning_rate": 2.970750481077614e-06, "loss": 0.8294, "step": 35740 }, { "epoch": 0.4356330664326716, "grad_norm": 2.2054431438446045, "learning_rate": 2.9704297626683778e-06, "loss": 0.8869, "step": 35745 }, { "epoch": 0.43569400265681935, "grad_norm": 2.0108296871185303, "learning_rate": 2.9701090442591408e-06, "loss": 0.8412, "step": 35750 }, { "epoch": 0.43575493888096717, "grad_norm": 1.7805622816085815, "learning_rate": 2.9697883258499038e-06, "loss": 0.854, "step": 35755 }, { "epoch": 0.435815875105115, "grad_norm": 1.758826732635498, "learning_rate": 2.9694676074406676e-06, "loss": 0.8115, "step": 35760 }, { "epoch": 0.4358768113292628, "grad_norm": 1.80535888671875, "learning_rate": 2.9691468890314307e-06, "loss": 0.7144, "step": 35765 }, { "epoch": 0.4359377475534106, "grad_norm": 1.729522705078125, "learning_rate": 2.9688261706221937e-06, "loss": 0.8875, "step": 35770 }, { "epoch": 0.4359986837775584, "grad_norm": 1.8768445253372192, "learning_rate": 2.9685054522129575e-06, "loss": 0.8786, "step": 35775 }, { "epoch": 0.4360596200017062, "grad_norm": 1.7880674600601196, "learning_rate": 2.9681847338037206e-06, "loss": 0.8385, "step": 35780 }, { "epoch": 0.436120556225854, "grad_norm": 2.2776591777801514, "learning_rate": 2.9678640153944836e-06, "loss": 0.8472, "step": 35785 }, { "epoch": 0.4361814924500018, "grad_norm": 1.6753809452056885, "learning_rate": 2.9675432969852474e-06, "loss": 0.8806, "step": 35790 }, { "epoch": 0.43624242867414964, "grad_norm": 2.4047017097473145, "learning_rate": 2.9672225785760105e-06, "loss": 0.878, "step": 35795 }, { "epoch": 0.43630336489829746, "grad_norm": 2.0213704109191895, "learning_rate": 2.966901860166774e-06, "loss": 0.9007, "step": 35800 }, { "epoch": 0.43636430112244523, "grad_norm": 2.0574653148651123, "learning_rate": 2.966581141757537e-06, "loss": 0.8815, "step": 35805 }, { "epoch": 0.43642523734659305, "grad_norm": 2.3807146549224854, "learning_rate": 2.9662604233483004e-06, "loss": 0.8836, "step": 35810 }, { "epoch": 0.4364861735707409, "grad_norm": 1.9347463846206665, "learning_rate": 2.965939704939064e-06, "loss": 0.7993, "step": 35815 }, { "epoch": 0.43654710979488864, "grad_norm": 1.869555950164795, "learning_rate": 2.965618986529827e-06, "loss": 0.7887, "step": 35820 }, { "epoch": 0.43660804601903647, "grad_norm": 1.840785264968872, "learning_rate": 2.9652982681205907e-06, "loss": 0.8891, "step": 35825 }, { "epoch": 0.4366689822431843, "grad_norm": 2.0853867530822754, "learning_rate": 2.9649775497113537e-06, "loss": 0.7732, "step": 35830 }, { "epoch": 0.4367299184673321, "grad_norm": 1.8491237163543701, "learning_rate": 2.9646568313021167e-06, "loss": 0.8246, "step": 35835 }, { "epoch": 0.4367908546914799, "grad_norm": 2.128209114074707, "learning_rate": 2.9643361128928806e-06, "loss": 0.8372, "step": 35840 }, { "epoch": 0.4368517909156277, "grad_norm": 2.1969683170318604, "learning_rate": 2.9640153944836436e-06, "loss": 0.8353, "step": 35845 }, { "epoch": 0.4369127271397755, "grad_norm": 1.9880253076553345, "learning_rate": 2.9636946760744066e-06, "loss": 0.8667, "step": 35850 }, { "epoch": 0.4369736633639233, "grad_norm": 2.1282432079315186, "learning_rate": 2.9633739576651705e-06, "loss": 0.8383, "step": 35855 }, { "epoch": 0.4370345995880711, "grad_norm": 2.0279057025909424, "learning_rate": 2.9630532392559335e-06, "loss": 0.8809, "step": 35860 }, { "epoch": 0.43709553581221894, "grad_norm": 1.8774605989456177, "learning_rate": 2.9627325208466965e-06, "loss": 0.8398, "step": 35865 }, { "epoch": 0.43715647203636676, "grad_norm": 2.238612174987793, "learning_rate": 2.9624118024374604e-06, "loss": 0.8555, "step": 35870 }, { "epoch": 0.43721740826051453, "grad_norm": 2.1077425479888916, "learning_rate": 2.9620910840282234e-06, "loss": 0.8135, "step": 35875 }, { "epoch": 0.43727834448466235, "grad_norm": 1.8824952840805054, "learning_rate": 2.961770365618987e-06, "loss": 0.9268, "step": 35880 }, { "epoch": 0.4373392807088102, "grad_norm": 1.9089998006820679, "learning_rate": 2.96144964720975e-06, "loss": 0.852, "step": 35885 }, { "epoch": 0.43740021693295794, "grad_norm": 1.8236260414123535, "learning_rate": 2.9611289288005137e-06, "loss": 0.8151, "step": 35890 }, { "epoch": 0.43746115315710576, "grad_norm": 2.0885279178619385, "learning_rate": 2.9608082103912767e-06, "loss": 0.829, "step": 35895 }, { "epoch": 0.4375220893812536, "grad_norm": 1.512966513633728, "learning_rate": 2.9604874919820397e-06, "loss": 0.8743, "step": 35900 }, { "epoch": 0.4375830256054014, "grad_norm": 1.7899285554885864, "learning_rate": 2.9601667735728036e-06, "loss": 0.7996, "step": 35905 }, { "epoch": 0.4376439618295492, "grad_norm": 2.006786346435547, "learning_rate": 2.9598460551635666e-06, "loss": 0.8438, "step": 35910 }, { "epoch": 0.437704898053697, "grad_norm": 1.9682039022445679, "learning_rate": 2.9595253367543296e-06, "loss": 0.8302, "step": 35915 }, { "epoch": 0.4377658342778448, "grad_norm": 2.1725332736968994, "learning_rate": 2.9592046183450935e-06, "loss": 0.87, "step": 35920 }, { "epoch": 0.4378267705019926, "grad_norm": 2.1359405517578125, "learning_rate": 2.9588838999358565e-06, "loss": 0.7712, "step": 35925 }, { "epoch": 0.4378877067261404, "grad_norm": 1.848848819732666, "learning_rate": 2.9585631815266195e-06, "loss": 0.8476, "step": 35930 }, { "epoch": 0.43794864295028824, "grad_norm": 2.0682108402252197, "learning_rate": 2.9582424631173834e-06, "loss": 0.869, "step": 35935 }, { "epoch": 0.43800957917443606, "grad_norm": 2.1337850093841553, "learning_rate": 2.9579217447081464e-06, "loss": 0.8556, "step": 35940 }, { "epoch": 0.4380705153985838, "grad_norm": 1.975400686264038, "learning_rate": 2.95760102629891e-06, "loss": 0.7827, "step": 35945 }, { "epoch": 0.43813145162273165, "grad_norm": 2.107515811920166, "learning_rate": 2.9572803078896733e-06, "loss": 0.8414, "step": 35950 }, { "epoch": 0.43819238784687947, "grad_norm": 2.458956003189087, "learning_rate": 2.9569595894804363e-06, "loss": 0.942, "step": 35955 }, { "epoch": 0.43825332407102724, "grad_norm": 1.89548921585083, "learning_rate": 2.9566388710711998e-06, "loss": 0.8086, "step": 35960 }, { "epoch": 0.43831426029517506, "grad_norm": 2.056497573852539, "learning_rate": 2.956318152661963e-06, "loss": 0.8656, "step": 35965 }, { "epoch": 0.4383751965193229, "grad_norm": 1.8579602241516113, "learning_rate": 2.9559974342527266e-06, "loss": 0.8284, "step": 35970 }, { "epoch": 0.4384361327434707, "grad_norm": 1.904467225074768, "learning_rate": 2.9556767158434897e-06, "loss": 0.8233, "step": 35975 }, { "epoch": 0.4384970689676185, "grad_norm": 1.6817905902862549, "learning_rate": 2.9553559974342527e-06, "loss": 0.8772, "step": 35980 }, { "epoch": 0.4385580051917663, "grad_norm": 2.0920350551605225, "learning_rate": 2.9550352790250165e-06, "loss": 0.8422, "step": 35985 }, { "epoch": 0.4386189414159141, "grad_norm": 1.6834238767623901, "learning_rate": 2.9547145606157795e-06, "loss": 0.8016, "step": 35990 }, { "epoch": 0.4386798776400619, "grad_norm": 2.1460039615631104, "learning_rate": 2.9543938422065426e-06, "loss": 0.8654, "step": 35995 }, { "epoch": 0.4387408138642097, "grad_norm": 2.0446362495422363, "learning_rate": 2.9540731237973064e-06, "loss": 0.9368, "step": 36000 }, { "epoch": 0.43880175008835753, "grad_norm": 1.9039603471755981, "learning_rate": 2.9537524053880694e-06, "loss": 0.9188, "step": 36005 }, { "epoch": 0.43886268631250536, "grad_norm": 2.0305304527282715, "learning_rate": 2.9534316869788325e-06, "loss": 0.8775, "step": 36010 }, { "epoch": 0.4389236225366531, "grad_norm": 1.7771847248077393, "learning_rate": 2.9531109685695963e-06, "loss": 0.8581, "step": 36015 }, { "epoch": 0.43898455876080095, "grad_norm": 1.9837466478347778, "learning_rate": 2.9527902501603593e-06, "loss": 0.8258, "step": 36020 }, { "epoch": 0.43904549498494877, "grad_norm": 2.214664936065674, "learning_rate": 2.9524695317511228e-06, "loss": 0.8416, "step": 36025 }, { "epoch": 0.43910643120909654, "grad_norm": 2.214256525039673, "learning_rate": 2.9521488133418862e-06, "loss": 0.8579, "step": 36030 }, { "epoch": 0.43916736743324436, "grad_norm": 2.2169370651245117, "learning_rate": 2.9518280949326492e-06, "loss": 0.7819, "step": 36035 }, { "epoch": 0.4392283036573922, "grad_norm": 2.243945837020874, "learning_rate": 2.9515073765234127e-06, "loss": 0.8963, "step": 36040 }, { "epoch": 0.43928923988154, "grad_norm": 2.099879503250122, "learning_rate": 2.951186658114176e-06, "loss": 0.8814, "step": 36045 }, { "epoch": 0.4393501761056878, "grad_norm": 1.8966385126113892, "learning_rate": 2.9508659397049396e-06, "loss": 0.8223, "step": 36050 }, { "epoch": 0.4394111123298356, "grad_norm": 2.201145887374878, "learning_rate": 2.9505452212957026e-06, "loss": 0.8653, "step": 36055 }, { "epoch": 0.4394720485539834, "grad_norm": 2.069131851196289, "learning_rate": 2.9502245028864656e-06, "loss": 0.8551, "step": 36060 }, { "epoch": 0.4395329847781312, "grad_norm": 1.92702054977417, "learning_rate": 2.9499037844772295e-06, "loss": 0.868, "step": 36065 }, { "epoch": 0.439593921002279, "grad_norm": 4.098628520965576, "learning_rate": 2.9495830660679925e-06, "loss": 0.8721, "step": 36070 }, { "epoch": 0.43965485722642683, "grad_norm": 1.9897819757461548, "learning_rate": 2.9492623476587555e-06, "loss": 0.8845, "step": 36075 }, { "epoch": 0.43971579345057465, "grad_norm": 1.8815546035766602, "learning_rate": 2.9489416292495194e-06, "loss": 0.7926, "step": 36080 }, { "epoch": 0.4397767296747224, "grad_norm": 2.310234546661377, "learning_rate": 2.9486209108402824e-06, "loss": 0.8876, "step": 36085 }, { "epoch": 0.43983766589887024, "grad_norm": 2.053256034851074, "learning_rate": 2.9483001924310454e-06, "loss": 0.8383, "step": 36090 }, { "epoch": 0.43989860212301807, "grad_norm": 2.0194284915924072, "learning_rate": 2.9479794740218093e-06, "loss": 0.7843, "step": 36095 }, { "epoch": 0.43995953834716583, "grad_norm": 1.8670755624771118, "learning_rate": 2.9476587556125723e-06, "loss": 0.848, "step": 36100 }, { "epoch": 0.44002047457131366, "grad_norm": 1.9534698724746704, "learning_rate": 2.9473380372033357e-06, "loss": 0.8791, "step": 36105 }, { "epoch": 0.4400814107954615, "grad_norm": 1.817612886428833, "learning_rate": 2.947017318794099e-06, "loss": 0.8416, "step": 36110 }, { "epoch": 0.4401423470196093, "grad_norm": 1.8908096551895142, "learning_rate": 2.9466966003848626e-06, "loss": 0.8709, "step": 36115 }, { "epoch": 0.44020328324375707, "grad_norm": 1.8203212022781372, "learning_rate": 2.9463758819756256e-06, "loss": 0.8129, "step": 36120 }, { "epoch": 0.4402642194679049, "grad_norm": 1.913888692855835, "learning_rate": 2.946055163566389e-06, "loss": 0.8085, "step": 36125 }, { "epoch": 0.4403251556920527, "grad_norm": 1.7968584299087524, "learning_rate": 2.9457344451571525e-06, "loss": 0.8173, "step": 36130 }, { "epoch": 0.4403860919162005, "grad_norm": 1.989518165588379, "learning_rate": 2.9454137267479155e-06, "loss": 0.8732, "step": 36135 }, { "epoch": 0.4404470281403483, "grad_norm": 1.7417891025543213, "learning_rate": 2.9450930083386785e-06, "loss": 0.8119, "step": 36140 }, { "epoch": 0.44050796436449613, "grad_norm": 1.912782907485962, "learning_rate": 2.9447722899294424e-06, "loss": 0.8571, "step": 36145 }, { "epoch": 0.44056890058864395, "grad_norm": 1.680657148361206, "learning_rate": 2.9444515715202054e-06, "loss": 0.7958, "step": 36150 }, { "epoch": 0.4406298368127917, "grad_norm": 1.9043151140213013, "learning_rate": 2.9441308531109684e-06, "loss": 0.8173, "step": 36155 }, { "epoch": 0.44069077303693954, "grad_norm": 1.934545636177063, "learning_rate": 2.9438101347017323e-06, "loss": 0.7977, "step": 36160 }, { "epoch": 0.44075170926108737, "grad_norm": 1.9197312593460083, "learning_rate": 2.9434894162924953e-06, "loss": 0.8979, "step": 36165 }, { "epoch": 0.44081264548523513, "grad_norm": 1.8237673044204712, "learning_rate": 2.9431686978832587e-06, "loss": 0.7821, "step": 36170 }, { "epoch": 0.44087358170938296, "grad_norm": 1.8908119201660156, "learning_rate": 2.942847979474022e-06, "loss": 0.8829, "step": 36175 }, { "epoch": 0.4409345179335308, "grad_norm": 2.3275747299194336, "learning_rate": 2.942527261064785e-06, "loss": 0.8941, "step": 36180 }, { "epoch": 0.4409954541576786, "grad_norm": 2.0419669151306152, "learning_rate": 2.9422065426555486e-06, "loss": 0.8109, "step": 36185 }, { "epoch": 0.44105639038182637, "grad_norm": 1.7000705003738403, "learning_rate": 2.941885824246312e-06, "loss": 0.818, "step": 36190 }, { "epoch": 0.4411173266059742, "grad_norm": 2.0112876892089844, "learning_rate": 2.9415651058370755e-06, "loss": 0.8754, "step": 36195 }, { "epoch": 0.441178262830122, "grad_norm": 2.437093734741211, "learning_rate": 2.9412443874278385e-06, "loss": 0.8098, "step": 36200 }, { "epoch": 0.4412391990542698, "grad_norm": 1.7081375122070312, "learning_rate": 2.940923669018602e-06, "loss": 0.8929, "step": 36205 }, { "epoch": 0.4413001352784176, "grad_norm": 2.945643424987793, "learning_rate": 2.9406029506093654e-06, "loss": 0.8873, "step": 36210 }, { "epoch": 0.4413610715025654, "grad_norm": 2.1032919883728027, "learning_rate": 2.9402822322001284e-06, "loss": 0.905, "step": 36215 }, { "epoch": 0.4414220077267132, "grad_norm": 2.141587495803833, "learning_rate": 2.9399615137908915e-06, "loss": 0.8435, "step": 36220 }, { "epoch": 0.441482943950861, "grad_norm": 1.830892562866211, "learning_rate": 2.9396407953816553e-06, "loss": 0.8542, "step": 36225 }, { "epoch": 0.44154388017500884, "grad_norm": 3.2768242359161377, "learning_rate": 2.9393200769724183e-06, "loss": 0.7733, "step": 36230 }, { "epoch": 0.44160481639915666, "grad_norm": 2.049760341644287, "learning_rate": 2.9389993585631813e-06, "loss": 0.9318, "step": 36235 }, { "epoch": 0.44166575262330443, "grad_norm": 1.9586232900619507, "learning_rate": 2.938678640153945e-06, "loss": 0.7903, "step": 36240 }, { "epoch": 0.44172668884745225, "grad_norm": 1.8389755487442017, "learning_rate": 2.9383579217447082e-06, "loss": 0.8038, "step": 36245 }, { "epoch": 0.4417876250716001, "grad_norm": 2.113436698913574, "learning_rate": 2.9380372033354717e-06, "loss": 0.8597, "step": 36250 }, { "epoch": 0.44184856129574784, "grad_norm": 2.022189140319824, "learning_rate": 2.937716484926235e-06, "loss": 0.915, "step": 36255 }, { "epoch": 0.44190949751989567, "grad_norm": 1.7501380443572998, "learning_rate": 2.937395766516998e-06, "loss": 0.8306, "step": 36260 }, { "epoch": 0.4419704337440435, "grad_norm": 2.0117592811584473, "learning_rate": 2.9370750481077616e-06, "loss": 0.7887, "step": 36265 }, { "epoch": 0.4420313699681913, "grad_norm": 2.0363035202026367, "learning_rate": 2.936754329698525e-06, "loss": 0.8236, "step": 36270 }, { "epoch": 0.4420923061923391, "grad_norm": 2.138300895690918, "learning_rate": 2.9364336112892884e-06, "loss": 0.8367, "step": 36275 }, { "epoch": 0.4421532424164869, "grad_norm": 1.7190167903900146, "learning_rate": 2.9361128928800515e-06, "loss": 0.868, "step": 36280 }, { "epoch": 0.4422141786406347, "grad_norm": 2.048330068588257, "learning_rate": 2.935792174470815e-06, "loss": 0.8209, "step": 36285 }, { "epoch": 0.4422751148647825, "grad_norm": 1.7670036554336548, "learning_rate": 2.9354714560615783e-06, "loss": 0.8544, "step": 36290 }, { "epoch": 0.4423360510889303, "grad_norm": 1.9735156297683716, "learning_rate": 2.9351507376523414e-06, "loss": 0.8628, "step": 36295 }, { "epoch": 0.44239698731307814, "grad_norm": 2.034907102584839, "learning_rate": 2.9348300192431052e-06, "loss": 0.8042, "step": 36300 }, { "epoch": 0.44245792353722596, "grad_norm": 2.444230556488037, "learning_rate": 2.9345093008338682e-06, "loss": 0.8842, "step": 36305 }, { "epoch": 0.44251885976137373, "grad_norm": 2.135274648666382, "learning_rate": 2.9341885824246313e-06, "loss": 0.867, "step": 36310 }, { "epoch": 0.44257979598552155, "grad_norm": 2.2735061645507812, "learning_rate": 2.9338678640153943e-06, "loss": 0.8575, "step": 36315 }, { "epoch": 0.4426407322096694, "grad_norm": 2.2256839275360107, "learning_rate": 2.933547145606158e-06, "loss": 0.9472, "step": 36320 }, { "epoch": 0.44270166843381714, "grad_norm": 2.0308680534362793, "learning_rate": 2.933226427196921e-06, "loss": 0.8561, "step": 36325 }, { "epoch": 0.44276260465796496, "grad_norm": 1.9156239032745361, "learning_rate": 2.9329057087876846e-06, "loss": 0.8916, "step": 36330 }, { "epoch": 0.4428235408821128, "grad_norm": 2.007970094680786, "learning_rate": 2.932584990378448e-06, "loss": 0.8091, "step": 36335 }, { "epoch": 0.4428844771062606, "grad_norm": 2.0831069946289062, "learning_rate": 2.932264271969211e-06, "loss": 0.8307, "step": 36340 }, { "epoch": 0.4429454133304084, "grad_norm": 1.9261592626571655, "learning_rate": 2.9319435535599745e-06, "loss": 0.8565, "step": 36345 }, { "epoch": 0.4430063495545562, "grad_norm": 1.747151255607605, "learning_rate": 2.931622835150738e-06, "loss": 0.8461, "step": 36350 }, { "epoch": 0.443067285778704, "grad_norm": 1.8492027521133423, "learning_rate": 2.9313021167415014e-06, "loss": 0.869, "step": 36355 }, { "epoch": 0.4431282220028518, "grad_norm": 1.9276437759399414, "learning_rate": 2.9309813983322644e-06, "loss": 0.9083, "step": 36360 }, { "epoch": 0.4431891582269996, "grad_norm": 1.871854305267334, "learning_rate": 2.9306606799230283e-06, "loss": 0.8535, "step": 36365 }, { "epoch": 0.44325009445114744, "grad_norm": 1.91230309009552, "learning_rate": 2.9303399615137913e-06, "loss": 0.9071, "step": 36370 }, { "epoch": 0.44331103067529526, "grad_norm": 1.9955146312713623, "learning_rate": 2.9300192431045543e-06, "loss": 0.866, "step": 36375 }, { "epoch": 0.443371966899443, "grad_norm": 2.1559340953826904, "learning_rate": 2.929698524695318e-06, "loss": 0.7354, "step": 36380 }, { "epoch": 0.44343290312359085, "grad_norm": 1.7413272857666016, "learning_rate": 2.929377806286081e-06, "loss": 0.871, "step": 36385 }, { "epoch": 0.44349383934773867, "grad_norm": 1.8281687498092651, "learning_rate": 2.929057087876844e-06, "loss": 0.8778, "step": 36390 }, { "epoch": 0.44355477557188644, "grad_norm": 1.9842298030853271, "learning_rate": 2.9287363694676076e-06, "loss": 0.802, "step": 36395 }, { "epoch": 0.44361571179603426, "grad_norm": 2.3687939643859863, "learning_rate": 2.928415651058371e-06, "loss": 0.8891, "step": 36400 }, { "epoch": 0.4436766480201821, "grad_norm": 1.6829559803009033, "learning_rate": 2.928094932649134e-06, "loss": 0.8203, "step": 36405 }, { "epoch": 0.4437375842443299, "grad_norm": 2.215362787246704, "learning_rate": 2.9277742142398975e-06, "loss": 0.9149, "step": 36410 }, { "epoch": 0.4437985204684777, "grad_norm": 1.6711127758026123, "learning_rate": 2.927453495830661e-06, "loss": 0.9165, "step": 36415 }, { "epoch": 0.4438594566926255, "grad_norm": 1.745876669883728, "learning_rate": 2.9271327774214244e-06, "loss": 0.907, "step": 36420 }, { "epoch": 0.4439203929167733, "grad_norm": 2.2064526081085205, "learning_rate": 2.9268120590121874e-06, "loss": 0.8924, "step": 36425 }, { "epoch": 0.4439813291409211, "grad_norm": 1.9489150047302246, "learning_rate": 2.926491340602951e-06, "loss": 0.8407, "step": 36430 }, { "epoch": 0.4440422653650689, "grad_norm": 1.7693212032318115, "learning_rate": 2.9261706221937143e-06, "loss": 0.8214, "step": 36435 }, { "epoch": 0.44410320158921673, "grad_norm": 1.9156408309936523, "learning_rate": 2.9258499037844773e-06, "loss": 0.8136, "step": 36440 }, { "epoch": 0.44416413781336456, "grad_norm": 1.9459055662155151, "learning_rate": 2.925529185375241e-06, "loss": 0.8438, "step": 36445 }, { "epoch": 0.4442250740375123, "grad_norm": 1.8335527181625366, "learning_rate": 2.925208466966004e-06, "loss": 0.8586, "step": 36450 }, { "epoch": 0.44428601026166015, "grad_norm": 1.7281343936920166, "learning_rate": 2.9248877485567672e-06, "loss": 0.849, "step": 36455 }, { "epoch": 0.44434694648580797, "grad_norm": 2.1561994552612305, "learning_rate": 2.924567030147531e-06, "loss": 0.8129, "step": 36460 }, { "epoch": 0.44440788270995574, "grad_norm": 2.550767660140991, "learning_rate": 2.924246311738294e-06, "loss": 0.8249, "step": 36465 }, { "epoch": 0.44446881893410356, "grad_norm": 2.0431182384490967, "learning_rate": 2.923925593329057e-06, "loss": 0.857, "step": 36470 }, { "epoch": 0.4445297551582514, "grad_norm": 1.7259016036987305, "learning_rate": 2.9236048749198206e-06, "loss": 0.902, "step": 36475 }, { "epoch": 0.4445906913823992, "grad_norm": 1.8581465482711792, "learning_rate": 2.923284156510584e-06, "loss": 0.8432, "step": 36480 }, { "epoch": 0.444651627606547, "grad_norm": 1.8766939640045166, "learning_rate": 2.922963438101347e-06, "loss": 0.8588, "step": 36485 }, { "epoch": 0.4447125638306948, "grad_norm": 1.7439675331115723, "learning_rate": 2.9226427196921104e-06, "loss": 0.8152, "step": 36490 }, { "epoch": 0.4447735000548426, "grad_norm": 1.9165326356887817, "learning_rate": 2.922322001282874e-06, "loss": 0.8475, "step": 36495 }, { "epoch": 0.4448344362789904, "grad_norm": 1.8076105117797852, "learning_rate": 2.9220012828736373e-06, "loss": 0.8234, "step": 36500 }, { "epoch": 0.4448953725031382, "grad_norm": 1.9994958639144897, "learning_rate": 2.9216805644644003e-06, "loss": 0.8182, "step": 36505 }, { "epoch": 0.44495630872728603, "grad_norm": 2.1887407302856445, "learning_rate": 2.9213598460551638e-06, "loss": 0.8611, "step": 36510 }, { "epoch": 0.44501724495143385, "grad_norm": 2.1474618911743164, "learning_rate": 2.9210391276459272e-06, "loss": 0.8645, "step": 36515 }, { "epoch": 0.4450781811755816, "grad_norm": 1.8372366428375244, "learning_rate": 2.9207184092366902e-06, "loss": 0.8121, "step": 36520 }, { "epoch": 0.44513911739972944, "grad_norm": 2.023881673812866, "learning_rate": 2.920397690827454e-06, "loss": 0.8495, "step": 36525 }, { "epoch": 0.44520005362387727, "grad_norm": 2.0713822841644287, "learning_rate": 2.920076972418217e-06, "loss": 0.7563, "step": 36530 }, { "epoch": 0.44526098984802503, "grad_norm": 1.7661077976226807, "learning_rate": 2.91975625400898e-06, "loss": 0.7762, "step": 36535 }, { "epoch": 0.44532192607217286, "grad_norm": 1.762607455253601, "learning_rate": 2.919435535599744e-06, "loss": 0.8338, "step": 36540 }, { "epoch": 0.4453828622963207, "grad_norm": 2.553595781326294, "learning_rate": 2.919114817190507e-06, "loss": 0.8742, "step": 36545 }, { "epoch": 0.4454437985204685, "grad_norm": 1.8482736349105835, "learning_rate": 2.91879409878127e-06, "loss": 0.7375, "step": 36550 }, { "epoch": 0.44550473474461627, "grad_norm": 2.0152223110198975, "learning_rate": 2.918473380372034e-06, "loss": 0.7992, "step": 36555 }, { "epoch": 0.4455656709687641, "grad_norm": 1.8909759521484375, "learning_rate": 2.918152661962797e-06, "loss": 0.825, "step": 36560 }, { "epoch": 0.4456266071929119, "grad_norm": 1.7790392637252808, "learning_rate": 2.91783194355356e-06, "loss": 0.8382, "step": 36565 }, { "epoch": 0.4456875434170597, "grad_norm": 2.1779208183288574, "learning_rate": 2.9175112251443234e-06, "loss": 0.8444, "step": 36570 }, { "epoch": 0.4457484796412075, "grad_norm": 2.0883567333221436, "learning_rate": 2.917190506735087e-06, "loss": 0.8715, "step": 36575 }, { "epoch": 0.44580941586535533, "grad_norm": 2.008579730987549, "learning_rate": 2.9168697883258503e-06, "loss": 0.8345, "step": 36580 }, { "epoch": 0.44587035208950315, "grad_norm": 1.8912146091461182, "learning_rate": 2.9165490699166133e-06, "loss": 0.8631, "step": 36585 }, { "epoch": 0.4459312883136509, "grad_norm": 2.504934549331665, "learning_rate": 2.916228351507377e-06, "loss": 0.8894, "step": 36590 }, { "epoch": 0.44599222453779874, "grad_norm": 2.0660977363586426, "learning_rate": 2.91590763309814e-06, "loss": 0.8579, "step": 36595 }, { "epoch": 0.44605316076194657, "grad_norm": 2.053130626678467, "learning_rate": 2.915586914688903e-06, "loss": 0.8586, "step": 36600 }, { "epoch": 0.44611409698609433, "grad_norm": 1.873281478881836, "learning_rate": 2.915266196279667e-06, "loss": 0.8135, "step": 36605 }, { "epoch": 0.44617503321024216, "grad_norm": 2.4856324195861816, "learning_rate": 2.91494547787043e-06, "loss": 0.8227, "step": 36610 }, { "epoch": 0.44623596943439, "grad_norm": 1.8832042217254639, "learning_rate": 2.914624759461193e-06, "loss": 0.8856, "step": 36615 }, { "epoch": 0.4462969056585378, "grad_norm": 1.852818250656128, "learning_rate": 2.914304041051957e-06, "loss": 0.9258, "step": 36620 }, { "epoch": 0.44635784188268557, "grad_norm": 2.141345262527466, "learning_rate": 2.91398332264272e-06, "loss": 0.8103, "step": 36625 }, { "epoch": 0.4464187781068334, "grad_norm": 2.027078866958618, "learning_rate": 2.913662604233483e-06, "loss": 0.8382, "step": 36630 }, { "epoch": 0.4464797143309812, "grad_norm": 1.9146288633346558, "learning_rate": 2.913341885824247e-06, "loss": 0.8153, "step": 36635 }, { "epoch": 0.446540650555129, "grad_norm": 1.7521402835845947, "learning_rate": 2.91302116741501e-06, "loss": 0.8435, "step": 36640 }, { "epoch": 0.4466015867792768, "grad_norm": 2.1098711490631104, "learning_rate": 2.9127004490057733e-06, "loss": 0.8774, "step": 36645 }, { "epoch": 0.4466625230034246, "grad_norm": 2.1328701972961426, "learning_rate": 2.9123797305965363e-06, "loss": 0.8574, "step": 36650 }, { "epoch": 0.44672345922757245, "grad_norm": 2.265664577484131, "learning_rate": 2.9120590121872997e-06, "loss": 0.8665, "step": 36655 }, { "epoch": 0.4467843954517202, "grad_norm": 2.101816177368164, "learning_rate": 2.911738293778063e-06, "loss": 0.8284, "step": 36660 }, { "epoch": 0.44684533167586804, "grad_norm": 1.9708186388015747, "learning_rate": 2.911417575368826e-06, "loss": 0.8226, "step": 36665 }, { "epoch": 0.44690626790001586, "grad_norm": 1.9660241603851318, "learning_rate": 2.91109685695959e-06, "loss": 0.7558, "step": 36670 }, { "epoch": 0.44696720412416363, "grad_norm": 1.8991670608520508, "learning_rate": 2.910776138550353e-06, "loss": 0.8212, "step": 36675 }, { "epoch": 0.44702814034831145, "grad_norm": 1.9253175258636475, "learning_rate": 2.910455420141116e-06, "loss": 0.8559, "step": 36680 }, { "epoch": 0.4470890765724593, "grad_norm": 2.3332314491271973, "learning_rate": 2.91013470173188e-06, "loss": 0.8951, "step": 36685 }, { "epoch": 0.44715001279660704, "grad_norm": 3.0066287517547607, "learning_rate": 2.909813983322643e-06, "loss": 0.7967, "step": 36690 }, { "epoch": 0.44721094902075487, "grad_norm": 1.6105902194976807, "learning_rate": 2.909493264913406e-06, "loss": 0.8242, "step": 36695 }, { "epoch": 0.4472718852449027, "grad_norm": 2.1244306564331055, "learning_rate": 2.90917254650417e-06, "loss": 0.851, "step": 36700 }, { "epoch": 0.4473328214690505, "grad_norm": 2.0907535552978516, "learning_rate": 2.908851828094933e-06, "loss": 0.8124, "step": 36705 }, { "epoch": 0.4473937576931983, "grad_norm": 2.10705828666687, "learning_rate": 2.908531109685696e-06, "loss": 0.8242, "step": 36710 }, { "epoch": 0.4474546939173461, "grad_norm": 2.1598637104034424, "learning_rate": 2.9082103912764598e-06, "loss": 0.8538, "step": 36715 }, { "epoch": 0.4475156301414939, "grad_norm": 1.8817832469940186, "learning_rate": 2.9078896728672228e-06, "loss": 0.8223, "step": 36720 }, { "epoch": 0.4475765663656417, "grad_norm": 1.9745317697525024, "learning_rate": 2.9075689544579862e-06, "loss": 0.7816, "step": 36725 }, { "epoch": 0.4476375025897895, "grad_norm": 2.053799629211426, "learning_rate": 2.9072482360487492e-06, "loss": 0.8118, "step": 36730 }, { "epoch": 0.44769843881393734, "grad_norm": 2.016979455947876, "learning_rate": 2.9069275176395127e-06, "loss": 0.851, "step": 36735 }, { "epoch": 0.44775937503808516, "grad_norm": 2.0545382499694824, "learning_rate": 2.906606799230276e-06, "loss": 0.7876, "step": 36740 }, { "epoch": 0.44782031126223293, "grad_norm": 2.2614636421203613, "learning_rate": 2.906286080821039e-06, "loss": 0.8879, "step": 36745 }, { "epoch": 0.44788124748638075, "grad_norm": 2.1140427589416504, "learning_rate": 2.905965362411803e-06, "loss": 0.8512, "step": 36750 }, { "epoch": 0.4479421837105286, "grad_norm": 1.8104890584945679, "learning_rate": 2.905644644002566e-06, "loss": 0.8589, "step": 36755 }, { "epoch": 0.44800311993467634, "grad_norm": 2.4618515968322754, "learning_rate": 2.905323925593329e-06, "loss": 0.8683, "step": 36760 }, { "epoch": 0.44806405615882416, "grad_norm": 2.1120944023132324, "learning_rate": 2.905003207184093e-06, "loss": 0.894, "step": 36765 }, { "epoch": 0.448124992382972, "grad_norm": 2.3298237323760986, "learning_rate": 2.904682488774856e-06, "loss": 0.8787, "step": 36770 }, { "epoch": 0.4481859286071198, "grad_norm": 1.9575806856155396, "learning_rate": 2.904361770365619e-06, "loss": 0.8845, "step": 36775 }, { "epoch": 0.4482468648312676, "grad_norm": 2.119417667388916, "learning_rate": 2.9040410519563828e-06, "loss": 0.814, "step": 36780 }, { "epoch": 0.4483078010554154, "grad_norm": 1.810543417930603, "learning_rate": 2.903720333547146e-06, "loss": 0.8302, "step": 36785 }, { "epoch": 0.4483687372795632, "grad_norm": 1.862260103225708, "learning_rate": 2.903399615137909e-06, "loss": 0.8406, "step": 36790 }, { "epoch": 0.448429673503711, "grad_norm": 1.6379238367080688, "learning_rate": 2.9030788967286727e-06, "loss": 0.8132, "step": 36795 }, { "epoch": 0.4484906097278588, "grad_norm": 1.960680603981018, "learning_rate": 2.9027581783194357e-06, "loss": 0.8743, "step": 36800 }, { "epoch": 0.44855154595200664, "grad_norm": 1.8538151979446411, "learning_rate": 2.902437459910199e-06, "loss": 0.8022, "step": 36805 }, { "epoch": 0.44861248217615446, "grad_norm": 1.7035820484161377, "learning_rate": 2.902116741500962e-06, "loss": 0.7665, "step": 36810 }, { "epoch": 0.4486734184003022, "grad_norm": 1.9659098386764526, "learning_rate": 2.901796023091726e-06, "loss": 0.8566, "step": 36815 }, { "epoch": 0.44873435462445005, "grad_norm": 2.154517650604248, "learning_rate": 2.901475304682489e-06, "loss": 0.8505, "step": 36820 }, { "epoch": 0.44879529084859787, "grad_norm": 1.8251065015792847, "learning_rate": 2.901154586273252e-06, "loss": 0.8907, "step": 36825 }, { "epoch": 0.44885622707274564, "grad_norm": 1.912399411201477, "learning_rate": 2.900833867864016e-06, "loss": 0.8248, "step": 36830 }, { "epoch": 0.44891716329689346, "grad_norm": 1.6987282037734985, "learning_rate": 2.900513149454779e-06, "loss": 0.7992, "step": 36835 }, { "epoch": 0.4489780995210413, "grad_norm": 1.817603349685669, "learning_rate": 2.900192431045542e-06, "loss": 0.8299, "step": 36840 }, { "epoch": 0.4490390357451891, "grad_norm": 1.845241904258728, "learning_rate": 2.899871712636306e-06, "loss": 0.8631, "step": 36845 }, { "epoch": 0.4490999719693369, "grad_norm": 1.949288249015808, "learning_rate": 2.899550994227069e-06, "loss": 0.8591, "step": 36850 }, { "epoch": 0.4491609081934847, "grad_norm": 2.9317848682403564, "learning_rate": 2.899230275817832e-06, "loss": 0.871, "step": 36855 }, { "epoch": 0.4492218444176325, "grad_norm": 1.7633520364761353, "learning_rate": 2.8989095574085957e-06, "loss": 0.8585, "step": 36860 }, { "epoch": 0.4492827806417803, "grad_norm": 2.038548231124878, "learning_rate": 2.8985888389993587e-06, "loss": 0.8699, "step": 36865 }, { "epoch": 0.4493437168659281, "grad_norm": 1.9989267587661743, "learning_rate": 2.898268120590122e-06, "loss": 0.9057, "step": 36870 }, { "epoch": 0.44940465309007593, "grad_norm": 1.864705204963684, "learning_rate": 2.8979474021808856e-06, "loss": 0.8199, "step": 36875 }, { "epoch": 0.44946558931422376, "grad_norm": 1.963340163230896, "learning_rate": 2.8976266837716486e-06, "loss": 0.896, "step": 36880 }, { "epoch": 0.4495265255383715, "grad_norm": 1.7991734743118286, "learning_rate": 2.897305965362412e-06, "loss": 0.8255, "step": 36885 }, { "epoch": 0.44958746176251935, "grad_norm": 1.895805835723877, "learning_rate": 2.8969852469531755e-06, "loss": 0.8001, "step": 36890 }, { "epoch": 0.44964839798666717, "grad_norm": 2.001394510269165, "learning_rate": 2.896664528543939e-06, "loss": 0.8599, "step": 36895 }, { "epoch": 0.44970933421081494, "grad_norm": 1.7300466299057007, "learning_rate": 2.896343810134702e-06, "loss": 0.9958, "step": 36900 }, { "epoch": 0.44977027043496276, "grad_norm": 2.044987440109253, "learning_rate": 2.896023091725465e-06, "loss": 0.8639, "step": 36905 }, { "epoch": 0.4498312066591106, "grad_norm": 1.8896684646606445, "learning_rate": 2.895702373316229e-06, "loss": 0.8195, "step": 36910 }, { "epoch": 0.4498921428832584, "grad_norm": 1.8246420621871948, "learning_rate": 2.895381654906992e-06, "loss": 0.8342, "step": 36915 }, { "epoch": 0.4499530791074062, "grad_norm": 2.36802339553833, "learning_rate": 2.895060936497755e-06, "loss": 0.8924, "step": 36920 }, { "epoch": 0.450014015331554, "grad_norm": 2.276348829269409, "learning_rate": 2.8947402180885187e-06, "loss": 0.8214, "step": 36925 }, { "epoch": 0.4500749515557018, "grad_norm": 1.5621473789215088, "learning_rate": 2.8944194996792818e-06, "loss": 0.8492, "step": 36930 }, { "epoch": 0.4501358877798496, "grad_norm": 2.155764579772949, "learning_rate": 2.8940987812700448e-06, "loss": 0.8667, "step": 36935 }, { "epoch": 0.4501968240039974, "grad_norm": 1.8961561918258667, "learning_rate": 2.8937780628608086e-06, "loss": 0.7935, "step": 36940 }, { "epoch": 0.45025776022814523, "grad_norm": 2.5877676010131836, "learning_rate": 2.8934573444515717e-06, "loss": 0.8307, "step": 36945 }, { "epoch": 0.45031869645229305, "grad_norm": 2.2132575511932373, "learning_rate": 2.893136626042335e-06, "loss": 0.8156, "step": 36950 }, { "epoch": 0.4503796326764408, "grad_norm": 1.8972996473312378, "learning_rate": 2.8928159076330985e-06, "loss": 0.7836, "step": 36955 }, { "epoch": 0.45044056890058864, "grad_norm": 2.405813455581665, "learning_rate": 2.8924951892238616e-06, "loss": 0.8993, "step": 36960 }, { "epoch": 0.45050150512473647, "grad_norm": 1.7794700860977173, "learning_rate": 2.892174470814625e-06, "loss": 0.8032, "step": 36965 }, { "epoch": 0.45056244134888424, "grad_norm": 1.9196858406066895, "learning_rate": 2.8918537524053884e-06, "loss": 0.8621, "step": 36970 }, { "epoch": 0.45062337757303206, "grad_norm": 2.0340983867645264, "learning_rate": 2.891533033996152e-06, "loss": 0.8621, "step": 36975 }, { "epoch": 0.4506843137971799, "grad_norm": 1.5878889560699463, "learning_rate": 2.891212315586915e-06, "loss": 0.8149, "step": 36980 }, { "epoch": 0.4507452500213277, "grad_norm": 1.782091736793518, "learning_rate": 2.890891597177678e-06, "loss": 0.8291, "step": 36985 }, { "epoch": 0.45080618624547547, "grad_norm": 1.8916877508163452, "learning_rate": 2.8905708787684418e-06, "loss": 0.8685, "step": 36990 }, { "epoch": 0.4508671224696233, "grad_norm": 2.4095559120178223, "learning_rate": 2.890250160359205e-06, "loss": 0.9038, "step": 36995 }, { "epoch": 0.4509280586937711, "grad_norm": 2.270524501800537, "learning_rate": 2.889929441949968e-06, "loss": 0.825, "step": 37000 }, { "epoch": 0.4509889949179189, "grad_norm": 1.9095101356506348, "learning_rate": 2.8896087235407317e-06, "loss": 0.8171, "step": 37005 }, { "epoch": 0.4510499311420667, "grad_norm": 2.069505453109741, "learning_rate": 2.8892880051314947e-06, "loss": 0.8363, "step": 37010 }, { "epoch": 0.45111086736621453, "grad_norm": 2.154772996902466, "learning_rate": 2.8889672867222577e-06, "loss": 0.8239, "step": 37015 }, { "epoch": 0.45117180359036235, "grad_norm": 1.7461873292922974, "learning_rate": 2.8886465683130216e-06, "loss": 0.919, "step": 37020 }, { "epoch": 0.4512327398145101, "grad_norm": 2.0164215564727783, "learning_rate": 2.8883258499037846e-06, "loss": 0.8854, "step": 37025 }, { "epoch": 0.45129367603865794, "grad_norm": 1.945056676864624, "learning_rate": 2.888005131494548e-06, "loss": 0.8484, "step": 37030 }, { "epoch": 0.45135461226280577, "grad_norm": 1.6384094953536987, "learning_rate": 2.8876844130853115e-06, "loss": 0.8642, "step": 37035 }, { "epoch": 0.45141554848695353, "grad_norm": 1.8976318836212158, "learning_rate": 2.8873636946760745e-06, "loss": 0.8365, "step": 37040 }, { "epoch": 0.45147648471110136, "grad_norm": 2.130965232849121, "learning_rate": 2.887042976266838e-06, "loss": 0.8706, "step": 37045 }, { "epoch": 0.4515374209352492, "grad_norm": 2.0957791805267334, "learning_rate": 2.8867222578576014e-06, "loss": 0.9073, "step": 37050 }, { "epoch": 0.451598357159397, "grad_norm": 1.6741026639938354, "learning_rate": 2.886401539448365e-06, "loss": 0.8579, "step": 37055 }, { "epoch": 0.45165929338354477, "grad_norm": 1.85493803024292, "learning_rate": 2.886080821039128e-06, "loss": 0.736, "step": 37060 }, { "epoch": 0.4517202296076926, "grad_norm": 1.9029840230941772, "learning_rate": 2.885760102629891e-06, "loss": 0.8428, "step": 37065 }, { "epoch": 0.4517811658318404, "grad_norm": 1.9224231243133545, "learning_rate": 2.8854393842206547e-06, "loss": 0.9126, "step": 37070 }, { "epoch": 0.4518421020559882, "grad_norm": 2.1216447353363037, "learning_rate": 2.8851186658114177e-06, "loss": 0.8521, "step": 37075 }, { "epoch": 0.451903038280136, "grad_norm": 1.607014536857605, "learning_rate": 2.8847979474021807e-06, "loss": 0.8476, "step": 37080 }, { "epoch": 0.4519639745042838, "grad_norm": 1.9662377834320068, "learning_rate": 2.8844772289929446e-06, "loss": 0.901, "step": 37085 }, { "epoch": 0.45202491072843165, "grad_norm": 2.0988237857818604, "learning_rate": 2.8841565105837076e-06, "loss": 0.8864, "step": 37090 }, { "epoch": 0.4520858469525794, "grad_norm": 2.0092663764953613, "learning_rate": 2.883835792174471e-06, "loss": 0.8714, "step": 37095 }, { "epoch": 0.45214678317672724, "grad_norm": 1.871936321258545, "learning_rate": 2.8835150737652345e-06, "loss": 0.7788, "step": 37100 }, { "epoch": 0.45220771940087506, "grad_norm": 2.2401351928710938, "learning_rate": 2.8831943553559975e-06, "loss": 0.8188, "step": 37105 }, { "epoch": 0.45226865562502283, "grad_norm": 1.8578848838806152, "learning_rate": 2.882873636946761e-06, "loss": 0.834, "step": 37110 }, { "epoch": 0.45232959184917065, "grad_norm": 2.6148135662078857, "learning_rate": 2.8825529185375244e-06, "loss": 0.8636, "step": 37115 }, { "epoch": 0.4523905280733185, "grad_norm": 2.0471925735473633, "learning_rate": 2.882232200128288e-06, "loss": 0.8092, "step": 37120 }, { "epoch": 0.4524514642974663, "grad_norm": 1.7734602689743042, "learning_rate": 2.881911481719051e-06, "loss": 0.7943, "step": 37125 }, { "epoch": 0.45251240052161407, "grad_norm": 2.3309688568115234, "learning_rate": 2.8815907633098143e-06, "loss": 0.8934, "step": 37130 }, { "epoch": 0.4525733367457619, "grad_norm": 2.02360463142395, "learning_rate": 2.8812700449005777e-06, "loss": 0.8536, "step": 37135 }, { "epoch": 0.4526342729699097, "grad_norm": 2.2109084129333496, "learning_rate": 2.8809493264913407e-06, "loss": 0.8148, "step": 37140 }, { "epoch": 0.4526952091940575, "grad_norm": 1.601645588874817, "learning_rate": 2.8806286080821038e-06, "loss": 0.8576, "step": 37145 }, { "epoch": 0.4527561454182053, "grad_norm": 1.9478405714035034, "learning_rate": 2.8803078896728676e-06, "loss": 0.8841, "step": 37150 }, { "epoch": 0.4528170816423531, "grad_norm": 1.9685076475143433, "learning_rate": 2.8799871712636306e-06, "loss": 0.8101, "step": 37155 }, { "epoch": 0.4528780178665009, "grad_norm": 1.8421481847763062, "learning_rate": 2.8796664528543937e-06, "loss": 0.8215, "step": 37160 }, { "epoch": 0.4529389540906487, "grad_norm": 2.3217992782592773, "learning_rate": 2.8793457344451575e-06, "loss": 0.9305, "step": 37165 }, { "epoch": 0.45299989031479654, "grad_norm": 1.9995250701904297, "learning_rate": 2.8790250160359205e-06, "loss": 0.8288, "step": 37170 }, { "epoch": 0.45306082653894436, "grad_norm": 2.3412742614746094, "learning_rate": 2.878704297626684e-06, "loss": 0.8131, "step": 37175 }, { "epoch": 0.45312176276309213, "grad_norm": 2.0331692695617676, "learning_rate": 2.8783835792174474e-06, "loss": 0.8399, "step": 37180 }, { "epoch": 0.45318269898723995, "grad_norm": 1.9468765258789062, "learning_rate": 2.8780628608082104e-06, "loss": 0.8166, "step": 37185 }, { "epoch": 0.4532436352113878, "grad_norm": 2.135422706604004, "learning_rate": 2.877742142398974e-06, "loss": 0.7844, "step": 37190 }, { "epoch": 0.45330457143553554, "grad_norm": 1.7121715545654297, "learning_rate": 2.8774214239897373e-06, "loss": 0.8576, "step": 37195 }, { "epoch": 0.45336550765968336, "grad_norm": 1.8606520891189575, "learning_rate": 2.8771007055805008e-06, "loss": 0.8426, "step": 37200 }, { "epoch": 0.4534264438838312, "grad_norm": 1.9856849908828735, "learning_rate": 2.8767799871712638e-06, "loss": 0.8238, "step": 37205 }, { "epoch": 0.453487380107979, "grad_norm": 2.189692497253418, "learning_rate": 2.8764592687620272e-06, "loss": 0.8848, "step": 37210 }, { "epoch": 0.4535483163321268, "grad_norm": 2.000430107116699, "learning_rate": 2.8761385503527907e-06, "loss": 0.8834, "step": 37215 }, { "epoch": 0.4536092525562746, "grad_norm": 2.2406063079833984, "learning_rate": 2.8758178319435537e-06, "loss": 0.845, "step": 37220 }, { "epoch": 0.4536701887804224, "grad_norm": 1.787050724029541, "learning_rate": 2.8754971135343175e-06, "loss": 0.785, "step": 37225 }, { "epoch": 0.4537311250045702, "grad_norm": 1.8928076028823853, "learning_rate": 2.8751763951250806e-06, "loss": 0.8741, "step": 37230 }, { "epoch": 0.453792061228718, "grad_norm": 1.9194198846817017, "learning_rate": 2.8748556767158436e-06, "loss": 0.876, "step": 37235 }, { "epoch": 0.45385299745286584, "grad_norm": 2.5072243213653564, "learning_rate": 2.8745349583066066e-06, "loss": 0.861, "step": 37240 }, { "epoch": 0.45391393367701366, "grad_norm": 2.044102191925049, "learning_rate": 2.8742142398973705e-06, "loss": 0.8576, "step": 37245 }, { "epoch": 0.4539748699011614, "grad_norm": 2.043935775756836, "learning_rate": 2.8738935214881335e-06, "loss": 0.8423, "step": 37250 }, { "epoch": 0.45403580612530925, "grad_norm": 1.800864815711975, "learning_rate": 2.873572803078897e-06, "loss": 0.8419, "step": 37255 }, { "epoch": 0.45409674234945707, "grad_norm": 1.841280460357666, "learning_rate": 2.8732520846696603e-06, "loss": 0.8393, "step": 37260 }, { "epoch": 0.45415767857360484, "grad_norm": 2.7210428714752197, "learning_rate": 2.8729313662604234e-06, "loss": 0.8792, "step": 37265 }, { "epoch": 0.45421861479775266, "grad_norm": 2.284984588623047, "learning_rate": 2.872610647851187e-06, "loss": 0.831, "step": 37270 }, { "epoch": 0.4542795510219005, "grad_norm": 1.911118984222412, "learning_rate": 2.8722899294419502e-06, "loss": 0.8698, "step": 37275 }, { "epoch": 0.4543404872460483, "grad_norm": 2.0450713634490967, "learning_rate": 2.8719692110327137e-06, "loss": 0.8019, "step": 37280 }, { "epoch": 0.4544014234701961, "grad_norm": 2.088486909866333, "learning_rate": 2.8716484926234767e-06, "loss": 0.7736, "step": 37285 }, { "epoch": 0.4544623596943439, "grad_norm": 1.9321835041046143, "learning_rate": 2.8713277742142406e-06, "loss": 0.8415, "step": 37290 }, { "epoch": 0.4545232959184917, "grad_norm": 1.5417622327804565, "learning_rate": 2.8710070558050036e-06, "loss": 0.779, "step": 37295 }, { "epoch": 0.4545842321426395, "grad_norm": 1.8859606981277466, "learning_rate": 2.8706863373957666e-06, "loss": 0.8048, "step": 37300 }, { "epoch": 0.4546451683667873, "grad_norm": 1.6545168161392212, "learning_rate": 2.8703656189865305e-06, "loss": 0.8489, "step": 37305 }, { "epoch": 0.45470610459093513, "grad_norm": 2.1518313884735107, "learning_rate": 2.8700449005772935e-06, "loss": 0.8245, "step": 37310 }, { "epoch": 0.45476704081508296, "grad_norm": 1.7939773797988892, "learning_rate": 2.8697241821680565e-06, "loss": 0.8371, "step": 37315 }, { "epoch": 0.4548279770392307, "grad_norm": 1.9731084108352661, "learning_rate": 2.86940346375882e-06, "loss": 0.8836, "step": 37320 }, { "epoch": 0.45488891326337855, "grad_norm": 2.147303819656372, "learning_rate": 2.8690827453495834e-06, "loss": 0.8265, "step": 37325 }, { "epoch": 0.45494984948752637, "grad_norm": 1.9019134044647217, "learning_rate": 2.8687620269403464e-06, "loss": 0.8442, "step": 37330 }, { "epoch": 0.45501078571167414, "grad_norm": 1.9007303714752197, "learning_rate": 2.86844130853111e-06, "loss": 0.8172, "step": 37335 }, { "epoch": 0.45507172193582196, "grad_norm": 1.7591073513031006, "learning_rate": 2.8681205901218733e-06, "loss": 0.8803, "step": 37340 }, { "epoch": 0.4551326581599698, "grad_norm": 1.9429610967636108, "learning_rate": 2.8677998717126367e-06, "loss": 0.8773, "step": 37345 }, { "epoch": 0.4551935943841176, "grad_norm": 2.2258098125457764, "learning_rate": 2.8674791533033997e-06, "loss": 0.8985, "step": 37350 }, { "epoch": 0.4552545306082654, "grad_norm": 2.244579553604126, "learning_rate": 2.867158434894163e-06, "loss": 0.8518, "step": 37355 }, { "epoch": 0.4553154668324132, "grad_norm": 1.7905349731445312, "learning_rate": 2.8668377164849266e-06, "loss": 0.7387, "step": 37360 }, { "epoch": 0.455376403056561, "grad_norm": 1.9314945936203003, "learning_rate": 2.8665169980756896e-06, "loss": 0.7904, "step": 37365 }, { "epoch": 0.4554373392807088, "grad_norm": 1.934594750404358, "learning_rate": 2.8661962796664535e-06, "loss": 0.8688, "step": 37370 }, { "epoch": 0.4554982755048566, "grad_norm": 1.8495842218399048, "learning_rate": 2.8658755612572165e-06, "loss": 0.7941, "step": 37375 }, { "epoch": 0.45555921172900443, "grad_norm": 1.7799948453903198, "learning_rate": 2.8655548428479795e-06, "loss": 0.8222, "step": 37380 }, { "epoch": 0.45562014795315225, "grad_norm": 1.8998825550079346, "learning_rate": 2.8652341244387434e-06, "loss": 0.8559, "step": 37385 }, { "epoch": 0.4556810841773, "grad_norm": 1.5734155178070068, "learning_rate": 2.8649134060295064e-06, "loss": 0.7953, "step": 37390 }, { "epoch": 0.45574202040144784, "grad_norm": 1.8763447999954224, "learning_rate": 2.8645926876202694e-06, "loss": 0.8601, "step": 37395 }, { "epoch": 0.45580295662559567, "grad_norm": 1.7447587251663208, "learning_rate": 2.864271969211033e-06, "loss": 0.8932, "step": 37400 }, { "epoch": 0.45586389284974344, "grad_norm": 1.9839204549789429, "learning_rate": 2.8639512508017963e-06, "loss": 0.9384, "step": 37405 }, { "epoch": 0.45592482907389126, "grad_norm": 1.8000001907348633, "learning_rate": 2.8636305323925593e-06, "loss": 0.8126, "step": 37410 }, { "epoch": 0.4559857652980391, "grad_norm": 1.9492149353027344, "learning_rate": 2.8633098139833228e-06, "loss": 0.8481, "step": 37415 }, { "epoch": 0.4560467015221869, "grad_norm": 2.513869047164917, "learning_rate": 2.862989095574086e-06, "loss": 0.884, "step": 37420 }, { "epoch": 0.45610763774633467, "grad_norm": 2.010028839111328, "learning_rate": 2.8626683771648496e-06, "loss": 0.7973, "step": 37425 }, { "epoch": 0.4561685739704825, "grad_norm": 2.014078378677368, "learning_rate": 2.8623476587556127e-06, "loss": 0.8071, "step": 37430 }, { "epoch": 0.4562295101946303, "grad_norm": 2.266187906265259, "learning_rate": 2.862026940346376e-06, "loss": 0.8134, "step": 37435 }, { "epoch": 0.4562904464187781, "grad_norm": 2.171738862991333, "learning_rate": 2.8617062219371395e-06, "loss": 0.9275, "step": 37440 }, { "epoch": 0.4563513826429259, "grad_norm": 1.9580122232437134, "learning_rate": 2.8613855035279026e-06, "loss": 0.8148, "step": 37445 }, { "epoch": 0.45641231886707373, "grad_norm": 1.8062214851379395, "learning_rate": 2.8610647851186664e-06, "loss": 0.8627, "step": 37450 }, { "epoch": 0.45647325509122155, "grad_norm": 1.9815007448196411, "learning_rate": 2.8607440667094294e-06, "loss": 0.8903, "step": 37455 }, { "epoch": 0.4565341913153693, "grad_norm": 2.1874632835388184, "learning_rate": 2.8604233483001925e-06, "loss": 0.8307, "step": 37460 }, { "epoch": 0.45659512753951714, "grad_norm": 2.271594524383545, "learning_rate": 2.8601026298909563e-06, "loss": 0.8907, "step": 37465 }, { "epoch": 0.45665606376366497, "grad_norm": 2.039032220840454, "learning_rate": 2.8597819114817193e-06, "loss": 0.857, "step": 37470 }, { "epoch": 0.45671699998781273, "grad_norm": 1.7580044269561768, "learning_rate": 2.8594611930724824e-06, "loss": 0.8126, "step": 37475 }, { "epoch": 0.45677793621196056, "grad_norm": 2.012646198272705, "learning_rate": 2.8591404746632462e-06, "loss": 0.794, "step": 37480 }, { "epoch": 0.4568388724361084, "grad_norm": 1.915771722793579, "learning_rate": 2.8588197562540092e-06, "loss": 0.8865, "step": 37485 }, { "epoch": 0.4568998086602562, "grad_norm": 2.116464614868164, "learning_rate": 2.8584990378447723e-06, "loss": 0.7849, "step": 37490 }, { "epoch": 0.45696074488440397, "grad_norm": 1.827295184135437, "learning_rate": 2.8581783194355357e-06, "loss": 0.8161, "step": 37495 }, { "epoch": 0.4570216811085518, "grad_norm": 2.0543177127838135, "learning_rate": 2.857857601026299e-06, "loss": 0.8701, "step": 37500 }, { "epoch": 0.4570826173326996, "grad_norm": 2.0085933208465576, "learning_rate": 2.8575368826170626e-06, "loss": 0.8896, "step": 37505 }, { "epoch": 0.4571435535568474, "grad_norm": 2.0005369186401367, "learning_rate": 2.8572161642078256e-06, "loss": 0.8055, "step": 37510 }, { "epoch": 0.4572044897809952, "grad_norm": 2.4027905464172363, "learning_rate": 2.856895445798589e-06, "loss": 0.8165, "step": 37515 }, { "epoch": 0.457265426005143, "grad_norm": 1.899946689605713, "learning_rate": 2.8565747273893525e-06, "loss": 0.7959, "step": 37520 }, { "epoch": 0.45732636222929085, "grad_norm": 1.8684782981872559, "learning_rate": 2.8562540089801155e-06, "loss": 0.9147, "step": 37525 }, { "epoch": 0.4573872984534386, "grad_norm": 2.114424228668213, "learning_rate": 2.8559332905708793e-06, "loss": 0.8393, "step": 37530 }, { "epoch": 0.45744823467758644, "grad_norm": 1.9396510124206543, "learning_rate": 2.8556125721616424e-06, "loss": 0.8702, "step": 37535 }, { "epoch": 0.45750917090173426, "grad_norm": 2.052211284637451, "learning_rate": 2.8552918537524054e-06, "loss": 0.8266, "step": 37540 }, { "epoch": 0.45757010712588203, "grad_norm": 1.838747262954712, "learning_rate": 2.8549711353431692e-06, "loss": 0.8711, "step": 37545 }, { "epoch": 0.45763104335002985, "grad_norm": 2.086775541305542, "learning_rate": 2.8546504169339323e-06, "loss": 0.8124, "step": 37550 }, { "epoch": 0.4576919795741777, "grad_norm": 1.9713455438613892, "learning_rate": 2.8543296985246953e-06, "loss": 0.8932, "step": 37555 }, { "epoch": 0.4577529157983255, "grad_norm": 1.8741564750671387, "learning_rate": 2.854008980115459e-06, "loss": 0.9007, "step": 37560 }, { "epoch": 0.45781385202247327, "grad_norm": 1.6857672929763794, "learning_rate": 2.853688261706222e-06, "loss": 0.8514, "step": 37565 }, { "epoch": 0.4578747882466211, "grad_norm": 1.9193158149719238, "learning_rate": 2.8533675432969856e-06, "loss": 0.779, "step": 37570 }, { "epoch": 0.4579357244707689, "grad_norm": 2.223527193069458, "learning_rate": 2.8530468248877486e-06, "loss": 0.8128, "step": 37575 }, { "epoch": 0.4579966606949167, "grad_norm": 1.9974088668823242, "learning_rate": 2.852726106478512e-06, "loss": 0.8498, "step": 37580 }, { "epoch": 0.4580575969190645, "grad_norm": 1.8432663679122925, "learning_rate": 2.8524053880692755e-06, "loss": 0.7825, "step": 37585 }, { "epoch": 0.4581185331432123, "grad_norm": 1.9844108819961548, "learning_rate": 2.8520846696600385e-06, "loss": 0.8713, "step": 37590 }, { "epoch": 0.45817946936736015, "grad_norm": 2.5203089714050293, "learning_rate": 2.8517639512508024e-06, "loss": 0.7404, "step": 37595 }, { "epoch": 0.4582404055915079, "grad_norm": 1.8152146339416504, "learning_rate": 2.8514432328415654e-06, "loss": 0.8418, "step": 37600 }, { "epoch": 0.45830134181565574, "grad_norm": 1.6080281734466553, "learning_rate": 2.8511225144323284e-06, "loss": 0.8333, "step": 37605 }, { "epoch": 0.45836227803980356, "grad_norm": 2.398078441619873, "learning_rate": 2.8508017960230923e-06, "loss": 0.8513, "step": 37610 }, { "epoch": 0.45842321426395133, "grad_norm": 1.8019413948059082, "learning_rate": 2.8504810776138553e-06, "loss": 0.8578, "step": 37615 }, { "epoch": 0.45848415048809915, "grad_norm": 2.4638173580169678, "learning_rate": 2.8501603592046183e-06, "loss": 0.8793, "step": 37620 }, { "epoch": 0.458545086712247, "grad_norm": 1.9956905841827393, "learning_rate": 2.849839640795382e-06, "loss": 0.8177, "step": 37625 }, { "epoch": 0.45860602293639474, "grad_norm": 2.1860458850860596, "learning_rate": 2.849518922386145e-06, "loss": 0.8681, "step": 37630 }, { "epoch": 0.45866695916054256, "grad_norm": 1.8562620878219604, "learning_rate": 2.849198203976908e-06, "loss": 0.8919, "step": 37635 }, { "epoch": 0.4587278953846904, "grad_norm": 2.1691792011260986, "learning_rate": 2.848877485567672e-06, "loss": 0.85, "step": 37640 }, { "epoch": 0.4587888316088382, "grad_norm": 2.2458112239837646, "learning_rate": 2.848556767158435e-06, "loss": 0.8741, "step": 37645 }, { "epoch": 0.458849767832986, "grad_norm": 2.307649850845337, "learning_rate": 2.8482360487491985e-06, "loss": 0.8215, "step": 37650 }, { "epoch": 0.4589107040571338, "grad_norm": 1.9046140909194946, "learning_rate": 2.8479153303399615e-06, "loss": 0.9142, "step": 37655 }, { "epoch": 0.4589716402812816, "grad_norm": 1.8327850103378296, "learning_rate": 2.847594611930725e-06, "loss": 0.8585, "step": 37660 }, { "epoch": 0.4590325765054294, "grad_norm": 1.8199225664138794, "learning_rate": 2.8472738935214884e-06, "loss": 0.8423, "step": 37665 }, { "epoch": 0.4590935127295772, "grad_norm": 2.1321208477020264, "learning_rate": 2.8469531751122514e-06, "loss": 0.7735, "step": 37670 }, { "epoch": 0.45915444895372504, "grad_norm": 1.8216527700424194, "learning_rate": 2.8466324567030153e-06, "loss": 0.8411, "step": 37675 }, { "epoch": 0.45921538517787286, "grad_norm": 2.0237040519714355, "learning_rate": 2.8463117382937783e-06, "loss": 0.7803, "step": 37680 }, { "epoch": 0.4592763214020206, "grad_norm": 2.0615158081054688, "learning_rate": 2.8459910198845413e-06, "loss": 0.8775, "step": 37685 }, { "epoch": 0.45933725762616845, "grad_norm": 2.0167007446289062, "learning_rate": 2.845670301475305e-06, "loss": 0.8185, "step": 37690 }, { "epoch": 0.45939819385031627, "grad_norm": 2.113938093185425, "learning_rate": 2.8453495830660682e-06, "loss": 0.82, "step": 37695 }, { "epoch": 0.45945913007446404, "grad_norm": 1.8947207927703857, "learning_rate": 2.8450288646568312e-06, "loss": 0.8691, "step": 37700 }, { "epoch": 0.45952006629861186, "grad_norm": 2.494603157043457, "learning_rate": 2.844708146247595e-06, "loss": 0.832, "step": 37705 }, { "epoch": 0.4595810025227597, "grad_norm": 2.343632936477661, "learning_rate": 2.844387427838358e-06, "loss": 0.8439, "step": 37710 }, { "epoch": 0.4596419387469075, "grad_norm": 2.1679465770721436, "learning_rate": 2.844066709429121e-06, "loss": 0.8419, "step": 37715 }, { "epoch": 0.4597028749710553, "grad_norm": 1.8721656799316406, "learning_rate": 2.843745991019885e-06, "loss": 0.7896, "step": 37720 }, { "epoch": 0.4597638111952031, "grad_norm": 2.0075902938842773, "learning_rate": 2.843425272610648e-06, "loss": 0.8412, "step": 37725 }, { "epoch": 0.4598247474193509, "grad_norm": 2.2700040340423584, "learning_rate": 2.8431045542014115e-06, "loss": 0.8263, "step": 37730 }, { "epoch": 0.4598856836434987, "grad_norm": 1.8006988763809204, "learning_rate": 2.8427838357921745e-06, "loss": 0.8106, "step": 37735 }, { "epoch": 0.4599466198676465, "grad_norm": 1.8360350131988525, "learning_rate": 2.842463117382938e-06, "loss": 0.8402, "step": 37740 }, { "epoch": 0.46000755609179433, "grad_norm": 1.5033361911773682, "learning_rate": 2.8421423989737014e-06, "loss": 0.8249, "step": 37745 }, { "epoch": 0.46006849231594216, "grad_norm": 1.7689837217330933, "learning_rate": 2.8418216805644644e-06, "loss": 0.8311, "step": 37750 }, { "epoch": 0.4601294285400899, "grad_norm": 1.759842872619629, "learning_rate": 2.8415009621552282e-06, "loss": 0.8299, "step": 37755 }, { "epoch": 0.46019036476423775, "grad_norm": 1.7933931350708008, "learning_rate": 2.8411802437459912e-06, "loss": 0.8083, "step": 37760 }, { "epoch": 0.46025130098838557, "grad_norm": 1.7402516603469849, "learning_rate": 2.8408595253367543e-06, "loss": 0.8562, "step": 37765 }, { "epoch": 0.46031223721253334, "grad_norm": 1.735795497894287, "learning_rate": 2.840538806927518e-06, "loss": 0.8874, "step": 37770 }, { "epoch": 0.46037317343668116, "grad_norm": 1.9540644884109497, "learning_rate": 2.840218088518281e-06, "loss": 0.8556, "step": 37775 }, { "epoch": 0.460434109660829, "grad_norm": 1.8521313667297363, "learning_rate": 2.839897370109044e-06, "loss": 0.8882, "step": 37780 }, { "epoch": 0.4604950458849768, "grad_norm": 2.1216049194335938, "learning_rate": 2.839576651699808e-06, "loss": 0.8976, "step": 37785 }, { "epoch": 0.4605559821091246, "grad_norm": 2.0073623657226562, "learning_rate": 2.839255933290571e-06, "loss": 0.793, "step": 37790 }, { "epoch": 0.4606169183332724, "grad_norm": 2.2842352390289307, "learning_rate": 2.8389352148813345e-06, "loss": 0.8056, "step": 37795 }, { "epoch": 0.4606778545574202, "grad_norm": 2.005450487136841, "learning_rate": 2.838614496472098e-06, "loss": 0.8131, "step": 37800 }, { "epoch": 0.460738790781568, "grad_norm": 2.1530022621154785, "learning_rate": 2.838293778062861e-06, "loss": 0.8329, "step": 37805 }, { "epoch": 0.4607997270057158, "grad_norm": 1.9864269495010376, "learning_rate": 2.8379730596536244e-06, "loss": 0.8909, "step": 37810 }, { "epoch": 0.46086066322986363, "grad_norm": 2.1857523918151855, "learning_rate": 2.837652341244388e-06, "loss": 0.8502, "step": 37815 }, { "epoch": 0.46092159945401145, "grad_norm": 2.0211148262023926, "learning_rate": 2.8373316228351513e-06, "loss": 0.8646, "step": 37820 }, { "epoch": 0.4609825356781592, "grad_norm": 2.030186653137207, "learning_rate": 2.8370109044259143e-06, "loss": 0.8145, "step": 37825 }, { "epoch": 0.46104347190230704, "grad_norm": 2.5452592372894287, "learning_rate": 2.8366901860166773e-06, "loss": 0.8085, "step": 37830 }, { "epoch": 0.46110440812645487, "grad_norm": 2.1367714405059814, "learning_rate": 2.836369467607441e-06, "loss": 0.835, "step": 37835 }, { "epoch": 0.46116534435060264, "grad_norm": 2.3346590995788574, "learning_rate": 2.836048749198204e-06, "loss": 0.8111, "step": 37840 }, { "epoch": 0.46122628057475046, "grad_norm": 2.091779947280884, "learning_rate": 2.835728030788967e-06, "loss": 0.842, "step": 37845 }, { "epoch": 0.4612872167988983, "grad_norm": 1.8810447454452515, "learning_rate": 2.835407312379731e-06, "loss": 0.8391, "step": 37850 }, { "epoch": 0.4613481530230461, "grad_norm": 1.9033379554748535, "learning_rate": 2.835086593970494e-06, "loss": 0.8915, "step": 37855 }, { "epoch": 0.46140908924719387, "grad_norm": 2.0827598571777344, "learning_rate": 2.834765875561257e-06, "loss": 0.7832, "step": 37860 }, { "epoch": 0.4614700254713417, "grad_norm": 2.0633437633514404, "learning_rate": 2.834445157152021e-06, "loss": 0.8212, "step": 37865 }, { "epoch": 0.4615309616954895, "grad_norm": 2.415536880493164, "learning_rate": 2.834124438742784e-06, "loss": 0.851, "step": 37870 }, { "epoch": 0.4615918979196373, "grad_norm": 2.006359577178955, "learning_rate": 2.8338037203335474e-06, "loss": 0.7942, "step": 37875 }, { "epoch": 0.4616528341437851, "grad_norm": 1.8420356512069702, "learning_rate": 2.833483001924311e-06, "loss": 0.8092, "step": 37880 }, { "epoch": 0.46171377036793293, "grad_norm": 2.349168300628662, "learning_rate": 2.833162283515074e-06, "loss": 0.8651, "step": 37885 }, { "epoch": 0.46177470659208075, "grad_norm": 1.8048770427703857, "learning_rate": 2.8328415651058373e-06, "loss": 0.8302, "step": 37890 }, { "epoch": 0.4618356428162285, "grad_norm": 2.281879425048828, "learning_rate": 2.8325208466966007e-06, "loss": 0.8828, "step": 37895 }, { "epoch": 0.46189657904037634, "grad_norm": 1.8938336372375488, "learning_rate": 2.832200128287364e-06, "loss": 0.8403, "step": 37900 }, { "epoch": 0.46195751526452417, "grad_norm": 2.0046091079711914, "learning_rate": 2.831879409878127e-06, "loss": 0.8436, "step": 37905 }, { "epoch": 0.46201845148867193, "grad_norm": 1.8445971012115479, "learning_rate": 2.8315586914688902e-06, "loss": 0.8015, "step": 37910 }, { "epoch": 0.46207938771281976, "grad_norm": 2.3333089351654053, "learning_rate": 2.831237973059654e-06, "loss": 0.8724, "step": 37915 }, { "epoch": 0.4621403239369676, "grad_norm": 1.8879895210266113, "learning_rate": 2.830917254650417e-06, "loss": 0.8542, "step": 37920 }, { "epoch": 0.4622012601611154, "grad_norm": 1.9344604015350342, "learning_rate": 2.83059653624118e-06, "loss": 0.8806, "step": 37925 }, { "epoch": 0.46226219638526317, "grad_norm": 1.827666997909546, "learning_rate": 2.830275817831944e-06, "loss": 0.8103, "step": 37930 }, { "epoch": 0.462323132609411, "grad_norm": 1.828263759613037, "learning_rate": 2.829955099422707e-06, "loss": 0.7571, "step": 37935 }, { "epoch": 0.4623840688335588, "grad_norm": 1.9707342386245728, "learning_rate": 2.82963438101347e-06, "loss": 0.9086, "step": 37940 }, { "epoch": 0.4624450050577066, "grad_norm": 1.7516496181488037, "learning_rate": 2.829313662604234e-06, "loss": 0.8584, "step": 37945 }, { "epoch": 0.4625059412818544, "grad_norm": 2.0650393962860107, "learning_rate": 2.828992944194997e-06, "loss": 0.8249, "step": 37950 }, { "epoch": 0.4625668775060022, "grad_norm": 1.7666008472442627, "learning_rate": 2.8286722257857603e-06, "loss": 0.8273, "step": 37955 }, { "epoch": 0.46262781373015005, "grad_norm": 1.8910993337631226, "learning_rate": 2.8283515073765238e-06, "loss": 0.8274, "step": 37960 }, { "epoch": 0.4626887499542978, "grad_norm": 2.2529408931732178, "learning_rate": 2.828030788967287e-06, "loss": 0.8796, "step": 37965 }, { "epoch": 0.46274968617844564, "grad_norm": 1.7370678186416626, "learning_rate": 2.8277100705580502e-06, "loss": 0.711, "step": 37970 }, { "epoch": 0.46281062240259346, "grad_norm": 2.5155861377716064, "learning_rate": 2.8273893521488137e-06, "loss": 0.9049, "step": 37975 }, { "epoch": 0.46287155862674123, "grad_norm": 2.2466206550598145, "learning_rate": 2.827068633739577e-06, "loss": 0.7712, "step": 37980 }, { "epoch": 0.46293249485088905, "grad_norm": 2.005610942840576, "learning_rate": 2.82674791533034e-06, "loss": 0.867, "step": 37985 }, { "epoch": 0.4629934310750369, "grad_norm": 2.0123403072357178, "learning_rate": 2.826427196921103e-06, "loss": 0.869, "step": 37990 }, { "epoch": 0.4630543672991847, "grad_norm": 1.8718061447143555, "learning_rate": 2.826106478511867e-06, "loss": 0.8944, "step": 37995 }, { "epoch": 0.46311530352333247, "grad_norm": 1.9715263843536377, "learning_rate": 2.82578576010263e-06, "loss": 0.8125, "step": 38000 }, { "epoch": 0.4631762397474803, "grad_norm": 1.7957652807235718, "learning_rate": 2.825465041693393e-06, "loss": 0.8034, "step": 38005 }, { "epoch": 0.4632371759716281, "grad_norm": 2.0849432945251465, "learning_rate": 2.825144323284157e-06, "loss": 0.8006, "step": 38010 }, { "epoch": 0.4632981121957759, "grad_norm": 2.3392064571380615, "learning_rate": 2.82482360487492e-06, "loss": 0.7916, "step": 38015 }, { "epoch": 0.4633590484199237, "grad_norm": 1.892204999923706, "learning_rate": 2.8245028864656834e-06, "loss": 0.8311, "step": 38020 }, { "epoch": 0.4634199846440715, "grad_norm": 2.2697434425354004, "learning_rate": 2.824182168056447e-06, "loss": 0.7932, "step": 38025 }, { "epoch": 0.46348092086821935, "grad_norm": 1.8673756122589111, "learning_rate": 2.82386144964721e-06, "loss": 0.9364, "step": 38030 }, { "epoch": 0.4635418570923671, "grad_norm": 2.0995781421661377, "learning_rate": 2.8235407312379733e-06, "loss": 0.8603, "step": 38035 }, { "epoch": 0.46360279331651494, "grad_norm": 1.9664807319641113, "learning_rate": 2.8232200128287367e-06, "loss": 0.7986, "step": 38040 }, { "epoch": 0.46366372954066276, "grad_norm": 2.7166967391967773, "learning_rate": 2.8228992944195e-06, "loss": 0.7655, "step": 38045 }, { "epoch": 0.46372466576481053, "grad_norm": 1.6434019804000854, "learning_rate": 2.822578576010263e-06, "loss": 0.814, "step": 38050 }, { "epoch": 0.46378560198895835, "grad_norm": 1.7219001054763794, "learning_rate": 2.8222578576010266e-06, "loss": 0.8478, "step": 38055 }, { "epoch": 0.4638465382131062, "grad_norm": 2.0677435398101807, "learning_rate": 2.82193713919179e-06, "loss": 0.8198, "step": 38060 }, { "epoch": 0.463907474437254, "grad_norm": 2.1921255588531494, "learning_rate": 2.821616420782553e-06, "loss": 0.9011, "step": 38065 }, { "epoch": 0.46396841066140176, "grad_norm": 2.026729106903076, "learning_rate": 2.821295702373317e-06, "loss": 0.837, "step": 38070 }, { "epoch": 0.4640293468855496, "grad_norm": 1.6796878576278687, "learning_rate": 2.82097498396408e-06, "loss": 0.7549, "step": 38075 }, { "epoch": 0.4640902831096974, "grad_norm": 2.2306580543518066, "learning_rate": 2.820654265554843e-06, "loss": 0.7887, "step": 38080 }, { "epoch": 0.4641512193338452, "grad_norm": 1.9624214172363281, "learning_rate": 2.820333547145606e-06, "loss": 0.8466, "step": 38085 }, { "epoch": 0.464212155557993, "grad_norm": 2.538193702697754, "learning_rate": 2.82001282873637e-06, "loss": 0.8335, "step": 38090 }, { "epoch": 0.4642730917821408, "grad_norm": 1.7194604873657227, "learning_rate": 2.819692110327133e-06, "loss": 0.8713, "step": 38095 }, { "epoch": 0.46433402800628865, "grad_norm": 1.7831209897994995, "learning_rate": 2.8193713919178963e-06, "loss": 0.8169, "step": 38100 }, { "epoch": 0.4643949642304364, "grad_norm": 1.9983294010162354, "learning_rate": 2.8190506735086597e-06, "loss": 0.8733, "step": 38105 }, { "epoch": 0.46445590045458424, "grad_norm": 2.0994937419891357, "learning_rate": 2.8187299550994228e-06, "loss": 0.8622, "step": 38110 }, { "epoch": 0.46451683667873206, "grad_norm": 1.9084433317184448, "learning_rate": 2.818409236690186e-06, "loss": 0.9267, "step": 38115 }, { "epoch": 0.4645777729028798, "grad_norm": 2.0031557083129883, "learning_rate": 2.8180885182809496e-06, "loss": 0.8258, "step": 38120 }, { "epoch": 0.46463870912702765, "grad_norm": 2.0451855659484863, "learning_rate": 2.817767799871713e-06, "loss": 0.8063, "step": 38125 }, { "epoch": 0.46469964535117547, "grad_norm": 1.7573285102844238, "learning_rate": 2.817447081462476e-06, "loss": 0.8974, "step": 38130 }, { "epoch": 0.46476058157532324, "grad_norm": 1.766317367553711, "learning_rate": 2.8171263630532395e-06, "loss": 0.8645, "step": 38135 }, { "epoch": 0.46482151779947106, "grad_norm": 2.178499221801758, "learning_rate": 2.816805644644003e-06, "loss": 0.8666, "step": 38140 }, { "epoch": 0.4648824540236189, "grad_norm": 1.9991165399551392, "learning_rate": 2.816484926234766e-06, "loss": 0.8419, "step": 38145 }, { "epoch": 0.4649433902477667, "grad_norm": 2.1461470127105713, "learning_rate": 2.81616420782553e-06, "loss": 0.8331, "step": 38150 }, { "epoch": 0.4650043264719145, "grad_norm": 2.1675727367401123, "learning_rate": 2.815843489416293e-06, "loss": 0.7958, "step": 38155 }, { "epoch": 0.4650652626960623, "grad_norm": 1.9032483100891113, "learning_rate": 2.815522771007056e-06, "loss": 0.7963, "step": 38160 }, { "epoch": 0.4651261989202101, "grad_norm": 1.9167855978012085, "learning_rate": 2.815202052597819e-06, "loss": 0.8632, "step": 38165 }, { "epoch": 0.4651871351443579, "grad_norm": 1.9249297380447388, "learning_rate": 2.8148813341885828e-06, "loss": 0.8416, "step": 38170 }, { "epoch": 0.4652480713685057, "grad_norm": 1.8040400743484497, "learning_rate": 2.8145606157793458e-06, "loss": 0.824, "step": 38175 }, { "epoch": 0.46530900759265353, "grad_norm": 2.0976412296295166, "learning_rate": 2.8142398973701092e-06, "loss": 0.8646, "step": 38180 }, { "epoch": 0.46536994381680136, "grad_norm": 2.0680906772613525, "learning_rate": 2.8139191789608727e-06, "loss": 0.7796, "step": 38185 }, { "epoch": 0.4654308800409491, "grad_norm": 1.8789734840393066, "learning_rate": 2.8135984605516357e-06, "loss": 0.766, "step": 38190 }, { "epoch": 0.46549181626509695, "grad_norm": 1.8494672775268555, "learning_rate": 2.813277742142399e-06, "loss": 0.8385, "step": 38195 }, { "epoch": 0.46555275248924477, "grad_norm": 1.9467296600341797, "learning_rate": 2.8129570237331626e-06, "loss": 0.8536, "step": 38200 }, { "epoch": 0.46561368871339254, "grad_norm": 2.127261161804199, "learning_rate": 2.812636305323926e-06, "loss": 0.8372, "step": 38205 }, { "epoch": 0.46567462493754036, "grad_norm": 1.9662809371948242, "learning_rate": 2.812315586914689e-06, "loss": 0.8971, "step": 38210 }, { "epoch": 0.4657355611616882, "grad_norm": 1.6872484683990479, "learning_rate": 2.8119948685054525e-06, "loss": 0.8156, "step": 38215 }, { "epoch": 0.465796497385836, "grad_norm": 1.945185661315918, "learning_rate": 2.811674150096216e-06, "loss": 0.7944, "step": 38220 }, { "epoch": 0.4658574336099838, "grad_norm": 2.339872360229492, "learning_rate": 2.811353431686979e-06, "loss": 0.875, "step": 38225 }, { "epoch": 0.4659183698341316, "grad_norm": 2.0118002891540527, "learning_rate": 2.8110327132777428e-06, "loss": 0.8227, "step": 38230 }, { "epoch": 0.4659793060582794, "grad_norm": 1.89322030544281, "learning_rate": 2.810711994868506e-06, "loss": 0.8302, "step": 38235 }, { "epoch": 0.4660402422824272, "grad_norm": 2.3326869010925293, "learning_rate": 2.810391276459269e-06, "loss": 0.812, "step": 38240 }, { "epoch": 0.466101178506575, "grad_norm": 1.585574746131897, "learning_rate": 2.810070558050032e-06, "loss": 0.9039, "step": 38245 }, { "epoch": 0.46616211473072283, "grad_norm": 1.8970022201538086, "learning_rate": 2.8097498396407957e-06, "loss": 0.892, "step": 38250 }, { "epoch": 0.46622305095487065, "grad_norm": 1.9265333414077759, "learning_rate": 2.8094291212315587e-06, "loss": 0.8373, "step": 38255 }, { "epoch": 0.4662839871790184, "grad_norm": 1.9383820295333862, "learning_rate": 2.809108402822322e-06, "loss": 0.8399, "step": 38260 }, { "epoch": 0.46634492340316624, "grad_norm": 1.9270738363265991, "learning_rate": 2.8087876844130856e-06, "loss": 0.8398, "step": 38265 }, { "epoch": 0.46640585962731407, "grad_norm": 1.7301294803619385, "learning_rate": 2.808466966003849e-06, "loss": 0.8281, "step": 38270 }, { "epoch": 0.46646679585146184, "grad_norm": 1.8222177028656006, "learning_rate": 2.808146247594612e-06, "loss": 0.8343, "step": 38275 }, { "epoch": 0.46652773207560966, "grad_norm": 1.8749020099639893, "learning_rate": 2.8078255291853755e-06, "loss": 0.8449, "step": 38280 }, { "epoch": 0.4665886682997575, "grad_norm": 1.918566107749939, "learning_rate": 2.807504810776139e-06, "loss": 0.8954, "step": 38285 }, { "epoch": 0.4666496045239053, "grad_norm": 1.8242460489273071, "learning_rate": 2.807184092366902e-06, "loss": 0.8017, "step": 38290 }, { "epoch": 0.46671054074805307, "grad_norm": 2.157498836517334, "learning_rate": 2.806863373957666e-06, "loss": 0.8012, "step": 38295 }, { "epoch": 0.4667714769722009, "grad_norm": 1.9178413152694702, "learning_rate": 2.806542655548429e-06, "loss": 0.8822, "step": 38300 }, { "epoch": 0.4668324131963487, "grad_norm": 1.9754148721694946, "learning_rate": 2.806221937139192e-06, "loss": 0.8409, "step": 38305 }, { "epoch": 0.4668933494204965, "grad_norm": 1.8896135091781616, "learning_rate": 2.8059012187299557e-06, "loss": 0.8403, "step": 38310 }, { "epoch": 0.4669542856446443, "grad_norm": 1.806882381439209, "learning_rate": 2.8055805003207187e-06, "loss": 0.7815, "step": 38315 }, { "epoch": 0.46701522186879213, "grad_norm": 1.9590091705322266, "learning_rate": 2.8052597819114817e-06, "loss": 0.8794, "step": 38320 }, { "epoch": 0.46707615809293995, "grad_norm": 2.142003297805786, "learning_rate": 2.804939063502245e-06, "loss": 0.8342, "step": 38325 }, { "epoch": 0.4671370943170877, "grad_norm": 2.0386791229248047, "learning_rate": 2.8046183450930086e-06, "loss": 0.8955, "step": 38330 }, { "epoch": 0.46719803054123554, "grad_norm": 1.9450548887252808, "learning_rate": 2.8042976266837716e-06, "loss": 0.7721, "step": 38335 }, { "epoch": 0.46725896676538337, "grad_norm": 1.6543585062026978, "learning_rate": 2.803976908274535e-06, "loss": 0.8462, "step": 38340 }, { "epoch": 0.46731990298953113, "grad_norm": 1.7154592275619507, "learning_rate": 2.8036561898652985e-06, "loss": 0.8814, "step": 38345 }, { "epoch": 0.46738083921367896, "grad_norm": 1.6173303127288818, "learning_rate": 2.803335471456062e-06, "loss": 0.8828, "step": 38350 }, { "epoch": 0.4674417754378268, "grad_norm": 1.8788467645645142, "learning_rate": 2.803014753046825e-06, "loss": 0.8276, "step": 38355 }, { "epoch": 0.4675027116619746, "grad_norm": 1.8257828950881958, "learning_rate": 2.8026940346375884e-06, "loss": 0.7881, "step": 38360 }, { "epoch": 0.46756364788612237, "grad_norm": 1.7170239686965942, "learning_rate": 2.802373316228352e-06, "loss": 0.7971, "step": 38365 }, { "epoch": 0.4676245841102702, "grad_norm": 1.8138532638549805, "learning_rate": 2.802052597819115e-06, "loss": 0.8723, "step": 38370 }, { "epoch": 0.467685520334418, "grad_norm": 1.8956587314605713, "learning_rate": 2.8017318794098787e-06, "loss": 0.8253, "step": 38375 }, { "epoch": 0.4677464565585658, "grad_norm": 1.661705493927002, "learning_rate": 2.8014111610006418e-06, "loss": 0.8051, "step": 38380 }, { "epoch": 0.4678073927827136, "grad_norm": 2.378453493118286, "learning_rate": 2.8010904425914048e-06, "loss": 0.8151, "step": 38385 }, { "epoch": 0.4678683290068614, "grad_norm": 1.593080997467041, "learning_rate": 2.8007697241821686e-06, "loss": 0.7683, "step": 38390 }, { "epoch": 0.46792926523100925, "grad_norm": 2.070096492767334, "learning_rate": 2.8004490057729316e-06, "loss": 0.9072, "step": 38395 }, { "epoch": 0.467990201455157, "grad_norm": 2.5342342853546143, "learning_rate": 2.8001282873636947e-06, "loss": 0.8439, "step": 38400 }, { "epoch": 0.46805113767930484, "grad_norm": 1.8201878070831299, "learning_rate": 2.7998075689544585e-06, "loss": 0.8523, "step": 38405 }, { "epoch": 0.46811207390345266, "grad_norm": 2.2609825134277344, "learning_rate": 2.7994868505452215e-06, "loss": 0.842, "step": 38410 }, { "epoch": 0.46817301012760043, "grad_norm": 2.292724132537842, "learning_rate": 2.7991661321359846e-06, "loss": 0.8643, "step": 38415 }, { "epoch": 0.46823394635174825, "grad_norm": 1.8435553312301636, "learning_rate": 2.798845413726748e-06, "loss": 0.8183, "step": 38420 }, { "epoch": 0.4682948825758961, "grad_norm": 1.7521331310272217, "learning_rate": 2.7985246953175114e-06, "loss": 0.971, "step": 38425 }, { "epoch": 0.4683558188000439, "grad_norm": 1.8323476314544678, "learning_rate": 2.798203976908275e-06, "loss": 0.8887, "step": 38430 }, { "epoch": 0.46841675502419167, "grad_norm": 1.7733961343765259, "learning_rate": 2.797883258499038e-06, "loss": 0.8012, "step": 38435 }, { "epoch": 0.4684776912483395, "grad_norm": 2.0219762325286865, "learning_rate": 2.7975625400898013e-06, "loss": 0.8721, "step": 38440 }, { "epoch": 0.4685386274724873, "grad_norm": 2.1376328468322754, "learning_rate": 2.7972418216805648e-06, "loss": 0.8458, "step": 38445 }, { "epoch": 0.4685995636966351, "grad_norm": 1.9292150735855103, "learning_rate": 2.796921103271328e-06, "loss": 0.8001, "step": 38450 }, { "epoch": 0.4686604999207829, "grad_norm": 1.7794297933578491, "learning_rate": 2.7966003848620917e-06, "loss": 0.8754, "step": 38455 }, { "epoch": 0.4687214361449307, "grad_norm": 1.6726751327514648, "learning_rate": 2.7962796664528547e-06, "loss": 0.8165, "step": 38460 }, { "epoch": 0.46878237236907855, "grad_norm": 1.7953404188156128, "learning_rate": 2.7959589480436177e-06, "loss": 0.9391, "step": 38465 }, { "epoch": 0.4688433085932263, "grad_norm": 2.38869047164917, "learning_rate": 2.7956382296343816e-06, "loss": 0.8407, "step": 38470 }, { "epoch": 0.46890424481737414, "grad_norm": 2.235218048095703, "learning_rate": 2.7953175112251446e-06, "loss": 0.8847, "step": 38475 }, { "epoch": 0.46896518104152196, "grad_norm": 2.0350890159606934, "learning_rate": 2.7949967928159076e-06, "loss": 0.861, "step": 38480 }, { "epoch": 0.46902611726566973, "grad_norm": 1.986693024635315, "learning_rate": 2.7946760744066715e-06, "loss": 0.8821, "step": 38485 }, { "epoch": 0.46908705348981755, "grad_norm": 1.8177319765090942, "learning_rate": 2.7943553559974345e-06, "loss": 0.7875, "step": 38490 }, { "epoch": 0.4691479897139654, "grad_norm": 2.0202784538269043, "learning_rate": 2.794034637588198e-06, "loss": 0.9745, "step": 38495 }, { "epoch": 0.4692089259381132, "grad_norm": 2.026517629623413, "learning_rate": 2.793713919178961e-06, "loss": 0.8469, "step": 38500 }, { "epoch": 0.46926986216226096, "grad_norm": 1.9049792289733887, "learning_rate": 2.7933932007697244e-06, "loss": 0.8475, "step": 38505 }, { "epoch": 0.4693307983864088, "grad_norm": 1.7654423713684082, "learning_rate": 2.793072482360488e-06, "loss": 0.8365, "step": 38510 }, { "epoch": 0.4693917346105566, "grad_norm": 2.2697858810424805, "learning_rate": 2.792751763951251e-06, "loss": 0.836, "step": 38515 }, { "epoch": 0.4694526708347044, "grad_norm": 1.7936509847640991, "learning_rate": 2.7924310455420147e-06, "loss": 0.8096, "step": 38520 }, { "epoch": 0.4695136070588522, "grad_norm": 2.010073661804199, "learning_rate": 2.7921103271327777e-06, "loss": 0.7914, "step": 38525 }, { "epoch": 0.469574543283, "grad_norm": 1.9450730085372925, "learning_rate": 2.7917896087235407e-06, "loss": 0.8626, "step": 38530 }, { "epoch": 0.46963547950714785, "grad_norm": 2.1420693397521973, "learning_rate": 2.7914688903143046e-06, "loss": 0.8241, "step": 38535 }, { "epoch": 0.4696964157312956, "grad_norm": 1.872571587562561, "learning_rate": 2.7911481719050676e-06, "loss": 0.8163, "step": 38540 }, { "epoch": 0.46975735195544344, "grad_norm": 1.994362711906433, "learning_rate": 2.7908274534958306e-06, "loss": 0.8401, "step": 38545 }, { "epoch": 0.46981828817959126, "grad_norm": 2.2693369388580322, "learning_rate": 2.7905067350865945e-06, "loss": 0.8595, "step": 38550 }, { "epoch": 0.469879224403739, "grad_norm": 1.8910819292068481, "learning_rate": 2.7901860166773575e-06, "loss": 0.8386, "step": 38555 }, { "epoch": 0.46994016062788685, "grad_norm": 2.0650384426116943, "learning_rate": 2.7898652982681205e-06, "loss": 0.8482, "step": 38560 }, { "epoch": 0.47000109685203467, "grad_norm": 2.556356430053711, "learning_rate": 2.7895445798588844e-06, "loss": 0.8317, "step": 38565 }, { "epoch": 0.4700620330761825, "grad_norm": 1.8060482740402222, "learning_rate": 2.7892238614496474e-06, "loss": 0.8738, "step": 38570 }, { "epoch": 0.47012296930033026, "grad_norm": 1.9157309532165527, "learning_rate": 2.788903143040411e-06, "loss": 0.8031, "step": 38575 }, { "epoch": 0.4701839055244781, "grad_norm": 1.975569248199463, "learning_rate": 2.788582424631174e-06, "loss": 0.8039, "step": 38580 }, { "epoch": 0.4702448417486259, "grad_norm": 1.6554841995239258, "learning_rate": 2.7882617062219373e-06, "loss": 0.8112, "step": 38585 }, { "epoch": 0.4703057779727737, "grad_norm": 1.9885313510894775, "learning_rate": 2.7879409878127007e-06, "loss": 0.8817, "step": 38590 }, { "epoch": 0.4703667141969215, "grad_norm": 1.6617995500564575, "learning_rate": 2.7876202694034638e-06, "loss": 0.7854, "step": 38595 }, { "epoch": 0.4704276504210693, "grad_norm": 1.8993639945983887, "learning_rate": 2.7872995509942276e-06, "loss": 0.8935, "step": 38600 }, { "epoch": 0.4704885866452171, "grad_norm": 2.0404884815216064, "learning_rate": 2.7869788325849906e-06, "loss": 0.9259, "step": 38605 }, { "epoch": 0.4705495228693649, "grad_norm": 2.3402962684631348, "learning_rate": 2.7866581141757537e-06, "loss": 0.8054, "step": 38610 }, { "epoch": 0.47061045909351273, "grad_norm": 2.1606853008270264, "learning_rate": 2.7863373957665175e-06, "loss": 0.8311, "step": 38615 }, { "epoch": 0.47067139531766056, "grad_norm": 1.842876672744751, "learning_rate": 2.7860166773572805e-06, "loss": 0.8136, "step": 38620 }, { "epoch": 0.4707323315418083, "grad_norm": 1.7520583868026733, "learning_rate": 2.7856959589480436e-06, "loss": 0.9131, "step": 38625 }, { "epoch": 0.47079326776595615, "grad_norm": 2.031646251678467, "learning_rate": 2.7853752405388074e-06, "loss": 0.8116, "step": 38630 }, { "epoch": 0.47085420399010397, "grad_norm": 2.0789756774902344, "learning_rate": 2.7850545221295704e-06, "loss": 0.8733, "step": 38635 }, { "epoch": 0.47091514021425174, "grad_norm": 2.0606555938720703, "learning_rate": 2.7847338037203334e-06, "loss": 0.8161, "step": 38640 }, { "epoch": 0.47097607643839956, "grad_norm": 2.120769739151001, "learning_rate": 2.7844130853110973e-06, "loss": 0.8139, "step": 38645 }, { "epoch": 0.4710370126625474, "grad_norm": 1.875077724456787, "learning_rate": 2.7840923669018603e-06, "loss": 0.9529, "step": 38650 }, { "epoch": 0.4710979488866952, "grad_norm": 1.8554428815841675, "learning_rate": 2.7837716484926238e-06, "loss": 0.8209, "step": 38655 }, { "epoch": 0.471158885110843, "grad_norm": 1.9504718780517578, "learning_rate": 2.7834509300833868e-06, "loss": 0.8452, "step": 38660 }, { "epoch": 0.4712198213349908, "grad_norm": 1.9400659799575806, "learning_rate": 2.7831302116741502e-06, "loss": 0.8829, "step": 38665 }, { "epoch": 0.4712807575591386, "grad_norm": 1.7189342975616455, "learning_rate": 2.7828094932649137e-06, "loss": 0.8297, "step": 38670 }, { "epoch": 0.4713416937832864, "grad_norm": 2.2368805408477783, "learning_rate": 2.7824887748556767e-06, "loss": 0.8552, "step": 38675 }, { "epoch": 0.4714026300074342, "grad_norm": 1.7798486948013306, "learning_rate": 2.7821680564464405e-06, "loss": 0.7689, "step": 38680 }, { "epoch": 0.47146356623158203, "grad_norm": 2.141887903213501, "learning_rate": 2.7818473380372036e-06, "loss": 0.879, "step": 38685 }, { "epoch": 0.47152450245572985, "grad_norm": 1.9221670627593994, "learning_rate": 2.7815266196279666e-06, "loss": 0.7789, "step": 38690 }, { "epoch": 0.4715854386798776, "grad_norm": 1.7019894123077393, "learning_rate": 2.7812059012187304e-06, "loss": 0.9226, "step": 38695 }, { "epoch": 0.47164637490402544, "grad_norm": 1.8554801940917969, "learning_rate": 2.7808851828094935e-06, "loss": 0.7584, "step": 38700 }, { "epoch": 0.47170731112817327, "grad_norm": 2.085707902908325, "learning_rate": 2.7805644644002565e-06, "loss": 0.867, "step": 38705 }, { "epoch": 0.47176824735232104, "grad_norm": 1.9255915880203247, "learning_rate": 2.7802437459910203e-06, "loss": 0.8295, "step": 38710 }, { "epoch": 0.47182918357646886, "grad_norm": 2.161940336227417, "learning_rate": 2.7799230275817834e-06, "loss": 0.8856, "step": 38715 }, { "epoch": 0.4718901198006167, "grad_norm": 1.7543766498565674, "learning_rate": 2.779602309172547e-06, "loss": 0.8496, "step": 38720 }, { "epoch": 0.4719510560247645, "grad_norm": 1.9798481464385986, "learning_rate": 2.7792815907633102e-06, "loss": 0.8941, "step": 38725 }, { "epoch": 0.47201199224891227, "grad_norm": 2.1707351207733154, "learning_rate": 2.7789608723540733e-06, "loss": 0.8562, "step": 38730 }, { "epoch": 0.4720729284730601, "grad_norm": 2.003124952316284, "learning_rate": 2.7786401539448367e-06, "loss": 0.8821, "step": 38735 }, { "epoch": 0.4721338646972079, "grad_norm": 2.0772206783294678, "learning_rate": 2.7783194355356e-06, "loss": 0.8124, "step": 38740 }, { "epoch": 0.4721948009213557, "grad_norm": 1.942932367324829, "learning_rate": 2.7779987171263636e-06, "loss": 0.8312, "step": 38745 }, { "epoch": 0.4722557371455035, "grad_norm": 1.7076148986816406, "learning_rate": 2.7776779987171266e-06, "loss": 0.7647, "step": 38750 }, { "epoch": 0.47231667336965133, "grad_norm": 1.8593344688415527, "learning_rate": 2.7773572803078896e-06, "loss": 0.8011, "step": 38755 }, { "epoch": 0.47237760959379915, "grad_norm": 1.8787014484405518, "learning_rate": 2.7770365618986535e-06, "loss": 0.8861, "step": 38760 }, { "epoch": 0.4724385458179469, "grad_norm": 2.091775417327881, "learning_rate": 2.7767158434894165e-06, "loss": 0.8235, "step": 38765 }, { "epoch": 0.47249948204209474, "grad_norm": 1.8454866409301758, "learning_rate": 2.7763951250801795e-06, "loss": 0.8405, "step": 38770 }, { "epoch": 0.47256041826624257, "grad_norm": 2.589689016342163, "learning_rate": 2.7760744066709434e-06, "loss": 0.9293, "step": 38775 }, { "epoch": 0.47262135449039033, "grad_norm": 2.166950225830078, "learning_rate": 2.7757536882617064e-06, "loss": 0.8198, "step": 38780 }, { "epoch": 0.47268229071453816, "grad_norm": 1.8712681531906128, "learning_rate": 2.7754329698524694e-06, "loss": 0.8704, "step": 38785 }, { "epoch": 0.472743226938686, "grad_norm": 1.8482575416564941, "learning_rate": 2.7751122514432333e-06, "loss": 0.7662, "step": 38790 }, { "epoch": 0.4728041631628338, "grad_norm": 1.9085407257080078, "learning_rate": 2.7747915330339963e-06, "loss": 0.8391, "step": 38795 }, { "epoch": 0.47286509938698157, "grad_norm": 1.9356878995895386, "learning_rate": 2.7744708146247597e-06, "loss": 0.803, "step": 38800 }, { "epoch": 0.4729260356111294, "grad_norm": 1.7101001739501953, "learning_rate": 2.774150096215523e-06, "loss": 0.9113, "step": 38805 }, { "epoch": 0.4729869718352772, "grad_norm": 1.8137410879135132, "learning_rate": 2.773829377806286e-06, "loss": 0.795, "step": 38810 }, { "epoch": 0.473047908059425, "grad_norm": 2.1124024391174316, "learning_rate": 2.7735086593970496e-06, "loss": 0.8271, "step": 38815 }, { "epoch": 0.4731088442835728, "grad_norm": 1.6627659797668457, "learning_rate": 2.773187940987813e-06, "loss": 0.8021, "step": 38820 }, { "epoch": 0.4731697805077206, "grad_norm": 1.8946796655654907, "learning_rate": 2.7728672225785765e-06, "loss": 0.8494, "step": 38825 }, { "epoch": 0.47323071673186845, "grad_norm": 2.3230371475219727, "learning_rate": 2.7725465041693395e-06, "loss": 0.8669, "step": 38830 }, { "epoch": 0.4732916529560162, "grad_norm": 1.7412986755371094, "learning_rate": 2.7722257857601025e-06, "loss": 0.9022, "step": 38835 }, { "epoch": 0.47335258918016404, "grad_norm": 1.8712207078933716, "learning_rate": 2.7719050673508664e-06, "loss": 0.8102, "step": 38840 }, { "epoch": 0.47341352540431186, "grad_norm": 1.8464106321334839, "learning_rate": 2.7715843489416294e-06, "loss": 0.8587, "step": 38845 }, { "epoch": 0.47347446162845963, "grad_norm": 1.7037185430526733, "learning_rate": 2.7712636305323924e-06, "loss": 0.7825, "step": 38850 }, { "epoch": 0.47353539785260745, "grad_norm": 1.749703288078308, "learning_rate": 2.7709429121231563e-06, "loss": 0.8181, "step": 38855 }, { "epoch": 0.4735963340767553, "grad_norm": 2.019865036010742, "learning_rate": 2.7706221937139193e-06, "loss": 0.8617, "step": 38860 }, { "epoch": 0.4736572703009031, "grad_norm": 2.2208454608917236, "learning_rate": 2.7703014753046823e-06, "loss": 0.8106, "step": 38865 }, { "epoch": 0.47371820652505087, "grad_norm": 2.776540756225586, "learning_rate": 2.769980756895446e-06, "loss": 0.8554, "step": 38870 }, { "epoch": 0.4737791427491987, "grad_norm": 1.8731569051742554, "learning_rate": 2.7696600384862092e-06, "loss": 0.8669, "step": 38875 }, { "epoch": 0.4738400789733465, "grad_norm": 2.0044171810150146, "learning_rate": 2.7693393200769727e-06, "loss": 0.771, "step": 38880 }, { "epoch": 0.4739010151974943, "grad_norm": 1.8578726053237915, "learning_rate": 2.769018601667736e-06, "loss": 0.7843, "step": 38885 }, { "epoch": 0.4739619514216421, "grad_norm": 1.9950242042541504, "learning_rate": 2.768697883258499e-06, "loss": 0.8905, "step": 38890 }, { "epoch": 0.4740228876457899, "grad_norm": 1.968848705291748, "learning_rate": 2.7683771648492626e-06, "loss": 0.863, "step": 38895 }, { "epoch": 0.47408382386993775, "grad_norm": 1.9057449102401733, "learning_rate": 2.768056446440026e-06, "loss": 0.8381, "step": 38900 }, { "epoch": 0.4741447600940855, "grad_norm": 1.7362170219421387, "learning_rate": 2.7677357280307894e-06, "loss": 0.8259, "step": 38905 }, { "epoch": 0.47420569631823334, "grad_norm": 2.1855859756469727, "learning_rate": 2.7674150096215524e-06, "loss": 0.8519, "step": 38910 }, { "epoch": 0.47426663254238116, "grad_norm": 1.9331086874008179, "learning_rate": 2.7670942912123155e-06, "loss": 0.8422, "step": 38915 }, { "epoch": 0.47432756876652893, "grad_norm": 1.9770898818969727, "learning_rate": 2.7667735728030793e-06, "loss": 0.876, "step": 38920 }, { "epoch": 0.47438850499067675, "grad_norm": 1.9019945859909058, "learning_rate": 2.7664528543938423e-06, "loss": 0.8231, "step": 38925 }, { "epoch": 0.4744494412148246, "grad_norm": 1.8040119409561157, "learning_rate": 2.7661321359846054e-06, "loss": 0.8405, "step": 38930 }, { "epoch": 0.4745103774389724, "grad_norm": 2.1829700469970703, "learning_rate": 2.7658114175753692e-06, "loss": 0.8271, "step": 38935 }, { "epoch": 0.47457131366312016, "grad_norm": 1.7925121784210205, "learning_rate": 2.7654906991661322e-06, "loss": 0.8583, "step": 38940 }, { "epoch": 0.474632249887268, "grad_norm": 1.8570442199707031, "learning_rate": 2.7651699807568953e-06, "loss": 0.8185, "step": 38945 }, { "epoch": 0.4746931861114158, "grad_norm": 1.9881846904754639, "learning_rate": 2.764849262347659e-06, "loss": 0.8505, "step": 38950 }, { "epoch": 0.4747541223355636, "grad_norm": 2.0780630111694336, "learning_rate": 2.764528543938422e-06, "loss": 0.8187, "step": 38955 }, { "epoch": 0.4748150585597114, "grad_norm": 1.9487053155899048, "learning_rate": 2.7642078255291856e-06, "loss": 0.8108, "step": 38960 }, { "epoch": 0.4748759947838592, "grad_norm": 1.7641925811767578, "learning_rate": 2.763887107119949e-06, "loss": 0.8094, "step": 38965 }, { "epoch": 0.47493693100800705, "grad_norm": 1.7179346084594727, "learning_rate": 2.7635663887107125e-06, "loss": 0.924, "step": 38970 }, { "epoch": 0.4749978672321548, "grad_norm": 2.0665369033813477, "learning_rate": 2.7632456703014755e-06, "loss": 0.8268, "step": 38975 }, { "epoch": 0.47505880345630264, "grad_norm": 1.9225260019302368, "learning_rate": 2.762924951892239e-06, "loss": 0.8151, "step": 38980 }, { "epoch": 0.47511973968045046, "grad_norm": 1.8618061542510986, "learning_rate": 2.7626042334830024e-06, "loss": 0.7908, "step": 38985 }, { "epoch": 0.4751806759045982, "grad_norm": 2.0168893337249756, "learning_rate": 2.7622835150737654e-06, "loss": 0.8933, "step": 38990 }, { "epoch": 0.47524161212874605, "grad_norm": 2.3496532440185547, "learning_rate": 2.7619627966645292e-06, "loss": 0.8387, "step": 38995 }, { "epoch": 0.47530254835289387, "grad_norm": 1.9355560541152954, "learning_rate": 2.7616420782552923e-06, "loss": 0.8402, "step": 39000 }, { "epoch": 0.4753634845770417, "grad_norm": 2.2995285987854004, "learning_rate": 2.7613213598460553e-06, "loss": 0.8509, "step": 39005 }, { "epoch": 0.47542442080118946, "grad_norm": 1.913286805152893, "learning_rate": 2.7610006414368183e-06, "loss": 0.8446, "step": 39010 }, { "epoch": 0.4754853570253373, "grad_norm": 2.312168598175049, "learning_rate": 2.760679923027582e-06, "loss": 0.7911, "step": 39015 }, { "epoch": 0.4755462932494851, "grad_norm": 2.1389503479003906, "learning_rate": 2.760359204618345e-06, "loss": 0.8728, "step": 39020 }, { "epoch": 0.4756072294736329, "grad_norm": 1.9988186359405518, "learning_rate": 2.7600384862091086e-06, "loss": 0.7635, "step": 39025 }, { "epoch": 0.4756681656977807, "grad_norm": 1.9016355276107788, "learning_rate": 2.759717767799872e-06, "loss": 0.778, "step": 39030 }, { "epoch": 0.4757291019219285, "grad_norm": 1.6618390083312988, "learning_rate": 2.759397049390635e-06, "loss": 0.8253, "step": 39035 }, { "epoch": 0.47579003814607634, "grad_norm": 1.8807040452957153, "learning_rate": 2.7590763309813985e-06, "loss": 0.8387, "step": 39040 }, { "epoch": 0.4758509743702241, "grad_norm": 1.8331396579742432, "learning_rate": 2.758755612572162e-06, "loss": 0.826, "step": 39045 }, { "epoch": 0.47591191059437193, "grad_norm": 1.835514783859253, "learning_rate": 2.7584348941629254e-06, "loss": 0.8252, "step": 39050 }, { "epoch": 0.47597284681851976, "grad_norm": 1.919555902481079, "learning_rate": 2.7581141757536884e-06, "loss": 0.9009, "step": 39055 }, { "epoch": 0.4760337830426675, "grad_norm": 2.102750778198242, "learning_rate": 2.757793457344452e-06, "loss": 0.8908, "step": 39060 }, { "epoch": 0.47609471926681535, "grad_norm": 2.0748555660247803, "learning_rate": 2.7574727389352153e-06, "loss": 0.808, "step": 39065 }, { "epoch": 0.47615565549096317, "grad_norm": 1.6774709224700928, "learning_rate": 2.7571520205259783e-06, "loss": 0.7809, "step": 39070 }, { "epoch": 0.47621659171511094, "grad_norm": 1.9025156497955322, "learning_rate": 2.756831302116742e-06, "loss": 0.9059, "step": 39075 }, { "epoch": 0.47627752793925876, "grad_norm": 2.078709125518799, "learning_rate": 2.756510583707505e-06, "loss": 0.8562, "step": 39080 }, { "epoch": 0.4763384641634066, "grad_norm": 1.8130323886871338, "learning_rate": 2.756189865298268e-06, "loss": 0.8169, "step": 39085 }, { "epoch": 0.4763994003875544, "grad_norm": 1.976538896560669, "learning_rate": 2.7558691468890312e-06, "loss": 0.835, "step": 39090 }, { "epoch": 0.4764603366117022, "grad_norm": 1.8727470636367798, "learning_rate": 2.755548428479795e-06, "loss": 0.8602, "step": 39095 }, { "epoch": 0.47652127283585, "grad_norm": 1.902233362197876, "learning_rate": 2.755227710070558e-06, "loss": 0.8991, "step": 39100 }, { "epoch": 0.4765822090599978, "grad_norm": 1.970717191696167, "learning_rate": 2.7549069916613215e-06, "loss": 0.909, "step": 39105 }, { "epoch": 0.4766431452841456, "grad_norm": 2.224073648452759, "learning_rate": 2.754586273252085e-06, "loss": 0.7802, "step": 39110 }, { "epoch": 0.4767040815082934, "grad_norm": 2.29542875289917, "learning_rate": 2.754265554842848e-06, "loss": 0.8989, "step": 39115 }, { "epoch": 0.47676501773244123, "grad_norm": 1.7154074907302856, "learning_rate": 2.7539448364336114e-06, "loss": 0.7824, "step": 39120 }, { "epoch": 0.47682595395658905, "grad_norm": 2.037160873413086, "learning_rate": 2.753624118024375e-06, "loss": 0.9058, "step": 39125 }, { "epoch": 0.4768868901807368, "grad_norm": 1.6767444610595703, "learning_rate": 2.7533033996151383e-06, "loss": 0.908, "step": 39130 }, { "epoch": 0.47694782640488464, "grad_norm": 1.767194151878357, "learning_rate": 2.7529826812059013e-06, "loss": 0.8136, "step": 39135 }, { "epoch": 0.47700876262903247, "grad_norm": 1.915452241897583, "learning_rate": 2.7526619627966648e-06, "loss": 0.8939, "step": 39140 }, { "epoch": 0.47706969885318024, "grad_norm": 2.3426496982574463, "learning_rate": 2.7523412443874282e-06, "loss": 0.8372, "step": 39145 }, { "epoch": 0.47713063507732806, "grad_norm": 2.043640613555908, "learning_rate": 2.7520205259781912e-06, "loss": 0.7968, "step": 39150 }, { "epoch": 0.4771915713014759, "grad_norm": 2.429037570953369, "learning_rate": 2.751699807568955e-06, "loss": 0.8999, "step": 39155 }, { "epoch": 0.4772525075256237, "grad_norm": 2.2829339504241943, "learning_rate": 2.751379089159718e-06, "loss": 0.844, "step": 39160 }, { "epoch": 0.47731344374977147, "grad_norm": 1.974507212638855, "learning_rate": 2.751058370750481e-06, "loss": 0.8015, "step": 39165 }, { "epoch": 0.4773743799739193, "grad_norm": 2.585367202758789, "learning_rate": 2.750737652341244e-06, "loss": 0.8921, "step": 39170 }, { "epoch": 0.4774353161980671, "grad_norm": 2.163989543914795, "learning_rate": 2.750416933932008e-06, "loss": 0.8564, "step": 39175 }, { "epoch": 0.4774962524222149, "grad_norm": 1.7696882486343384, "learning_rate": 2.750096215522771e-06, "loss": 0.7841, "step": 39180 }, { "epoch": 0.4775571886463627, "grad_norm": 1.6766798496246338, "learning_rate": 2.7497754971135345e-06, "loss": 0.8158, "step": 39185 }, { "epoch": 0.47761812487051053, "grad_norm": 2.045259952545166, "learning_rate": 2.749454778704298e-06, "loss": 0.8663, "step": 39190 }, { "epoch": 0.47767906109465835, "grad_norm": 1.8843200206756592, "learning_rate": 2.7491340602950613e-06, "loss": 0.8459, "step": 39195 }, { "epoch": 0.4777399973188061, "grad_norm": 2.1579716205596924, "learning_rate": 2.7488133418858244e-06, "loss": 0.8007, "step": 39200 }, { "epoch": 0.47780093354295394, "grad_norm": 1.9482390880584717, "learning_rate": 2.748492623476588e-06, "loss": 0.8232, "step": 39205 }, { "epoch": 0.47786186976710177, "grad_norm": 2.2830302715301514, "learning_rate": 2.7481719050673512e-06, "loss": 0.8112, "step": 39210 }, { "epoch": 0.47792280599124953, "grad_norm": 1.8191035985946655, "learning_rate": 2.7478511866581143e-06, "loss": 0.892, "step": 39215 }, { "epoch": 0.47798374221539736, "grad_norm": 2.0138423442840576, "learning_rate": 2.747530468248878e-06, "loss": 0.8638, "step": 39220 }, { "epoch": 0.4780446784395452, "grad_norm": 1.7286365032196045, "learning_rate": 2.747209749839641e-06, "loss": 0.9172, "step": 39225 }, { "epoch": 0.478105614663693, "grad_norm": 2.2140402793884277, "learning_rate": 2.746889031430404e-06, "loss": 0.8642, "step": 39230 }, { "epoch": 0.47816655088784077, "grad_norm": 1.836928129196167, "learning_rate": 2.746568313021168e-06, "loss": 0.8584, "step": 39235 }, { "epoch": 0.4782274871119886, "grad_norm": 1.6583802700042725, "learning_rate": 2.746247594611931e-06, "loss": 0.8467, "step": 39240 }, { "epoch": 0.4782884233361364, "grad_norm": 1.9965801239013672, "learning_rate": 2.745926876202694e-06, "loss": 0.7223, "step": 39245 }, { "epoch": 0.4783493595602842, "grad_norm": 1.8863946199417114, "learning_rate": 2.7456061577934575e-06, "loss": 0.8328, "step": 39250 }, { "epoch": 0.478410295784432, "grad_norm": 1.9012199640274048, "learning_rate": 2.745285439384221e-06, "loss": 0.871, "step": 39255 }, { "epoch": 0.4784712320085798, "grad_norm": 1.9452481269836426, "learning_rate": 2.744964720974984e-06, "loss": 0.8457, "step": 39260 }, { "epoch": 0.47853216823272765, "grad_norm": 2.0494208335876465, "learning_rate": 2.7446440025657474e-06, "loss": 0.8901, "step": 39265 }, { "epoch": 0.4785931044568754, "grad_norm": 1.9873284101486206, "learning_rate": 2.744323284156511e-06, "loss": 0.9054, "step": 39270 }, { "epoch": 0.47865404068102324, "grad_norm": 2.046262741088867, "learning_rate": 2.7440025657472743e-06, "loss": 0.8123, "step": 39275 }, { "epoch": 0.47871497690517106, "grad_norm": 2.3032381534576416, "learning_rate": 2.7436818473380373e-06, "loss": 0.8491, "step": 39280 }, { "epoch": 0.47877591312931883, "grad_norm": 2.1168549060821533, "learning_rate": 2.7433611289288007e-06, "loss": 0.8625, "step": 39285 }, { "epoch": 0.47883684935346665, "grad_norm": 1.9941874742507935, "learning_rate": 2.743040410519564e-06, "loss": 0.8824, "step": 39290 }, { "epoch": 0.4788977855776145, "grad_norm": 2.155043363571167, "learning_rate": 2.742719692110327e-06, "loss": 0.8386, "step": 39295 }, { "epoch": 0.4789587218017623, "grad_norm": 1.9043046236038208, "learning_rate": 2.742398973701091e-06, "loss": 0.8722, "step": 39300 }, { "epoch": 0.47901965802591007, "grad_norm": 1.8842344284057617, "learning_rate": 2.742078255291854e-06, "loss": 0.8004, "step": 39305 }, { "epoch": 0.4790805942500579, "grad_norm": 1.9770632982254028, "learning_rate": 2.741757536882617e-06, "loss": 0.8321, "step": 39310 }, { "epoch": 0.4791415304742057, "grad_norm": 1.8660259246826172, "learning_rate": 2.741436818473381e-06, "loss": 0.8985, "step": 39315 }, { "epoch": 0.4792024666983535, "grad_norm": 1.7922040224075317, "learning_rate": 2.741116100064144e-06, "loss": 0.7836, "step": 39320 }, { "epoch": 0.4792634029225013, "grad_norm": 2.0091733932495117, "learning_rate": 2.740795381654907e-06, "loss": 0.9267, "step": 39325 }, { "epoch": 0.4793243391466491, "grad_norm": 1.8153417110443115, "learning_rate": 2.740474663245671e-06, "loss": 0.802, "step": 39330 }, { "epoch": 0.47938527537079695, "grad_norm": 2.1852691173553467, "learning_rate": 2.740153944836434e-06, "loss": 0.8825, "step": 39335 }, { "epoch": 0.4794462115949447, "grad_norm": 2.008842945098877, "learning_rate": 2.739833226427197e-06, "loss": 0.8789, "step": 39340 }, { "epoch": 0.47950714781909254, "grad_norm": 1.9790070056915283, "learning_rate": 2.7395125080179603e-06, "loss": 0.9174, "step": 39345 }, { "epoch": 0.47956808404324036, "grad_norm": 1.6840224266052246, "learning_rate": 2.7391917896087238e-06, "loss": 0.8521, "step": 39350 }, { "epoch": 0.47962902026738813, "grad_norm": 1.7127560377120972, "learning_rate": 2.738871071199487e-06, "loss": 0.9016, "step": 39355 }, { "epoch": 0.47968995649153595, "grad_norm": 1.854935884475708, "learning_rate": 2.7385503527902502e-06, "loss": 0.8622, "step": 39360 }, { "epoch": 0.4797508927156838, "grad_norm": 2.2615973949432373, "learning_rate": 2.7382296343810137e-06, "loss": 0.841, "step": 39365 }, { "epoch": 0.4798118289398316, "grad_norm": 2.0851399898529053, "learning_rate": 2.737908915971777e-06, "loss": 0.8319, "step": 39370 }, { "epoch": 0.47987276516397936, "grad_norm": 2.270695924758911, "learning_rate": 2.73758819756254e-06, "loss": 0.8435, "step": 39375 }, { "epoch": 0.4799337013881272, "grad_norm": 1.7316830158233643, "learning_rate": 2.737267479153304e-06, "loss": 0.8387, "step": 39380 }, { "epoch": 0.479994637612275, "grad_norm": 1.8651597499847412, "learning_rate": 2.736946760744067e-06, "loss": 0.8028, "step": 39385 }, { "epoch": 0.4800555738364228, "grad_norm": 1.9724540710449219, "learning_rate": 2.73662604233483e-06, "loss": 0.877, "step": 39390 }, { "epoch": 0.4801165100605706, "grad_norm": 1.9156509637832642, "learning_rate": 2.736305323925594e-06, "loss": 0.8306, "step": 39395 }, { "epoch": 0.4801774462847184, "grad_norm": 1.7029893398284912, "learning_rate": 2.735984605516357e-06, "loss": 0.7871, "step": 39400 }, { "epoch": 0.48023838250886625, "grad_norm": 1.9228435754776, "learning_rate": 2.73566388710712e-06, "loss": 0.8645, "step": 39405 }, { "epoch": 0.480299318733014, "grad_norm": 1.9449708461761475, "learning_rate": 2.7353431686978838e-06, "loss": 0.8085, "step": 39410 }, { "epoch": 0.48036025495716184, "grad_norm": 1.948091983795166, "learning_rate": 2.7350224502886468e-06, "loss": 0.7906, "step": 39415 }, { "epoch": 0.48042119118130966, "grad_norm": 2.0719592571258545, "learning_rate": 2.73470173187941e-06, "loss": 0.8139, "step": 39420 }, { "epoch": 0.4804821274054574, "grad_norm": 1.7682589292526245, "learning_rate": 2.7343810134701732e-06, "loss": 0.8357, "step": 39425 }, { "epoch": 0.48054306362960525, "grad_norm": 1.9576085805892944, "learning_rate": 2.7340602950609367e-06, "loss": 0.8812, "step": 39430 }, { "epoch": 0.48060399985375307, "grad_norm": 1.8718422651290894, "learning_rate": 2.7337395766517e-06, "loss": 0.8322, "step": 39435 }, { "epoch": 0.4806649360779009, "grad_norm": 1.8668677806854248, "learning_rate": 2.733418858242463e-06, "loss": 0.8387, "step": 39440 }, { "epoch": 0.48072587230204866, "grad_norm": 2.1511595249176025, "learning_rate": 2.733098139833227e-06, "loss": 0.7968, "step": 39445 }, { "epoch": 0.4807868085261965, "grad_norm": 1.9320638179779053, "learning_rate": 2.73277742142399e-06, "loss": 0.8228, "step": 39450 }, { "epoch": 0.4808477447503443, "grad_norm": 2.24250864982605, "learning_rate": 2.732456703014753e-06, "loss": 0.8885, "step": 39455 }, { "epoch": 0.4809086809744921, "grad_norm": 2.1773810386657715, "learning_rate": 2.732135984605517e-06, "loss": 0.9026, "step": 39460 }, { "epoch": 0.4809696171986399, "grad_norm": 2.2427964210510254, "learning_rate": 2.73181526619628e-06, "loss": 0.8791, "step": 39465 }, { "epoch": 0.4810305534227877, "grad_norm": 1.9220296144485474, "learning_rate": 2.731494547787043e-06, "loss": 0.9351, "step": 39470 }, { "epoch": 0.48109148964693554, "grad_norm": 1.6683956384658813, "learning_rate": 2.731173829377807e-06, "loss": 0.8686, "step": 39475 }, { "epoch": 0.4811524258710833, "grad_norm": 1.8635715246200562, "learning_rate": 2.73085311096857e-06, "loss": 0.9059, "step": 39480 }, { "epoch": 0.48121336209523113, "grad_norm": 1.6152255535125732, "learning_rate": 2.730532392559333e-06, "loss": 0.8806, "step": 39485 }, { "epoch": 0.48127429831937896, "grad_norm": 2.2605600357055664, "learning_rate": 2.7302116741500967e-06, "loss": 0.8696, "step": 39490 }, { "epoch": 0.4813352345435267, "grad_norm": 2.1701061725616455, "learning_rate": 2.7298909557408597e-06, "loss": 0.8663, "step": 39495 }, { "epoch": 0.48139617076767455, "grad_norm": 1.806854486465454, "learning_rate": 2.729570237331623e-06, "loss": 0.7981, "step": 39500 }, { "epoch": 0.48145710699182237, "grad_norm": 1.9174381494522095, "learning_rate": 2.729249518922386e-06, "loss": 0.7552, "step": 39505 }, { "epoch": 0.4815180432159702, "grad_norm": 2.336451292037964, "learning_rate": 2.7289288005131496e-06, "loss": 0.8047, "step": 39510 }, { "epoch": 0.48157897944011796, "grad_norm": 2.3100194931030273, "learning_rate": 2.728608082103913e-06, "loss": 0.8261, "step": 39515 }, { "epoch": 0.4816399156642658, "grad_norm": 1.819797396659851, "learning_rate": 2.728287363694676e-06, "loss": 0.8138, "step": 39520 }, { "epoch": 0.4817008518884136, "grad_norm": 1.6776059865951538, "learning_rate": 2.72796664528544e-06, "loss": 0.8747, "step": 39525 }, { "epoch": 0.4817617881125614, "grad_norm": 1.8202499151229858, "learning_rate": 2.727645926876203e-06, "loss": 0.8385, "step": 39530 }, { "epoch": 0.4818227243367092, "grad_norm": 2.750955104827881, "learning_rate": 2.727325208466966e-06, "loss": 0.8991, "step": 39535 }, { "epoch": 0.481883660560857, "grad_norm": 1.798668384552002, "learning_rate": 2.72700449005773e-06, "loss": 0.8306, "step": 39540 }, { "epoch": 0.4819445967850048, "grad_norm": 2.109036922454834, "learning_rate": 2.726683771648493e-06, "loss": 0.7864, "step": 39545 }, { "epoch": 0.4820055330091526, "grad_norm": 2.2832794189453125, "learning_rate": 2.726363053239256e-06, "loss": 0.8031, "step": 39550 }, { "epoch": 0.48206646923330043, "grad_norm": 1.7961399555206299, "learning_rate": 2.7260423348300197e-06, "loss": 0.8481, "step": 39555 }, { "epoch": 0.48212740545744825, "grad_norm": 1.7133970260620117, "learning_rate": 2.7257216164207827e-06, "loss": 0.7523, "step": 39560 }, { "epoch": 0.482188341681596, "grad_norm": 1.959643006324768, "learning_rate": 2.7254008980115458e-06, "loss": 0.7819, "step": 39565 }, { "epoch": 0.48224927790574385, "grad_norm": 2.1132559776306152, "learning_rate": 2.7250801796023096e-06, "loss": 0.8434, "step": 39570 }, { "epoch": 0.48231021412989167, "grad_norm": 2.0027945041656494, "learning_rate": 2.7247594611930726e-06, "loss": 0.828, "step": 39575 }, { "epoch": 0.48237115035403944, "grad_norm": 2.514054298400879, "learning_rate": 2.724438742783836e-06, "loss": 0.8728, "step": 39580 }, { "epoch": 0.48243208657818726, "grad_norm": 1.8731364011764526, "learning_rate": 2.7241180243745995e-06, "loss": 0.8439, "step": 39585 }, { "epoch": 0.4824930228023351, "grad_norm": 2.0289268493652344, "learning_rate": 2.7237973059653625e-06, "loss": 0.8378, "step": 39590 }, { "epoch": 0.4825539590264829, "grad_norm": 1.9856319427490234, "learning_rate": 2.723476587556126e-06, "loss": 0.9096, "step": 39595 }, { "epoch": 0.48261489525063067, "grad_norm": 2.1483941078186035, "learning_rate": 2.723155869146889e-06, "loss": 0.8842, "step": 39600 }, { "epoch": 0.4826758314747785, "grad_norm": 1.9125337600708008, "learning_rate": 2.722835150737653e-06, "loss": 0.8261, "step": 39605 }, { "epoch": 0.4827367676989263, "grad_norm": 2.055753707885742, "learning_rate": 2.722514432328416e-06, "loss": 0.8284, "step": 39610 }, { "epoch": 0.4827977039230741, "grad_norm": 1.9787970781326294, "learning_rate": 2.722193713919179e-06, "loss": 0.8276, "step": 39615 }, { "epoch": 0.4828586401472219, "grad_norm": 2.1200220584869385, "learning_rate": 2.7218729955099428e-06, "loss": 0.8519, "step": 39620 }, { "epoch": 0.48291957637136973, "grad_norm": 1.8716164827346802, "learning_rate": 2.7215522771007058e-06, "loss": 0.8513, "step": 39625 }, { "epoch": 0.48298051259551755, "grad_norm": 1.951004981994629, "learning_rate": 2.721231558691469e-06, "loss": 0.8641, "step": 39630 }, { "epoch": 0.4830414488196653, "grad_norm": 1.8713902235031128, "learning_rate": 2.7209108402822327e-06, "loss": 0.7764, "step": 39635 }, { "epoch": 0.48310238504381314, "grad_norm": 2.0570454597473145, "learning_rate": 2.7205901218729957e-06, "loss": 0.8708, "step": 39640 }, { "epoch": 0.48316332126796097, "grad_norm": 2.1440649032592773, "learning_rate": 2.7202694034637587e-06, "loss": 0.8576, "step": 39645 }, { "epoch": 0.48322425749210873, "grad_norm": 2.1369171142578125, "learning_rate": 2.7199486850545226e-06, "loss": 0.8394, "step": 39650 }, { "epoch": 0.48328519371625656, "grad_norm": 1.7925987243652344, "learning_rate": 2.7196279666452856e-06, "loss": 0.8896, "step": 39655 }, { "epoch": 0.4833461299404044, "grad_norm": 2.0101544857025146, "learning_rate": 2.719307248236049e-06, "loss": 0.7755, "step": 39660 }, { "epoch": 0.4834070661645522, "grad_norm": 2.040680170059204, "learning_rate": 2.7189865298268124e-06, "loss": 0.8819, "step": 39665 }, { "epoch": 0.48346800238869997, "grad_norm": 2.142141342163086, "learning_rate": 2.718665811417576e-06, "loss": 0.8698, "step": 39670 }, { "epoch": 0.4835289386128478, "grad_norm": 2.2977070808410645, "learning_rate": 2.718345093008339e-06, "loss": 0.8868, "step": 39675 }, { "epoch": 0.4835898748369956, "grad_norm": 1.668831706047058, "learning_rate": 2.718024374599102e-06, "loss": 0.7808, "step": 39680 }, { "epoch": 0.4836508110611434, "grad_norm": 2.0941638946533203, "learning_rate": 2.7177036561898658e-06, "loss": 0.8753, "step": 39685 }, { "epoch": 0.4837117472852912, "grad_norm": 2.1888880729675293, "learning_rate": 2.717382937780629e-06, "loss": 0.9152, "step": 39690 }, { "epoch": 0.483772683509439, "grad_norm": 2.1424782276153564, "learning_rate": 2.717062219371392e-06, "loss": 0.8559, "step": 39695 }, { "epoch": 0.48383361973358685, "grad_norm": 2.247626543045044, "learning_rate": 2.7167415009621557e-06, "loss": 0.8335, "step": 39700 }, { "epoch": 0.4838945559577346, "grad_norm": 2.3454582691192627, "learning_rate": 2.7164207825529187e-06, "loss": 0.831, "step": 39705 }, { "epoch": 0.48395549218188244, "grad_norm": 2.3132312297821045, "learning_rate": 2.7161000641436817e-06, "loss": 0.9118, "step": 39710 }, { "epoch": 0.48401642840603026, "grad_norm": 2.3598928451538086, "learning_rate": 2.7157793457344456e-06, "loss": 0.855, "step": 39715 }, { "epoch": 0.48407736463017803, "grad_norm": 1.9743788242340088, "learning_rate": 2.7154586273252086e-06, "loss": 0.8023, "step": 39720 }, { "epoch": 0.48413830085432585, "grad_norm": 1.9207121133804321, "learning_rate": 2.715137908915972e-06, "loss": 0.7855, "step": 39725 }, { "epoch": 0.4841992370784737, "grad_norm": 1.8516892194747925, "learning_rate": 2.7148171905067355e-06, "loss": 0.8643, "step": 39730 }, { "epoch": 0.4842601733026215, "grad_norm": 1.9082117080688477, "learning_rate": 2.7144964720974985e-06, "loss": 0.8842, "step": 39735 }, { "epoch": 0.48432110952676927, "grad_norm": 1.7838562726974487, "learning_rate": 2.714175753688262e-06, "loss": 0.8679, "step": 39740 }, { "epoch": 0.4843820457509171, "grad_norm": 2.6171436309814453, "learning_rate": 2.7138550352790254e-06, "loss": 0.8635, "step": 39745 }, { "epoch": 0.4844429819750649, "grad_norm": 2.226608991622925, "learning_rate": 2.713534316869789e-06, "loss": 0.86, "step": 39750 }, { "epoch": 0.4845039181992127, "grad_norm": 1.8207603693008423, "learning_rate": 2.713213598460552e-06, "loss": 0.8275, "step": 39755 }, { "epoch": 0.4845648544233605, "grad_norm": 1.9227081537246704, "learning_rate": 2.712892880051315e-06, "loss": 0.832, "step": 39760 }, { "epoch": 0.4846257906475083, "grad_norm": 1.9821571111679077, "learning_rate": 2.7125721616420787e-06, "loss": 0.8313, "step": 39765 }, { "epoch": 0.48468672687165615, "grad_norm": 1.694677472114563, "learning_rate": 2.7122514432328417e-06, "loss": 0.7999, "step": 39770 }, { "epoch": 0.4847476630958039, "grad_norm": 1.9245033264160156, "learning_rate": 2.7119307248236047e-06, "loss": 0.8433, "step": 39775 }, { "epoch": 0.48480859931995174, "grad_norm": 1.8254647254943848, "learning_rate": 2.7116100064143686e-06, "loss": 0.8424, "step": 39780 }, { "epoch": 0.48486953554409956, "grad_norm": 2.1098456382751465, "learning_rate": 2.7112892880051316e-06, "loss": 0.7679, "step": 39785 }, { "epoch": 0.48493047176824733, "grad_norm": 1.82103431224823, "learning_rate": 2.7109685695958946e-06, "loss": 0.8741, "step": 39790 }, { "epoch": 0.48499140799239515, "grad_norm": 1.8759279251098633, "learning_rate": 2.7106478511866585e-06, "loss": 0.8851, "step": 39795 }, { "epoch": 0.485052344216543, "grad_norm": 1.9367775917053223, "learning_rate": 2.7103271327774215e-06, "loss": 0.8741, "step": 39800 }, { "epoch": 0.4851132804406908, "grad_norm": 2.0360867977142334, "learning_rate": 2.710006414368185e-06, "loss": 0.8078, "step": 39805 }, { "epoch": 0.48517421666483856, "grad_norm": 1.9199810028076172, "learning_rate": 2.7096856959589484e-06, "loss": 0.7434, "step": 39810 }, { "epoch": 0.4852351528889864, "grad_norm": 2.054774284362793, "learning_rate": 2.7093649775497114e-06, "loss": 0.9047, "step": 39815 }, { "epoch": 0.4852960891131342, "grad_norm": 1.8723251819610596, "learning_rate": 2.709044259140475e-06, "loss": 0.8298, "step": 39820 }, { "epoch": 0.485357025337282, "grad_norm": 1.841751217842102, "learning_rate": 2.7087235407312383e-06, "loss": 0.8801, "step": 39825 }, { "epoch": 0.4854179615614298, "grad_norm": 1.7607513666152954, "learning_rate": 2.7084028223220017e-06, "loss": 0.8685, "step": 39830 }, { "epoch": 0.4854788977855776, "grad_norm": 2.0504984855651855, "learning_rate": 2.7080821039127648e-06, "loss": 0.8551, "step": 39835 }, { "epoch": 0.48553983400972545, "grad_norm": 1.5893399715423584, "learning_rate": 2.7077613855035278e-06, "loss": 0.8466, "step": 39840 }, { "epoch": 0.4856007702338732, "grad_norm": 1.7151814699172974, "learning_rate": 2.7074406670942916e-06, "loss": 0.8415, "step": 39845 }, { "epoch": 0.48566170645802104, "grad_norm": 1.7240707874298096, "learning_rate": 2.7071199486850547e-06, "loss": 0.8206, "step": 39850 }, { "epoch": 0.48572264268216886, "grad_norm": 2.119290828704834, "learning_rate": 2.7067992302758177e-06, "loss": 0.8298, "step": 39855 }, { "epoch": 0.4857835789063166, "grad_norm": 1.7060061693191528, "learning_rate": 2.7064785118665815e-06, "loss": 0.8609, "step": 39860 }, { "epoch": 0.48584451513046445, "grad_norm": 2.073505163192749, "learning_rate": 2.7061577934573446e-06, "loss": 0.8828, "step": 39865 }, { "epoch": 0.48590545135461227, "grad_norm": 1.7580028772354126, "learning_rate": 2.7058370750481076e-06, "loss": 0.8737, "step": 39870 }, { "epoch": 0.4859663875787601, "grad_norm": 2.4495773315429688, "learning_rate": 2.7055163566388714e-06, "loss": 0.866, "step": 39875 }, { "epoch": 0.48602732380290786, "grad_norm": 1.7894314527511597, "learning_rate": 2.7051956382296345e-06, "loss": 0.8549, "step": 39880 }, { "epoch": 0.4860882600270557, "grad_norm": 2.281726121902466, "learning_rate": 2.704874919820398e-06, "loss": 0.8896, "step": 39885 }, { "epoch": 0.4861491962512035, "grad_norm": 2.077310800552368, "learning_rate": 2.7045542014111613e-06, "loss": 0.8385, "step": 39890 }, { "epoch": 0.4862101324753513, "grad_norm": 2.0766000747680664, "learning_rate": 2.7042334830019248e-06, "loss": 0.8478, "step": 39895 }, { "epoch": 0.4862710686994991, "grad_norm": 1.8396574258804321, "learning_rate": 2.703912764592688e-06, "loss": 0.8169, "step": 39900 }, { "epoch": 0.4863320049236469, "grad_norm": 2.096569061279297, "learning_rate": 2.7035920461834512e-06, "loss": 0.8086, "step": 39905 }, { "epoch": 0.48639294114779474, "grad_norm": 1.875910758972168, "learning_rate": 2.7032713277742147e-06, "loss": 0.8359, "step": 39910 }, { "epoch": 0.4864538773719425, "grad_norm": 2.0632781982421875, "learning_rate": 2.7029506093649777e-06, "loss": 0.8156, "step": 39915 }, { "epoch": 0.48651481359609033, "grad_norm": 2.8202242851257324, "learning_rate": 2.7026298909557416e-06, "loss": 0.8201, "step": 39920 }, { "epoch": 0.48657574982023816, "grad_norm": 2.0056169033050537, "learning_rate": 2.7023091725465046e-06, "loss": 0.9221, "step": 39925 }, { "epoch": 0.4866366860443859, "grad_norm": 1.9055174589157104, "learning_rate": 2.7019884541372676e-06, "loss": 0.8375, "step": 39930 }, { "epoch": 0.48669762226853375, "grad_norm": 1.7070726156234741, "learning_rate": 2.7016677357280306e-06, "loss": 0.8279, "step": 39935 }, { "epoch": 0.48675855849268157, "grad_norm": 2.0570948123931885, "learning_rate": 2.7013470173187945e-06, "loss": 0.846, "step": 39940 }, { "epoch": 0.4868194947168294, "grad_norm": 2.061706304550171, "learning_rate": 2.7010262989095575e-06, "loss": 0.8447, "step": 39945 }, { "epoch": 0.48688043094097716, "grad_norm": 2.0471930503845215, "learning_rate": 2.700705580500321e-06, "loss": 0.8623, "step": 39950 }, { "epoch": 0.486941367165125, "grad_norm": 1.917981743812561, "learning_rate": 2.7003848620910844e-06, "loss": 0.8485, "step": 39955 }, { "epoch": 0.4870023033892728, "grad_norm": 2.1827404499053955, "learning_rate": 2.7000641436818474e-06, "loss": 0.8311, "step": 39960 }, { "epoch": 0.4870632396134206, "grad_norm": 2.694434642791748, "learning_rate": 2.699743425272611e-06, "loss": 0.8609, "step": 39965 }, { "epoch": 0.4871241758375684, "grad_norm": 2.071211576461792, "learning_rate": 2.6994227068633743e-06, "loss": 0.864, "step": 39970 }, { "epoch": 0.4871851120617162, "grad_norm": 1.7027641534805298, "learning_rate": 2.6991019884541377e-06, "loss": 0.8487, "step": 39975 }, { "epoch": 0.48724604828586404, "grad_norm": 2.1042943000793457, "learning_rate": 2.6987812700449007e-06, "loss": 0.8066, "step": 39980 }, { "epoch": 0.4873069845100118, "grad_norm": 1.8843128681182861, "learning_rate": 2.698460551635664e-06, "loss": 0.7647, "step": 39985 }, { "epoch": 0.48736792073415963, "grad_norm": 2.2843282222747803, "learning_rate": 2.6981398332264276e-06, "loss": 0.8613, "step": 39990 }, { "epoch": 0.48742885695830745, "grad_norm": 2.238664150238037, "learning_rate": 2.6978191148171906e-06, "loss": 0.8031, "step": 39995 }, { "epoch": 0.4874897931824552, "grad_norm": 1.9687647819519043, "learning_rate": 2.6974983964079545e-06, "loss": 0.7993, "step": 40000 }, { "epoch": 0.48755072940660305, "grad_norm": 1.7605433464050293, "learning_rate": 2.6971776779987175e-06, "loss": 0.796, "step": 40005 }, { "epoch": 0.48761166563075087, "grad_norm": 2.02964186668396, "learning_rate": 2.6968569595894805e-06, "loss": 0.778, "step": 40010 }, { "epoch": 0.4876726018548987, "grad_norm": 2.725862979888916, "learning_rate": 2.6965362411802435e-06, "loss": 0.821, "step": 40015 }, { "epoch": 0.48773353807904646, "grad_norm": 1.6493436098098755, "learning_rate": 2.6962155227710074e-06, "loss": 0.8516, "step": 40020 }, { "epoch": 0.4877944743031943, "grad_norm": 1.8809038400650024, "learning_rate": 2.6958948043617704e-06, "loss": 0.8362, "step": 40025 }, { "epoch": 0.4878554105273421, "grad_norm": 1.8594797849655151, "learning_rate": 2.695574085952534e-06, "loss": 0.8392, "step": 40030 }, { "epoch": 0.48791634675148987, "grad_norm": 1.8940333127975464, "learning_rate": 2.6952533675432973e-06, "loss": 0.805, "step": 40035 }, { "epoch": 0.4879772829756377, "grad_norm": 1.852413296699524, "learning_rate": 2.6949326491340603e-06, "loss": 0.8146, "step": 40040 }, { "epoch": 0.4880382191997855, "grad_norm": 1.945694923400879, "learning_rate": 2.6946119307248237e-06, "loss": 0.8895, "step": 40045 }, { "epoch": 0.4880991554239333, "grad_norm": 2.125624179840088, "learning_rate": 2.694291212315587e-06, "loss": 0.8465, "step": 40050 }, { "epoch": 0.4881600916480811, "grad_norm": 2.0879483222961426, "learning_rate": 2.6939704939063506e-06, "loss": 0.7844, "step": 40055 }, { "epoch": 0.48822102787222893, "grad_norm": 1.664243459701538, "learning_rate": 2.6936497754971136e-06, "loss": 0.8461, "step": 40060 }, { "epoch": 0.48828196409637675, "grad_norm": 1.8321077823638916, "learning_rate": 2.693329057087877e-06, "loss": 0.8004, "step": 40065 }, { "epoch": 0.4883429003205245, "grad_norm": 1.8328791856765747, "learning_rate": 2.6930083386786405e-06, "loss": 0.8091, "step": 40070 }, { "epoch": 0.48840383654467234, "grad_norm": 1.911524772644043, "learning_rate": 2.6926876202694035e-06, "loss": 0.7612, "step": 40075 }, { "epoch": 0.48846477276882017, "grad_norm": 1.920837640762329, "learning_rate": 2.6923669018601674e-06, "loss": 0.8582, "step": 40080 }, { "epoch": 0.48852570899296793, "grad_norm": 1.9018205404281616, "learning_rate": 2.6920461834509304e-06, "loss": 0.8467, "step": 40085 }, { "epoch": 0.48858664521711576, "grad_norm": 1.9192386865615845, "learning_rate": 2.6917254650416934e-06, "loss": 0.8542, "step": 40090 }, { "epoch": 0.4886475814412636, "grad_norm": 1.9633945226669312, "learning_rate": 2.6914047466324565e-06, "loss": 0.8811, "step": 40095 }, { "epoch": 0.4887085176654114, "grad_norm": 2.2137956619262695, "learning_rate": 2.6910840282232203e-06, "loss": 0.8252, "step": 40100 }, { "epoch": 0.48876945388955917, "grad_norm": 2.2027411460876465, "learning_rate": 2.6907633098139833e-06, "loss": 0.8937, "step": 40105 }, { "epoch": 0.488830390113707, "grad_norm": 1.5602136850357056, "learning_rate": 2.6904425914047468e-06, "loss": 0.8331, "step": 40110 }, { "epoch": 0.4888913263378548, "grad_norm": 1.933852195739746, "learning_rate": 2.6901218729955102e-06, "loss": 0.8607, "step": 40115 }, { "epoch": 0.4889522625620026, "grad_norm": 2.1674370765686035, "learning_rate": 2.6898011545862732e-06, "loss": 0.7951, "step": 40120 }, { "epoch": 0.4890131987861504, "grad_norm": 1.7250416278839111, "learning_rate": 2.6894804361770367e-06, "loss": 0.8742, "step": 40125 }, { "epoch": 0.4890741350102982, "grad_norm": 1.6293338537216187, "learning_rate": 2.6891597177678e-06, "loss": 0.8294, "step": 40130 }, { "epoch": 0.48913507123444605, "grad_norm": 1.7778377532958984, "learning_rate": 2.6888389993585636e-06, "loss": 0.8346, "step": 40135 }, { "epoch": 0.4891960074585938, "grad_norm": 2.1641857624053955, "learning_rate": 2.6885182809493266e-06, "loss": 0.8372, "step": 40140 }, { "epoch": 0.48925694368274164, "grad_norm": 2.4875736236572266, "learning_rate": 2.6881975625400904e-06, "loss": 0.8322, "step": 40145 }, { "epoch": 0.48931787990688946, "grad_norm": 2.0744011402130127, "learning_rate": 2.6878768441308535e-06, "loss": 0.8199, "step": 40150 }, { "epoch": 0.48937881613103723, "grad_norm": 2.0472185611724854, "learning_rate": 2.6875561257216165e-06, "loss": 0.8772, "step": 40155 }, { "epoch": 0.48943975235518505, "grad_norm": 1.8272969722747803, "learning_rate": 2.6872354073123803e-06, "loss": 0.788, "step": 40160 }, { "epoch": 0.4895006885793329, "grad_norm": 1.7954949140548706, "learning_rate": 2.6869146889031434e-06, "loss": 0.8294, "step": 40165 }, { "epoch": 0.4895616248034807, "grad_norm": 2.1508748531341553, "learning_rate": 2.6865939704939064e-06, "loss": 0.8171, "step": 40170 }, { "epoch": 0.48962256102762847, "grad_norm": 2.1720101833343506, "learning_rate": 2.68627325208467e-06, "loss": 0.8175, "step": 40175 }, { "epoch": 0.4896834972517763, "grad_norm": 1.8177690505981445, "learning_rate": 2.6859525336754332e-06, "loss": 0.8857, "step": 40180 }, { "epoch": 0.4897444334759241, "grad_norm": 1.695083498954773, "learning_rate": 2.6856318152661963e-06, "loss": 0.8485, "step": 40185 }, { "epoch": 0.4898053697000719, "grad_norm": 1.9153987169265747, "learning_rate": 2.6853110968569597e-06, "loss": 0.8758, "step": 40190 }, { "epoch": 0.4898663059242197, "grad_norm": 1.7542016506195068, "learning_rate": 2.684990378447723e-06, "loss": 0.7628, "step": 40195 }, { "epoch": 0.4899272421483675, "grad_norm": 2.388669729232788, "learning_rate": 2.6846696600384866e-06, "loss": 0.9003, "step": 40200 }, { "epoch": 0.48998817837251535, "grad_norm": 2.5479843616485596, "learning_rate": 2.6843489416292496e-06, "loss": 0.8248, "step": 40205 }, { "epoch": 0.4900491145966631, "grad_norm": 2.313481569290161, "learning_rate": 2.684028223220013e-06, "loss": 0.8623, "step": 40210 }, { "epoch": 0.49011005082081094, "grad_norm": 2.3600196838378906, "learning_rate": 2.6837075048107765e-06, "loss": 0.8309, "step": 40215 }, { "epoch": 0.49017098704495876, "grad_norm": 2.3170340061187744, "learning_rate": 2.6833867864015395e-06, "loss": 0.7532, "step": 40220 }, { "epoch": 0.49023192326910653, "grad_norm": 1.7098431587219238, "learning_rate": 2.6830660679923034e-06, "loss": 0.8161, "step": 40225 }, { "epoch": 0.49029285949325435, "grad_norm": 2.020900011062622, "learning_rate": 2.6827453495830664e-06, "loss": 0.7964, "step": 40230 }, { "epoch": 0.4903537957174022, "grad_norm": 1.6635029315948486, "learning_rate": 2.6824246311738294e-06, "loss": 0.9009, "step": 40235 }, { "epoch": 0.49041473194155, "grad_norm": 1.8963158130645752, "learning_rate": 2.6821039127645933e-06, "loss": 0.8954, "step": 40240 }, { "epoch": 0.49047566816569776, "grad_norm": 1.7831562757492065, "learning_rate": 2.6817831943553563e-06, "loss": 0.7863, "step": 40245 }, { "epoch": 0.4905366043898456, "grad_norm": 1.8022531270980835, "learning_rate": 2.6814624759461193e-06, "loss": 0.757, "step": 40250 }, { "epoch": 0.4905975406139934, "grad_norm": 2.0841007232666016, "learning_rate": 2.681141757536883e-06, "loss": 0.8137, "step": 40255 }, { "epoch": 0.4906584768381412, "grad_norm": 1.952199935913086, "learning_rate": 2.680821039127646e-06, "loss": 0.9303, "step": 40260 }, { "epoch": 0.490719413062289, "grad_norm": 2.7194886207580566, "learning_rate": 2.680500320718409e-06, "loss": 0.8631, "step": 40265 }, { "epoch": 0.4907803492864368, "grad_norm": 1.6773101091384888, "learning_rate": 2.6801796023091726e-06, "loss": 0.7848, "step": 40270 }, { "epoch": 0.49084128551058465, "grad_norm": 2.494015693664551, "learning_rate": 2.679858883899936e-06, "loss": 0.8804, "step": 40275 }, { "epoch": 0.4909022217347324, "grad_norm": 1.7864512205123901, "learning_rate": 2.6795381654906995e-06, "loss": 0.8574, "step": 40280 }, { "epoch": 0.49096315795888024, "grad_norm": 1.7386474609375, "learning_rate": 2.6792174470814625e-06, "loss": 0.8174, "step": 40285 }, { "epoch": 0.49102409418302806, "grad_norm": 2.062377452850342, "learning_rate": 2.678896728672226e-06, "loss": 0.7453, "step": 40290 }, { "epoch": 0.4910850304071758, "grad_norm": 1.7044258117675781, "learning_rate": 2.6785760102629894e-06, "loss": 0.7852, "step": 40295 }, { "epoch": 0.49114596663132365, "grad_norm": 1.9652763605117798, "learning_rate": 2.6782552918537524e-06, "loss": 0.8443, "step": 40300 }, { "epoch": 0.4912069028554715, "grad_norm": 2.056546449661255, "learning_rate": 2.6779345734445163e-06, "loss": 0.8555, "step": 40305 }, { "epoch": 0.4912678390796193, "grad_norm": 2.167748212814331, "learning_rate": 2.6776138550352793e-06, "loss": 0.8495, "step": 40310 }, { "epoch": 0.49132877530376706, "grad_norm": 1.9746015071868896, "learning_rate": 2.6772931366260423e-06, "loss": 0.7535, "step": 40315 }, { "epoch": 0.4913897115279149, "grad_norm": 1.6187831163406372, "learning_rate": 2.676972418216806e-06, "loss": 0.8019, "step": 40320 }, { "epoch": 0.4914506477520627, "grad_norm": 1.738517165184021, "learning_rate": 2.676651699807569e-06, "loss": 0.8362, "step": 40325 }, { "epoch": 0.4915115839762105, "grad_norm": 2.073916435241699, "learning_rate": 2.6763309813983322e-06, "loss": 0.9029, "step": 40330 }, { "epoch": 0.4915725202003583, "grad_norm": 1.852410078048706, "learning_rate": 2.676010262989096e-06, "loss": 0.8581, "step": 40335 }, { "epoch": 0.4916334564245061, "grad_norm": 2.136760711669922, "learning_rate": 2.675689544579859e-06, "loss": 0.825, "step": 40340 }, { "epoch": 0.49169439264865394, "grad_norm": 2.0966427326202393, "learning_rate": 2.675368826170622e-06, "loss": 0.8752, "step": 40345 }, { "epoch": 0.4917553288728017, "grad_norm": 1.91632878780365, "learning_rate": 2.6750481077613856e-06, "loss": 0.7762, "step": 40350 }, { "epoch": 0.49181626509694953, "grad_norm": 2.0580930709838867, "learning_rate": 2.674727389352149e-06, "loss": 0.8149, "step": 40355 }, { "epoch": 0.49187720132109736, "grad_norm": 1.6504638195037842, "learning_rate": 2.6744066709429124e-06, "loss": 0.8058, "step": 40360 }, { "epoch": 0.4919381375452451, "grad_norm": 1.836727261543274, "learning_rate": 2.6740859525336755e-06, "loss": 0.7943, "step": 40365 }, { "epoch": 0.49199907376939295, "grad_norm": 1.721693754196167, "learning_rate": 2.6737652341244393e-06, "loss": 0.8697, "step": 40370 }, { "epoch": 0.49206000999354077, "grad_norm": 1.709200382232666, "learning_rate": 2.6734445157152023e-06, "loss": 0.8055, "step": 40375 }, { "epoch": 0.4921209462176886, "grad_norm": 1.788905382156372, "learning_rate": 2.6731237973059654e-06, "loss": 0.8886, "step": 40380 }, { "epoch": 0.49218188244183636, "grad_norm": 2.025273561477661, "learning_rate": 2.6728030788967292e-06, "loss": 0.8825, "step": 40385 }, { "epoch": 0.4922428186659842, "grad_norm": 2.224924325942993, "learning_rate": 2.6724823604874922e-06, "loss": 0.8477, "step": 40390 }, { "epoch": 0.492303754890132, "grad_norm": 2.087651491165161, "learning_rate": 2.6721616420782553e-06, "loss": 0.8384, "step": 40395 }, { "epoch": 0.4923646911142798, "grad_norm": 1.9907423257827759, "learning_rate": 2.671840923669019e-06, "loss": 0.8517, "step": 40400 }, { "epoch": 0.4924256273384276, "grad_norm": 1.9804155826568604, "learning_rate": 2.671520205259782e-06, "loss": 0.8489, "step": 40405 }, { "epoch": 0.4924865635625754, "grad_norm": 2.2846856117248535, "learning_rate": 2.671199486850545e-06, "loss": 0.9428, "step": 40410 }, { "epoch": 0.49254749978672324, "grad_norm": 2.0728394985198975, "learning_rate": 2.670878768441309e-06, "loss": 0.8829, "step": 40415 }, { "epoch": 0.492608436010871, "grad_norm": 1.9171223640441895, "learning_rate": 2.670558050032072e-06, "loss": 0.8599, "step": 40420 }, { "epoch": 0.49266937223501883, "grad_norm": 1.9656169414520264, "learning_rate": 2.6702373316228355e-06, "loss": 0.8351, "step": 40425 }, { "epoch": 0.49273030845916665, "grad_norm": 1.6834447383880615, "learning_rate": 2.6699166132135985e-06, "loss": 0.8973, "step": 40430 }, { "epoch": 0.4927912446833144, "grad_norm": 1.8718172311782837, "learning_rate": 2.669595894804362e-06, "loss": 0.8354, "step": 40435 }, { "epoch": 0.49285218090746225, "grad_norm": 2.0618278980255127, "learning_rate": 2.6692751763951254e-06, "loss": 0.8643, "step": 40440 }, { "epoch": 0.49291311713161007, "grad_norm": 2.008164644241333, "learning_rate": 2.6689544579858884e-06, "loss": 0.8564, "step": 40445 }, { "epoch": 0.4929740533557579, "grad_norm": 2.031921863555908, "learning_rate": 2.6686337395766522e-06, "loss": 0.8289, "step": 40450 }, { "epoch": 0.49303498957990566, "grad_norm": 1.672338843345642, "learning_rate": 2.6683130211674153e-06, "loss": 0.8399, "step": 40455 }, { "epoch": 0.4930959258040535, "grad_norm": 2.673820972442627, "learning_rate": 2.6679923027581783e-06, "loss": 0.8433, "step": 40460 }, { "epoch": 0.4931568620282013, "grad_norm": 1.9001870155334473, "learning_rate": 2.667671584348942e-06, "loss": 0.8805, "step": 40465 }, { "epoch": 0.49321779825234907, "grad_norm": 2.0585451126098633, "learning_rate": 2.667350865939705e-06, "loss": 0.806, "step": 40470 }, { "epoch": 0.4932787344764969, "grad_norm": 1.8523235321044922, "learning_rate": 2.667030147530468e-06, "loss": 0.9013, "step": 40475 }, { "epoch": 0.4933396707006447, "grad_norm": 2.1452219486236572, "learning_rate": 2.666709429121232e-06, "loss": 0.8897, "step": 40480 }, { "epoch": 0.49340060692479254, "grad_norm": 1.959816575050354, "learning_rate": 2.666388710711995e-06, "loss": 0.8285, "step": 40485 }, { "epoch": 0.4934615431489403, "grad_norm": 2.2578203678131104, "learning_rate": 2.666067992302758e-06, "loss": 0.863, "step": 40490 }, { "epoch": 0.49352247937308813, "grad_norm": 1.9393972158432007, "learning_rate": 2.665747273893522e-06, "loss": 0.8369, "step": 40495 }, { "epoch": 0.49358341559723595, "grad_norm": 1.9307376146316528, "learning_rate": 2.665426555484285e-06, "loss": 0.8339, "step": 40500 }, { "epoch": 0.4936443518213837, "grad_norm": 1.9095673561096191, "learning_rate": 2.6651058370750484e-06, "loss": 0.8267, "step": 40505 }, { "epoch": 0.49370528804553154, "grad_norm": 1.9235880374908447, "learning_rate": 2.664785118665812e-06, "loss": 0.7936, "step": 40510 }, { "epoch": 0.49376622426967937, "grad_norm": 2.0996479988098145, "learning_rate": 2.664464400256575e-06, "loss": 0.9333, "step": 40515 }, { "epoch": 0.49382716049382713, "grad_norm": 2.502300500869751, "learning_rate": 2.6641436818473383e-06, "loss": 0.8131, "step": 40520 }, { "epoch": 0.49388809671797496, "grad_norm": 1.9560209512710571, "learning_rate": 2.6638229634381013e-06, "loss": 0.9406, "step": 40525 }, { "epoch": 0.4939490329421228, "grad_norm": 2.0203232765197754, "learning_rate": 2.663502245028865e-06, "loss": 0.8928, "step": 40530 }, { "epoch": 0.4940099691662706, "grad_norm": 2.0574707984924316, "learning_rate": 2.663181526619628e-06, "loss": 0.8002, "step": 40535 }, { "epoch": 0.49407090539041837, "grad_norm": 1.9059733152389526, "learning_rate": 2.662860808210391e-06, "loss": 0.8511, "step": 40540 }, { "epoch": 0.4941318416145662, "grad_norm": 2.1944921016693115, "learning_rate": 2.662540089801155e-06, "loss": 0.8725, "step": 40545 }, { "epoch": 0.494192777838714, "grad_norm": 2.0885634422302246, "learning_rate": 2.662219371391918e-06, "loss": 0.8203, "step": 40550 }, { "epoch": 0.4942537140628618, "grad_norm": 2.2785987854003906, "learning_rate": 2.661898652982681e-06, "loss": 0.903, "step": 40555 }, { "epoch": 0.4943146502870096, "grad_norm": 1.908605694770813, "learning_rate": 2.661577934573445e-06, "loss": 0.9182, "step": 40560 }, { "epoch": 0.4943755865111574, "grad_norm": 2.019047498703003, "learning_rate": 2.661257216164208e-06, "loss": 0.8517, "step": 40565 }, { "epoch": 0.49443652273530525, "grad_norm": 1.8136264085769653, "learning_rate": 2.660936497754971e-06, "loss": 0.8584, "step": 40570 }, { "epoch": 0.494497458959453, "grad_norm": 2.2620623111724854, "learning_rate": 2.660615779345735e-06, "loss": 0.883, "step": 40575 }, { "epoch": 0.49455839518360084, "grad_norm": 2.0102956295013428, "learning_rate": 2.660295060936498e-06, "loss": 0.9156, "step": 40580 }, { "epoch": 0.49461933140774866, "grad_norm": 2.002595901489258, "learning_rate": 2.6599743425272613e-06, "loss": 0.8444, "step": 40585 }, { "epoch": 0.49468026763189643, "grad_norm": 2.7383601665496826, "learning_rate": 2.6596536241180248e-06, "loss": 0.8201, "step": 40590 }, { "epoch": 0.49474120385604425, "grad_norm": 2.3525679111480713, "learning_rate": 2.659332905708788e-06, "loss": 0.7445, "step": 40595 }, { "epoch": 0.4948021400801921, "grad_norm": 1.8618675470352173, "learning_rate": 2.6590121872995512e-06, "loss": 0.7598, "step": 40600 }, { "epoch": 0.4948630763043399, "grad_norm": 1.9267808198928833, "learning_rate": 2.6586914688903142e-06, "loss": 0.8317, "step": 40605 }, { "epoch": 0.49492401252848767, "grad_norm": 1.8120423555374146, "learning_rate": 2.658370750481078e-06, "loss": 0.7422, "step": 40610 }, { "epoch": 0.4949849487526355, "grad_norm": 1.8226635456085205, "learning_rate": 2.658050032071841e-06, "loss": 0.8773, "step": 40615 }, { "epoch": 0.4950458849767833, "grad_norm": 1.8629330396652222, "learning_rate": 2.657729313662604e-06, "loss": 0.8652, "step": 40620 }, { "epoch": 0.4951068212009311, "grad_norm": 2.059284210205078, "learning_rate": 2.657408595253368e-06, "loss": 0.8762, "step": 40625 }, { "epoch": 0.4951677574250789, "grad_norm": 1.8224269151687622, "learning_rate": 2.657087876844131e-06, "loss": 0.8476, "step": 40630 }, { "epoch": 0.4952286936492267, "grad_norm": 1.7239998579025269, "learning_rate": 2.656767158434894e-06, "loss": 0.792, "step": 40635 }, { "epoch": 0.49528962987337455, "grad_norm": 1.4929797649383545, "learning_rate": 2.656446440025658e-06, "loss": 0.9237, "step": 40640 }, { "epoch": 0.4953505660975223, "grad_norm": 1.8404691219329834, "learning_rate": 2.656125721616421e-06, "loss": 0.8989, "step": 40645 }, { "epoch": 0.49541150232167014, "grad_norm": 1.9238873720169067, "learning_rate": 2.6558050032071844e-06, "loss": 0.8365, "step": 40650 }, { "epoch": 0.49547243854581796, "grad_norm": 1.8451308012008667, "learning_rate": 2.655484284797948e-06, "loss": 0.8713, "step": 40655 }, { "epoch": 0.49553337476996573, "grad_norm": 2.6063640117645264, "learning_rate": 2.655163566388711e-06, "loss": 0.8096, "step": 40660 }, { "epoch": 0.49559431099411355, "grad_norm": 2.2725026607513428, "learning_rate": 2.6548428479794743e-06, "loss": 0.8408, "step": 40665 }, { "epoch": 0.4956552472182614, "grad_norm": 2.0692412853240967, "learning_rate": 2.6545221295702377e-06, "loss": 0.8076, "step": 40670 }, { "epoch": 0.4957161834424092, "grad_norm": 1.8169026374816895, "learning_rate": 2.654201411161001e-06, "loss": 0.8211, "step": 40675 }, { "epoch": 0.49577711966655696, "grad_norm": 1.9191635847091675, "learning_rate": 2.653880692751764e-06, "loss": 0.8574, "step": 40680 }, { "epoch": 0.4958380558907048, "grad_norm": 1.9371975660324097, "learning_rate": 2.653559974342527e-06, "loss": 0.8409, "step": 40685 }, { "epoch": 0.4958989921148526, "grad_norm": 1.7712262868881226, "learning_rate": 2.653239255933291e-06, "loss": 0.8963, "step": 40690 }, { "epoch": 0.4959599283390004, "grad_norm": 1.773909330368042, "learning_rate": 2.652918537524054e-06, "loss": 0.8052, "step": 40695 }, { "epoch": 0.4960208645631482, "grad_norm": 1.6315685510635376, "learning_rate": 2.652597819114817e-06, "loss": 0.8272, "step": 40700 }, { "epoch": 0.496081800787296, "grad_norm": 2.3682870864868164, "learning_rate": 2.652277100705581e-06, "loss": 0.932, "step": 40705 }, { "epoch": 0.49614273701144385, "grad_norm": 1.7945687770843506, "learning_rate": 2.651956382296344e-06, "loss": 0.8087, "step": 40710 }, { "epoch": 0.4962036732355916, "grad_norm": 1.8209259510040283, "learning_rate": 2.651635663887107e-06, "loss": 0.8166, "step": 40715 }, { "epoch": 0.49626460945973944, "grad_norm": 2.0864076614379883, "learning_rate": 2.651314945477871e-06, "loss": 0.851, "step": 40720 }, { "epoch": 0.49632554568388726, "grad_norm": 1.8630770444869995, "learning_rate": 2.650994227068634e-06, "loss": 0.8387, "step": 40725 }, { "epoch": 0.496386481908035, "grad_norm": 1.968095064163208, "learning_rate": 2.6506735086593973e-06, "loss": 0.8662, "step": 40730 }, { "epoch": 0.49644741813218285, "grad_norm": 1.983931303024292, "learning_rate": 2.6503527902501607e-06, "loss": 0.8396, "step": 40735 }, { "epoch": 0.4965083543563307, "grad_norm": 1.9475924968719482, "learning_rate": 2.6500320718409237e-06, "loss": 0.7746, "step": 40740 }, { "epoch": 0.4965692905804785, "grad_norm": 2.222165822982788, "learning_rate": 2.649711353431687e-06, "loss": 0.8607, "step": 40745 }, { "epoch": 0.49663022680462626, "grad_norm": 1.828777551651001, "learning_rate": 2.6493906350224506e-06, "loss": 0.7894, "step": 40750 }, { "epoch": 0.4966911630287741, "grad_norm": 1.7922660112380981, "learning_rate": 2.649069916613214e-06, "loss": 0.7598, "step": 40755 }, { "epoch": 0.4967520992529219, "grad_norm": 2.438717842102051, "learning_rate": 2.648749198203977e-06, "loss": 0.8103, "step": 40760 }, { "epoch": 0.4968130354770697, "grad_norm": 1.89734947681427, "learning_rate": 2.64842847979474e-06, "loss": 0.8641, "step": 40765 }, { "epoch": 0.4968739717012175, "grad_norm": 1.9935791492462158, "learning_rate": 2.648107761385504e-06, "loss": 0.815, "step": 40770 }, { "epoch": 0.4969349079253653, "grad_norm": 2.2201156616210938, "learning_rate": 2.647787042976267e-06, "loss": 0.8947, "step": 40775 }, { "epoch": 0.49699584414951314, "grad_norm": 1.9984686374664307, "learning_rate": 2.64746632456703e-06, "loss": 0.8056, "step": 40780 }, { "epoch": 0.4970567803736609, "grad_norm": 2.1846628189086914, "learning_rate": 2.647145606157794e-06, "loss": 0.9004, "step": 40785 }, { "epoch": 0.49711771659780873, "grad_norm": 2.3967671394348145, "learning_rate": 2.646824887748557e-06, "loss": 0.8693, "step": 40790 }, { "epoch": 0.49717865282195656, "grad_norm": 1.9994300603866577, "learning_rate": 2.64650416933932e-06, "loss": 0.857, "step": 40795 }, { "epoch": 0.4972395890461043, "grad_norm": 2.291379451751709, "learning_rate": 2.6461834509300838e-06, "loss": 0.8517, "step": 40800 }, { "epoch": 0.49730052527025215, "grad_norm": 1.8876346349716187, "learning_rate": 2.6458627325208468e-06, "loss": 0.8819, "step": 40805 }, { "epoch": 0.49736146149439997, "grad_norm": 2.13895845413208, "learning_rate": 2.64554201411161e-06, "loss": 0.785, "step": 40810 }, { "epoch": 0.4974223977185478, "grad_norm": 1.8710192441940308, "learning_rate": 2.6452212957023736e-06, "loss": 0.8378, "step": 40815 }, { "epoch": 0.49748333394269556, "grad_norm": 1.9719775915145874, "learning_rate": 2.6449005772931367e-06, "loss": 0.8727, "step": 40820 }, { "epoch": 0.4975442701668434, "grad_norm": 1.9541735649108887, "learning_rate": 2.6445798588839e-06, "loss": 0.7876, "step": 40825 }, { "epoch": 0.4976052063909912, "grad_norm": 2.1903138160705566, "learning_rate": 2.6442591404746635e-06, "loss": 0.8339, "step": 40830 }, { "epoch": 0.497666142615139, "grad_norm": 2.0935440063476562, "learning_rate": 2.643938422065427e-06, "loss": 0.9135, "step": 40835 }, { "epoch": 0.4977270788392868, "grad_norm": 1.7338745594024658, "learning_rate": 2.64361770365619e-06, "loss": 0.8439, "step": 40840 }, { "epoch": 0.4977880150634346, "grad_norm": 2.2305428981781006, "learning_rate": 2.643296985246954e-06, "loss": 0.8809, "step": 40845 }, { "epoch": 0.49784895128758244, "grad_norm": 1.8697326183319092, "learning_rate": 2.642976266837717e-06, "loss": 0.839, "step": 40850 }, { "epoch": 0.4979098875117302, "grad_norm": 2.1025948524475098, "learning_rate": 2.64265554842848e-06, "loss": 0.7993, "step": 40855 }, { "epoch": 0.49797082373587803, "grad_norm": 2.325432777404785, "learning_rate": 2.642334830019243e-06, "loss": 0.8663, "step": 40860 }, { "epoch": 0.49803175996002585, "grad_norm": 1.637768268585205, "learning_rate": 2.6420141116100068e-06, "loss": 0.7673, "step": 40865 }, { "epoch": 0.4980926961841736, "grad_norm": 2.0629031658172607, "learning_rate": 2.64169339320077e-06, "loss": 0.7444, "step": 40870 }, { "epoch": 0.49815363240832145, "grad_norm": 2.0710034370422363, "learning_rate": 2.6413726747915332e-06, "loss": 0.8045, "step": 40875 }, { "epoch": 0.49821456863246927, "grad_norm": 1.9743376970291138, "learning_rate": 2.6410519563822967e-06, "loss": 0.8758, "step": 40880 }, { "epoch": 0.4982755048566171, "grad_norm": 2.1462926864624023, "learning_rate": 2.6407312379730597e-06, "loss": 0.8375, "step": 40885 }, { "epoch": 0.49833644108076486, "grad_norm": 2.01977801322937, "learning_rate": 2.640410519563823e-06, "loss": 0.8292, "step": 40890 }, { "epoch": 0.4983973773049127, "grad_norm": 2.268889904022217, "learning_rate": 2.6400898011545866e-06, "loss": 0.8484, "step": 40895 }, { "epoch": 0.4984583135290605, "grad_norm": 1.9263458251953125, "learning_rate": 2.63976908274535e-06, "loss": 0.833, "step": 40900 }, { "epoch": 0.49851924975320827, "grad_norm": 1.800944447517395, "learning_rate": 2.639448364336113e-06, "loss": 0.8437, "step": 40905 }, { "epoch": 0.4985801859773561, "grad_norm": 2.104820489883423, "learning_rate": 2.6391276459268765e-06, "loss": 0.8227, "step": 40910 }, { "epoch": 0.4986411222015039, "grad_norm": 2.060590982437134, "learning_rate": 2.63880692751764e-06, "loss": 0.8707, "step": 40915 }, { "epoch": 0.49870205842565174, "grad_norm": 1.8725879192352295, "learning_rate": 2.638486209108403e-06, "loss": 0.8238, "step": 40920 }, { "epoch": 0.4987629946497995, "grad_norm": 2.2639033794403076, "learning_rate": 2.638165490699167e-06, "loss": 0.745, "step": 40925 }, { "epoch": 0.49882393087394733, "grad_norm": 2.2839605808258057, "learning_rate": 2.63784477228993e-06, "loss": 0.8238, "step": 40930 }, { "epoch": 0.49888486709809515, "grad_norm": 1.761461615562439, "learning_rate": 2.637524053880693e-06, "loss": 0.9042, "step": 40935 }, { "epoch": 0.4989458033222429, "grad_norm": 1.9452269077301025, "learning_rate": 2.637203335471456e-06, "loss": 0.8512, "step": 40940 }, { "epoch": 0.49900673954639074, "grad_norm": 2.16800594329834, "learning_rate": 2.6368826170622197e-06, "loss": 0.8819, "step": 40945 }, { "epoch": 0.49906767577053857, "grad_norm": 2.2205519676208496, "learning_rate": 2.6365618986529827e-06, "loss": 0.8818, "step": 40950 }, { "epoch": 0.4991286119946864, "grad_norm": 1.9419400691986084, "learning_rate": 2.636241180243746e-06, "loss": 0.8762, "step": 40955 }, { "epoch": 0.49918954821883416, "grad_norm": 1.872243881225586, "learning_rate": 2.6359204618345096e-06, "loss": 0.87, "step": 40960 }, { "epoch": 0.499250484442982, "grad_norm": 1.7354552745819092, "learning_rate": 2.6355997434252726e-06, "loss": 0.8445, "step": 40965 }, { "epoch": 0.4993114206671298, "grad_norm": 1.9192113876342773, "learning_rate": 2.635279025016036e-06, "loss": 0.8024, "step": 40970 }, { "epoch": 0.49937235689127757, "grad_norm": 2.0882840156555176, "learning_rate": 2.6349583066067995e-06, "loss": 0.8263, "step": 40975 }, { "epoch": 0.4994332931154254, "grad_norm": 2.052651882171631, "learning_rate": 2.634637588197563e-06, "loss": 0.8485, "step": 40980 }, { "epoch": 0.4994942293395732, "grad_norm": 2.226452112197876, "learning_rate": 2.634316869788326e-06, "loss": 0.8395, "step": 40985 }, { "epoch": 0.499555165563721, "grad_norm": 1.815142035484314, "learning_rate": 2.6339961513790894e-06, "loss": 0.8689, "step": 40990 }, { "epoch": 0.4996161017878688, "grad_norm": 1.9023785591125488, "learning_rate": 2.633675432969853e-06, "loss": 0.8814, "step": 40995 }, { "epoch": 0.4996770380120166, "grad_norm": 1.8188790082931519, "learning_rate": 2.633354714560616e-06, "loss": 0.8016, "step": 41000 }, { "epoch": 0.49973797423616445, "grad_norm": 1.9843864440917969, "learning_rate": 2.6330339961513797e-06, "loss": 0.8808, "step": 41005 }, { "epoch": 0.4997989104603122, "grad_norm": 1.9537652730941772, "learning_rate": 2.6327132777421427e-06, "loss": 0.7937, "step": 41010 }, { "epoch": 0.49985984668446004, "grad_norm": 1.7964506149291992, "learning_rate": 2.6323925593329058e-06, "loss": 0.7975, "step": 41015 }, { "epoch": 0.49992078290860786, "grad_norm": 2.498384475708008, "learning_rate": 2.6320718409236688e-06, "loss": 0.8598, "step": 41020 }, { "epoch": 0.49998171913275563, "grad_norm": 1.745296597480774, "learning_rate": 2.6317511225144326e-06, "loss": 0.77, "step": 41025 }, { "epoch": 0.5000426553569035, "grad_norm": 2.304353952407837, "learning_rate": 2.6314304041051957e-06, "loss": 0.8372, "step": 41030 }, { "epoch": 0.5001035915810512, "grad_norm": 1.7733798027038574, "learning_rate": 2.631109685695959e-06, "loss": 0.8419, "step": 41035 }, { "epoch": 0.500164527805199, "grad_norm": 2.078834056854248, "learning_rate": 2.6307889672867225e-06, "loss": 0.8438, "step": 41040 }, { "epoch": 0.5002254640293469, "grad_norm": 2.3183789253234863, "learning_rate": 2.6304682488774855e-06, "loss": 0.8005, "step": 41045 }, { "epoch": 0.5002864002534947, "grad_norm": 1.9931378364562988, "learning_rate": 2.630147530468249e-06, "loss": 0.7979, "step": 41050 }, { "epoch": 0.5003473364776425, "grad_norm": 1.8601336479187012, "learning_rate": 2.6298268120590124e-06, "loss": 0.8773, "step": 41055 }, { "epoch": 0.5004082727017903, "grad_norm": 2.2903225421905518, "learning_rate": 2.629506093649776e-06, "loss": 0.8531, "step": 41060 }, { "epoch": 0.5004692089259382, "grad_norm": 2.3894290924072266, "learning_rate": 2.629185375240539e-06, "loss": 0.8756, "step": 41065 }, { "epoch": 0.5005301451500859, "grad_norm": 1.7662988901138306, "learning_rate": 2.6288646568313027e-06, "loss": 0.7754, "step": 41070 }, { "epoch": 0.5005910813742337, "grad_norm": 1.9031890630722046, "learning_rate": 2.6285439384220658e-06, "loss": 0.7953, "step": 41075 }, { "epoch": 0.5006520175983815, "grad_norm": 1.8772028684616089, "learning_rate": 2.6282232200128288e-06, "loss": 0.8768, "step": 41080 }, { "epoch": 0.5007129538225293, "grad_norm": 2.200108051300049, "learning_rate": 2.6279025016035926e-06, "loss": 0.8503, "step": 41085 }, { "epoch": 0.5007738900466772, "grad_norm": 1.9626270532608032, "learning_rate": 2.6275817831943557e-06, "loss": 0.8575, "step": 41090 }, { "epoch": 0.500834826270825, "grad_norm": 1.8270187377929688, "learning_rate": 2.6272610647851187e-06, "loss": 0.7797, "step": 41095 }, { "epoch": 0.5008957624949728, "grad_norm": 2.1692755222320557, "learning_rate": 2.6269403463758825e-06, "loss": 0.8222, "step": 41100 }, { "epoch": 0.5009566987191205, "grad_norm": 1.864622712135315, "learning_rate": 2.6266196279666456e-06, "loss": 0.7632, "step": 41105 }, { "epoch": 0.5010176349432683, "grad_norm": 1.6645030975341797, "learning_rate": 2.6262989095574086e-06, "loss": 0.8103, "step": 41110 }, { "epoch": 0.5010785711674162, "grad_norm": 1.950490951538086, "learning_rate": 2.625978191148172e-06, "loss": 0.8612, "step": 41115 }, { "epoch": 0.501139507391564, "grad_norm": 1.5779163837432861, "learning_rate": 2.6256574727389355e-06, "loss": 0.8993, "step": 41120 }, { "epoch": 0.5012004436157118, "grad_norm": 1.9990795850753784, "learning_rate": 2.625336754329699e-06, "loss": 0.8101, "step": 41125 }, { "epoch": 0.5012613798398596, "grad_norm": 2.3289825916290283, "learning_rate": 2.625016035920462e-06, "loss": 0.7909, "step": 41130 }, { "epoch": 0.5013223160640075, "grad_norm": 1.8667203187942505, "learning_rate": 2.6246953175112254e-06, "loss": 0.7565, "step": 41135 }, { "epoch": 0.5013832522881552, "grad_norm": 1.8939790725708008, "learning_rate": 2.624374599101989e-06, "loss": 0.819, "step": 41140 }, { "epoch": 0.501444188512303, "grad_norm": 1.7475318908691406, "learning_rate": 2.624053880692752e-06, "loss": 0.8297, "step": 41145 }, { "epoch": 0.5015051247364508, "grad_norm": 2.0098836421966553, "learning_rate": 2.6237331622835157e-06, "loss": 0.7766, "step": 41150 }, { "epoch": 0.5015660609605986, "grad_norm": 1.772443413734436, "learning_rate": 2.6234124438742787e-06, "loss": 0.8673, "step": 41155 }, { "epoch": 0.5016269971847465, "grad_norm": 1.8935045003890991, "learning_rate": 2.6230917254650417e-06, "loss": 0.8552, "step": 41160 }, { "epoch": 0.5016879334088943, "grad_norm": 1.9230958223342896, "learning_rate": 2.6227710070558056e-06, "loss": 0.8287, "step": 41165 }, { "epoch": 0.5017488696330421, "grad_norm": 2.12080979347229, "learning_rate": 2.6224502886465686e-06, "loss": 0.7891, "step": 41170 }, { "epoch": 0.5018098058571898, "grad_norm": 1.8488423824310303, "learning_rate": 2.6221295702373316e-06, "loss": 0.8457, "step": 41175 }, { "epoch": 0.5018707420813376, "grad_norm": 1.8285715579986572, "learning_rate": 2.6218088518280955e-06, "loss": 0.8582, "step": 41180 }, { "epoch": 0.5019316783054855, "grad_norm": 2.3157408237457275, "learning_rate": 2.6214881334188585e-06, "loss": 0.8881, "step": 41185 }, { "epoch": 0.5019926145296333, "grad_norm": 1.8562520742416382, "learning_rate": 2.6211674150096215e-06, "loss": 0.8616, "step": 41190 }, { "epoch": 0.5020535507537811, "grad_norm": 2.046973466873169, "learning_rate": 2.620846696600385e-06, "loss": 0.7739, "step": 41195 }, { "epoch": 0.5021144869779289, "grad_norm": 3.3082644939422607, "learning_rate": 2.6205259781911484e-06, "loss": 0.8444, "step": 41200 }, { "epoch": 0.5021754232020768, "grad_norm": 2.072028160095215, "learning_rate": 2.620205259781912e-06, "loss": 0.7643, "step": 41205 }, { "epoch": 0.5022363594262245, "grad_norm": 1.7148613929748535, "learning_rate": 2.619884541372675e-06, "loss": 0.794, "step": 41210 }, { "epoch": 0.5022972956503723, "grad_norm": 1.922716498374939, "learning_rate": 2.6195638229634383e-06, "loss": 0.8855, "step": 41215 }, { "epoch": 0.5023582318745201, "grad_norm": 2.033956527709961, "learning_rate": 2.6192431045542017e-06, "loss": 0.875, "step": 41220 }, { "epoch": 0.5024191680986679, "grad_norm": 2.124117612838745, "learning_rate": 2.6189223861449647e-06, "loss": 0.7922, "step": 41225 }, { "epoch": 0.5024801043228158, "grad_norm": 2.0234062671661377, "learning_rate": 2.6186016677357286e-06, "loss": 0.8396, "step": 41230 }, { "epoch": 0.5025410405469636, "grad_norm": 1.5996088981628418, "learning_rate": 2.6182809493264916e-06, "loss": 0.8317, "step": 41235 }, { "epoch": 0.5026019767711114, "grad_norm": 2.2867307662963867, "learning_rate": 2.6179602309172546e-06, "loss": 0.7263, "step": 41240 }, { "epoch": 0.5026629129952591, "grad_norm": 1.9223419427871704, "learning_rate": 2.6176395125080185e-06, "loss": 0.8511, "step": 41245 }, { "epoch": 0.5027238492194069, "grad_norm": 1.8631867170333862, "learning_rate": 2.6173187940987815e-06, "loss": 0.8049, "step": 41250 }, { "epoch": 0.5027847854435548, "grad_norm": 2.062192678451538, "learning_rate": 2.6169980756895445e-06, "loss": 0.8655, "step": 41255 }, { "epoch": 0.5028457216677026, "grad_norm": 2.0390920639038086, "learning_rate": 2.6166773572803084e-06, "loss": 0.8629, "step": 41260 }, { "epoch": 0.5029066578918504, "grad_norm": 1.9700227975845337, "learning_rate": 2.6163566388710714e-06, "loss": 0.8776, "step": 41265 }, { "epoch": 0.5029675941159982, "grad_norm": 2.2049319744110107, "learning_rate": 2.6160359204618344e-06, "loss": 0.8135, "step": 41270 }, { "epoch": 0.503028530340146, "grad_norm": 2.331371545791626, "learning_rate": 2.615715202052598e-06, "loss": 0.802, "step": 41275 }, { "epoch": 0.5030894665642938, "grad_norm": 2.08372163772583, "learning_rate": 2.6153944836433613e-06, "loss": 0.8238, "step": 41280 }, { "epoch": 0.5031504027884416, "grad_norm": 2.0338668823242188, "learning_rate": 2.6150737652341248e-06, "loss": 0.815, "step": 41285 }, { "epoch": 0.5032113390125894, "grad_norm": 1.7825151681900024, "learning_rate": 2.6147530468248878e-06, "loss": 0.8317, "step": 41290 }, { "epoch": 0.5032722752367372, "grad_norm": 1.9645090103149414, "learning_rate": 2.614432328415651e-06, "loss": 0.8871, "step": 41295 }, { "epoch": 0.503333211460885, "grad_norm": 2.503190279006958, "learning_rate": 2.6141116100064147e-06, "loss": 0.907, "step": 41300 }, { "epoch": 0.5033941476850329, "grad_norm": 2.3619213104248047, "learning_rate": 2.6137908915971777e-06, "loss": 0.8519, "step": 41305 }, { "epoch": 0.5034550839091807, "grad_norm": 1.7507636547088623, "learning_rate": 2.6134701731879415e-06, "loss": 0.8349, "step": 41310 }, { "epoch": 0.5035160201333284, "grad_norm": 2.081925392150879, "learning_rate": 2.6131494547787045e-06, "loss": 0.9168, "step": 41315 }, { "epoch": 0.5035769563574762, "grad_norm": 1.8609200716018677, "learning_rate": 2.6128287363694676e-06, "loss": 0.8844, "step": 41320 }, { "epoch": 0.5036378925816241, "grad_norm": 2.102559804916382, "learning_rate": 2.6125080179602314e-06, "loss": 0.8887, "step": 41325 }, { "epoch": 0.5036988288057719, "grad_norm": 1.899685025215149, "learning_rate": 2.6121872995509944e-06, "loss": 0.8359, "step": 41330 }, { "epoch": 0.5037597650299197, "grad_norm": 3.4127209186553955, "learning_rate": 2.6118665811417575e-06, "loss": 0.8057, "step": 41335 }, { "epoch": 0.5038207012540675, "grad_norm": 1.6976516246795654, "learning_rate": 2.6115458627325213e-06, "loss": 0.8578, "step": 41340 }, { "epoch": 0.5038816374782153, "grad_norm": 2.275193691253662, "learning_rate": 2.6112251443232843e-06, "loss": 0.8388, "step": 41345 }, { "epoch": 0.5039425737023631, "grad_norm": 1.8038262128829956, "learning_rate": 2.6109044259140478e-06, "loss": 0.8683, "step": 41350 }, { "epoch": 0.5040035099265109, "grad_norm": 2.85384464263916, "learning_rate": 2.610583707504811e-06, "loss": 0.8753, "step": 41355 }, { "epoch": 0.5040644461506587, "grad_norm": 1.8043568134307861, "learning_rate": 2.6102629890955742e-06, "loss": 0.7857, "step": 41360 }, { "epoch": 0.5041253823748065, "grad_norm": 1.6937932968139648, "learning_rate": 2.6099422706863377e-06, "loss": 0.7926, "step": 41365 }, { "epoch": 0.5041863185989544, "grad_norm": 1.9902352094650269, "learning_rate": 2.6096215522771007e-06, "loss": 0.8324, "step": 41370 }, { "epoch": 0.5042472548231022, "grad_norm": 1.7862071990966797, "learning_rate": 2.6093008338678646e-06, "loss": 0.8955, "step": 41375 }, { "epoch": 0.50430819104725, "grad_norm": 1.8085533380508423, "learning_rate": 2.6089801154586276e-06, "loss": 0.8772, "step": 41380 }, { "epoch": 0.5043691272713977, "grad_norm": 2.1290459632873535, "learning_rate": 2.6086593970493906e-06, "loss": 0.8492, "step": 41385 }, { "epoch": 0.5044300634955455, "grad_norm": 2.1913392543792725, "learning_rate": 2.6083386786401545e-06, "loss": 0.8446, "step": 41390 }, { "epoch": 0.5044909997196934, "grad_norm": 2.208299160003662, "learning_rate": 2.6080179602309175e-06, "loss": 0.8875, "step": 41395 }, { "epoch": 0.5045519359438412, "grad_norm": 1.93479585647583, "learning_rate": 2.6076972418216805e-06, "loss": 0.8587, "step": 41400 }, { "epoch": 0.504612872167989, "grad_norm": 2.9849166870117188, "learning_rate": 2.6073765234124444e-06, "loss": 0.8716, "step": 41405 }, { "epoch": 0.5046738083921368, "grad_norm": 2.2752013206481934, "learning_rate": 2.6070558050032074e-06, "loss": 0.8042, "step": 41410 }, { "epoch": 0.5047347446162846, "grad_norm": 1.7110592126846313, "learning_rate": 2.6067350865939704e-06, "loss": 0.9034, "step": 41415 }, { "epoch": 0.5047956808404324, "grad_norm": 1.9894261360168457, "learning_rate": 2.6064143681847343e-06, "loss": 0.8229, "step": 41420 }, { "epoch": 0.5048566170645802, "grad_norm": 2.3499958515167236, "learning_rate": 2.6060936497754973e-06, "loss": 0.8373, "step": 41425 }, { "epoch": 0.504917553288728, "grad_norm": 2.008321523666382, "learning_rate": 2.6057729313662607e-06, "loss": 0.8503, "step": 41430 }, { "epoch": 0.5049784895128758, "grad_norm": 2.673313617706299, "learning_rate": 2.605452212957024e-06, "loss": 0.8311, "step": 41435 }, { "epoch": 0.5050394257370237, "grad_norm": 1.9463518857955933, "learning_rate": 2.605131494547787e-06, "loss": 0.9413, "step": 41440 }, { "epoch": 0.5051003619611715, "grad_norm": 1.9862103462219238, "learning_rate": 2.6048107761385506e-06, "loss": 0.7809, "step": 41445 }, { "epoch": 0.5051612981853193, "grad_norm": 2.3994908332824707, "learning_rate": 2.6044900577293136e-06, "loss": 0.8342, "step": 41450 }, { "epoch": 0.505222234409467, "grad_norm": 3.2145400047302246, "learning_rate": 2.6041693393200775e-06, "loss": 0.8882, "step": 41455 }, { "epoch": 0.5052831706336148, "grad_norm": 2.0416316986083984, "learning_rate": 2.6038486209108405e-06, "loss": 0.8681, "step": 41460 }, { "epoch": 0.5053441068577627, "grad_norm": 1.9509198665618896, "learning_rate": 2.6035279025016035e-06, "loss": 0.7982, "step": 41465 }, { "epoch": 0.5054050430819105, "grad_norm": 2.3485050201416016, "learning_rate": 2.6032071840923674e-06, "loss": 0.8047, "step": 41470 }, { "epoch": 0.5054659793060583, "grad_norm": 1.7994627952575684, "learning_rate": 2.6028864656831304e-06, "loss": 0.8451, "step": 41475 }, { "epoch": 0.5055269155302061, "grad_norm": 2.0232186317443848, "learning_rate": 2.6025657472738934e-06, "loss": 0.8338, "step": 41480 }, { "epoch": 0.505587851754354, "grad_norm": 1.9303946495056152, "learning_rate": 2.6022450288646573e-06, "loss": 0.7805, "step": 41485 }, { "epoch": 0.5056487879785017, "grad_norm": 2.063887596130371, "learning_rate": 2.6019243104554203e-06, "loss": 0.8813, "step": 41490 }, { "epoch": 0.5057097242026495, "grad_norm": 1.882162094116211, "learning_rate": 2.6016035920461833e-06, "loss": 0.7944, "step": 41495 }, { "epoch": 0.5057706604267973, "grad_norm": 2.043243646621704, "learning_rate": 2.601282873636947e-06, "loss": 0.8121, "step": 41500 }, { "epoch": 0.5058315966509451, "grad_norm": 1.910940170288086, "learning_rate": 2.60096215522771e-06, "loss": 0.8536, "step": 41505 }, { "epoch": 0.505892532875093, "grad_norm": 1.6479053497314453, "learning_rate": 2.6006414368184736e-06, "loss": 0.8233, "step": 41510 }, { "epoch": 0.5059534690992408, "grad_norm": 1.9815672636032104, "learning_rate": 2.600320718409237e-06, "loss": 0.8058, "step": 41515 }, { "epoch": 0.5060144053233886, "grad_norm": 1.7211647033691406, "learning_rate": 2.6e-06, "loss": 0.8614, "step": 41520 }, { "epoch": 0.5060753415475363, "grad_norm": 2.021134376525879, "learning_rate": 2.5996792815907635e-06, "loss": 0.8741, "step": 41525 }, { "epoch": 0.5061362777716841, "grad_norm": 1.8759914636611938, "learning_rate": 2.5993585631815266e-06, "loss": 0.8784, "step": 41530 }, { "epoch": 0.506197213995832, "grad_norm": 3.3975799083709717, "learning_rate": 2.5990378447722904e-06, "loss": 0.8219, "step": 41535 }, { "epoch": 0.5062581502199798, "grad_norm": 2.003004550933838, "learning_rate": 2.5987171263630534e-06, "loss": 0.8471, "step": 41540 }, { "epoch": 0.5063190864441276, "grad_norm": 1.8532109260559082, "learning_rate": 2.5983964079538164e-06, "loss": 0.8276, "step": 41545 }, { "epoch": 0.5063800226682754, "grad_norm": 1.8018501996994019, "learning_rate": 2.5980756895445803e-06, "loss": 0.8013, "step": 41550 }, { "epoch": 0.5064409588924232, "grad_norm": 1.835215449333191, "learning_rate": 2.5977549711353433e-06, "loss": 0.8293, "step": 41555 }, { "epoch": 0.506501895116571, "grad_norm": 1.8942793607711792, "learning_rate": 2.5974342527261063e-06, "loss": 0.9126, "step": 41560 }, { "epoch": 0.5065628313407188, "grad_norm": 1.7297502756118774, "learning_rate": 2.59711353431687e-06, "loss": 0.7752, "step": 41565 }, { "epoch": 0.5066237675648666, "grad_norm": 2.1212189197540283, "learning_rate": 2.5967928159076332e-06, "loss": 0.8502, "step": 41570 }, { "epoch": 0.5066847037890144, "grad_norm": 1.811221718788147, "learning_rate": 2.5964720974983967e-06, "loss": 0.8701, "step": 41575 }, { "epoch": 0.5067456400131622, "grad_norm": 2.2274599075317383, "learning_rate": 2.59615137908916e-06, "loss": 0.8659, "step": 41580 }, { "epoch": 0.5068065762373101, "grad_norm": 1.9109835624694824, "learning_rate": 2.595830660679923e-06, "loss": 0.8981, "step": 41585 }, { "epoch": 0.5068675124614579, "grad_norm": 1.820500373840332, "learning_rate": 2.5955099422706866e-06, "loss": 0.8687, "step": 41590 }, { "epoch": 0.5069284486856056, "grad_norm": 1.7757813930511475, "learning_rate": 2.59518922386145e-06, "loss": 0.8218, "step": 41595 }, { "epoch": 0.5069893849097534, "grad_norm": 1.7619826793670654, "learning_rate": 2.5948685054522134e-06, "loss": 0.848, "step": 41600 }, { "epoch": 0.5070503211339012, "grad_norm": 2.0185043811798096, "learning_rate": 2.5945477870429765e-06, "loss": 0.8488, "step": 41605 }, { "epoch": 0.5071112573580491, "grad_norm": 2.152137279510498, "learning_rate": 2.5942270686337395e-06, "loss": 0.8393, "step": 41610 }, { "epoch": 0.5071721935821969, "grad_norm": 1.9575480222702026, "learning_rate": 2.5939063502245033e-06, "loss": 0.8965, "step": 41615 }, { "epoch": 0.5072331298063447, "grad_norm": 2.1384353637695312, "learning_rate": 2.5935856318152664e-06, "loss": 0.8498, "step": 41620 }, { "epoch": 0.5072940660304925, "grad_norm": 1.7222747802734375, "learning_rate": 2.5932649134060294e-06, "loss": 0.8105, "step": 41625 }, { "epoch": 0.5073550022546403, "grad_norm": 1.7124730348587036, "learning_rate": 2.5929441949967932e-06, "loss": 0.7045, "step": 41630 }, { "epoch": 0.5074159384787881, "grad_norm": 1.9940776824951172, "learning_rate": 2.5926234765875563e-06, "loss": 0.8626, "step": 41635 }, { "epoch": 0.5074768747029359, "grad_norm": 1.8017711639404297, "learning_rate": 2.5923027581783193e-06, "loss": 0.8213, "step": 41640 }, { "epoch": 0.5075378109270837, "grad_norm": 1.9557387828826904, "learning_rate": 2.591982039769083e-06, "loss": 0.8506, "step": 41645 }, { "epoch": 0.5075987471512315, "grad_norm": 1.9097102880477905, "learning_rate": 2.591661321359846e-06, "loss": 0.8456, "step": 41650 }, { "epoch": 0.5076596833753794, "grad_norm": 1.7820703983306885, "learning_rate": 2.5913406029506096e-06, "loss": 0.9155, "step": 41655 }, { "epoch": 0.5077206195995272, "grad_norm": 1.770146369934082, "learning_rate": 2.591019884541373e-06, "loss": 0.9152, "step": 41660 }, { "epoch": 0.5077815558236749, "grad_norm": 2.0361878871917725, "learning_rate": 2.590699166132136e-06, "loss": 0.8314, "step": 41665 }, { "epoch": 0.5078424920478227, "grad_norm": 2.4400603771209717, "learning_rate": 2.5903784477228995e-06, "loss": 0.9151, "step": 41670 }, { "epoch": 0.5079034282719705, "grad_norm": 2.1396963596343994, "learning_rate": 2.590057729313663e-06, "loss": 0.7978, "step": 41675 }, { "epoch": 0.5079643644961184, "grad_norm": 2.088207483291626, "learning_rate": 2.5897370109044264e-06, "loss": 0.8398, "step": 41680 }, { "epoch": 0.5080253007202662, "grad_norm": 2.3097381591796875, "learning_rate": 2.5894162924951894e-06, "loss": 0.8765, "step": 41685 }, { "epoch": 0.508086236944414, "grad_norm": 1.8149235248565674, "learning_rate": 2.589095574085953e-06, "loss": 0.8506, "step": 41690 }, { "epoch": 0.5081471731685618, "grad_norm": 1.9451309442520142, "learning_rate": 2.5887748556767163e-06, "loss": 0.8724, "step": 41695 }, { "epoch": 0.5082081093927096, "grad_norm": 1.9716131687164307, "learning_rate": 2.5884541372674793e-06, "loss": 0.7482, "step": 41700 }, { "epoch": 0.5082690456168574, "grad_norm": 2.031883955001831, "learning_rate": 2.5881334188582423e-06, "loss": 0.8543, "step": 41705 }, { "epoch": 0.5083299818410052, "grad_norm": 2.258160352706909, "learning_rate": 2.587812700449006e-06, "loss": 0.8762, "step": 41710 }, { "epoch": 0.508390918065153, "grad_norm": 2.1344377994537354, "learning_rate": 2.587491982039769e-06, "loss": 0.7927, "step": 41715 }, { "epoch": 0.5084518542893008, "grad_norm": 1.8750478029251099, "learning_rate": 2.587171263630532e-06, "loss": 0.9103, "step": 41720 }, { "epoch": 0.5085127905134487, "grad_norm": 1.9303947687149048, "learning_rate": 2.586850545221296e-06, "loss": 0.8391, "step": 41725 }, { "epoch": 0.5085737267375964, "grad_norm": 1.9958438873291016, "learning_rate": 2.586529826812059e-06, "loss": 0.812, "step": 41730 }, { "epoch": 0.5086346629617442, "grad_norm": 1.8795831203460693, "learning_rate": 2.5862091084028225e-06, "loss": 0.8817, "step": 41735 }, { "epoch": 0.508695599185892, "grad_norm": 1.8158533573150635, "learning_rate": 2.585888389993586e-06, "loss": 0.8277, "step": 41740 }, { "epoch": 0.5087565354100398, "grad_norm": 2.019040822982788, "learning_rate": 2.585567671584349e-06, "loss": 0.8489, "step": 41745 }, { "epoch": 0.5088174716341877, "grad_norm": 2.040178060531616, "learning_rate": 2.5852469531751124e-06, "loss": 0.8132, "step": 41750 }, { "epoch": 0.5088784078583355, "grad_norm": 1.6946672201156616, "learning_rate": 2.584926234765876e-06, "loss": 0.868, "step": 41755 }, { "epoch": 0.5089393440824833, "grad_norm": 2.2085015773773193, "learning_rate": 2.5846055163566393e-06, "loss": 0.8271, "step": 41760 }, { "epoch": 0.509000280306631, "grad_norm": 1.7358970642089844, "learning_rate": 2.5842847979474023e-06, "loss": 0.8731, "step": 41765 }, { "epoch": 0.5090612165307788, "grad_norm": 1.8397853374481201, "learning_rate": 2.583964079538166e-06, "loss": 0.806, "step": 41770 }, { "epoch": 0.5091221527549267, "grad_norm": 1.8690921068191528, "learning_rate": 2.583643361128929e-06, "loss": 0.855, "step": 41775 }, { "epoch": 0.5091830889790745, "grad_norm": 2.1034390926361084, "learning_rate": 2.5833226427196922e-06, "loss": 0.8684, "step": 41780 }, { "epoch": 0.5092440252032223, "grad_norm": 1.7022008895874023, "learning_rate": 2.5830019243104552e-06, "loss": 0.9624, "step": 41785 }, { "epoch": 0.5093049614273701, "grad_norm": 2.16414475440979, "learning_rate": 2.582681205901219e-06, "loss": 0.8634, "step": 41790 }, { "epoch": 0.509365897651518, "grad_norm": 1.898748517036438, "learning_rate": 2.582360487491982e-06, "loss": 0.7768, "step": 41795 }, { "epoch": 0.5094268338756657, "grad_norm": 1.772152304649353, "learning_rate": 2.5820397690827456e-06, "loss": 0.8441, "step": 41800 }, { "epoch": 0.5094877700998135, "grad_norm": 2.0450644493103027, "learning_rate": 2.581719050673509e-06, "loss": 0.8351, "step": 41805 }, { "epoch": 0.5095487063239613, "grad_norm": 1.7081787586212158, "learning_rate": 2.581398332264272e-06, "loss": 0.8836, "step": 41810 }, { "epoch": 0.5096096425481091, "grad_norm": 1.9089841842651367, "learning_rate": 2.5810776138550354e-06, "loss": 0.8379, "step": 41815 }, { "epoch": 0.509670578772257, "grad_norm": 2.019090175628662, "learning_rate": 2.580756895445799e-06, "loss": 0.8499, "step": 41820 }, { "epoch": 0.5097315149964048, "grad_norm": 1.6486179828643799, "learning_rate": 2.5804361770365623e-06, "loss": 0.8708, "step": 41825 }, { "epoch": 0.5097924512205526, "grad_norm": 1.9929242134094238, "learning_rate": 2.5801154586273253e-06, "loss": 0.9111, "step": 41830 }, { "epoch": 0.5098533874447003, "grad_norm": 2.006852626800537, "learning_rate": 2.5797947402180888e-06, "loss": 0.8547, "step": 41835 }, { "epoch": 0.5099143236688481, "grad_norm": 2.1413705348968506, "learning_rate": 2.5794740218088522e-06, "loss": 0.8188, "step": 41840 }, { "epoch": 0.509975259892996, "grad_norm": 1.8673982620239258, "learning_rate": 2.5791533033996152e-06, "loss": 0.793, "step": 41845 }, { "epoch": 0.5100361961171438, "grad_norm": 2.2497241497039795, "learning_rate": 2.578832584990379e-06, "loss": 0.7721, "step": 41850 }, { "epoch": 0.5100971323412916, "grad_norm": 1.7972931861877441, "learning_rate": 2.578511866581142e-06, "loss": 0.8452, "step": 41855 }, { "epoch": 0.5101580685654394, "grad_norm": 2.1226508617401123, "learning_rate": 2.578191148171905e-06, "loss": 0.7982, "step": 41860 }, { "epoch": 0.5102190047895873, "grad_norm": 1.9383463859558105, "learning_rate": 2.577870429762668e-06, "loss": 0.8252, "step": 41865 }, { "epoch": 0.510279941013735, "grad_norm": 2.087244987487793, "learning_rate": 2.577549711353432e-06, "loss": 0.7929, "step": 41870 }, { "epoch": 0.5103408772378828, "grad_norm": 1.8990591764450073, "learning_rate": 2.577228992944195e-06, "loss": 0.8165, "step": 41875 }, { "epoch": 0.5104018134620306, "grad_norm": 2.1205079555511475, "learning_rate": 2.5769082745349585e-06, "loss": 0.9117, "step": 41880 }, { "epoch": 0.5104627496861784, "grad_norm": 2.1487627029418945, "learning_rate": 2.576587556125722e-06, "loss": 0.8282, "step": 41885 }, { "epoch": 0.5105236859103263, "grad_norm": 2.055762767791748, "learning_rate": 2.576266837716485e-06, "loss": 0.9123, "step": 41890 }, { "epoch": 0.5105846221344741, "grad_norm": 2.070676565170288, "learning_rate": 2.5759461193072484e-06, "loss": 0.8281, "step": 41895 }, { "epoch": 0.5106455583586219, "grad_norm": 2.0861504077911377, "learning_rate": 2.575625400898012e-06, "loss": 0.8178, "step": 41900 }, { "epoch": 0.5107064945827696, "grad_norm": 1.7851430177688599, "learning_rate": 2.5753046824887753e-06, "loss": 0.8656, "step": 41905 }, { "epoch": 0.5107674308069174, "grad_norm": 2.953774929046631, "learning_rate": 2.5749839640795383e-06, "loss": 0.8908, "step": 41910 }, { "epoch": 0.5108283670310653, "grad_norm": 1.887223720550537, "learning_rate": 2.5746632456703017e-06, "loss": 0.8814, "step": 41915 }, { "epoch": 0.5108893032552131, "grad_norm": 2.0187437534332275, "learning_rate": 2.574342527261065e-06, "loss": 0.8454, "step": 41920 }, { "epoch": 0.5109502394793609, "grad_norm": 1.857317328453064, "learning_rate": 2.574021808851828e-06, "loss": 0.8538, "step": 41925 }, { "epoch": 0.5110111757035087, "grad_norm": 2.6136741638183594, "learning_rate": 2.573701090442592e-06, "loss": 0.8322, "step": 41930 }, { "epoch": 0.5110721119276566, "grad_norm": 1.691391110420227, "learning_rate": 2.573380372033355e-06, "loss": 0.8499, "step": 41935 }, { "epoch": 0.5111330481518043, "grad_norm": 2.438791036605835, "learning_rate": 2.573059653624118e-06, "loss": 0.7907, "step": 41940 }, { "epoch": 0.5111939843759521, "grad_norm": 1.8109254837036133, "learning_rate": 2.572738935214881e-06, "loss": 0.8229, "step": 41945 }, { "epoch": 0.5112549206000999, "grad_norm": 2.3119678497314453, "learning_rate": 2.572418216805645e-06, "loss": 0.8659, "step": 41950 }, { "epoch": 0.5113158568242477, "grad_norm": 2.096632719039917, "learning_rate": 2.572097498396408e-06, "loss": 0.8585, "step": 41955 }, { "epoch": 0.5113767930483956, "grad_norm": 2.2736473083496094, "learning_rate": 2.5717767799871714e-06, "loss": 0.9003, "step": 41960 }, { "epoch": 0.5114377292725434, "grad_norm": 1.6857727766036987, "learning_rate": 2.571456061577935e-06, "loss": 0.8138, "step": 41965 }, { "epoch": 0.5114986654966912, "grad_norm": 1.9145702123641968, "learning_rate": 2.571135343168698e-06, "loss": 0.7748, "step": 41970 }, { "epoch": 0.5115596017208389, "grad_norm": 2.4351720809936523, "learning_rate": 2.5708146247594613e-06, "loss": 0.83, "step": 41975 }, { "epoch": 0.5116205379449867, "grad_norm": 1.8654717206954956, "learning_rate": 2.5704939063502247e-06, "loss": 0.8186, "step": 41980 }, { "epoch": 0.5116814741691346, "grad_norm": 1.7631587982177734, "learning_rate": 2.570173187940988e-06, "loss": 0.9118, "step": 41985 }, { "epoch": 0.5117424103932824, "grad_norm": 2.1299688816070557, "learning_rate": 2.569852469531751e-06, "loss": 0.8097, "step": 41990 }, { "epoch": 0.5118033466174302, "grad_norm": 2.041658401489258, "learning_rate": 2.5695317511225146e-06, "loss": 0.873, "step": 41995 }, { "epoch": 0.511864282841578, "grad_norm": 1.9342899322509766, "learning_rate": 2.569211032713278e-06, "loss": 0.8294, "step": 42000 }, { "epoch": 0.5119252190657259, "grad_norm": 1.9062665700912476, "learning_rate": 2.568890314304041e-06, "loss": 0.9011, "step": 42005 }, { "epoch": 0.5119861552898736, "grad_norm": 1.8911727666854858, "learning_rate": 2.568569595894805e-06, "loss": 0.7997, "step": 42010 }, { "epoch": 0.5120470915140214, "grad_norm": 1.9264898300170898, "learning_rate": 2.568248877485568e-06, "loss": 0.8187, "step": 42015 }, { "epoch": 0.5121080277381692, "grad_norm": 1.7434141635894775, "learning_rate": 2.567928159076331e-06, "loss": 0.8406, "step": 42020 }, { "epoch": 0.512168963962317, "grad_norm": 1.852852702140808, "learning_rate": 2.567607440667095e-06, "loss": 0.8573, "step": 42025 }, { "epoch": 0.5122299001864649, "grad_norm": 1.9327133893966675, "learning_rate": 2.567286722257858e-06, "loss": 0.7925, "step": 42030 }, { "epoch": 0.5122908364106127, "grad_norm": 2.4244656562805176, "learning_rate": 2.566966003848621e-06, "loss": 0.8417, "step": 42035 }, { "epoch": 0.5123517726347605, "grad_norm": 1.821733832359314, "learning_rate": 2.5666452854393843e-06, "loss": 0.8558, "step": 42040 }, { "epoch": 0.5124127088589082, "grad_norm": 2.014012098312378, "learning_rate": 2.5663245670301478e-06, "loss": 0.9143, "step": 42045 }, { "epoch": 0.512473645083056, "grad_norm": 2.291144609451294, "learning_rate": 2.5660038486209112e-06, "loss": 0.8436, "step": 42050 }, { "epoch": 0.5125345813072039, "grad_norm": 1.8601598739624023, "learning_rate": 2.5656831302116742e-06, "loss": 0.8925, "step": 42055 }, { "epoch": 0.5125955175313517, "grad_norm": 1.8570668697357178, "learning_rate": 2.5653624118024377e-06, "loss": 0.8591, "step": 42060 }, { "epoch": 0.5126564537554995, "grad_norm": 1.9094152450561523, "learning_rate": 2.565041693393201e-06, "loss": 0.8425, "step": 42065 }, { "epoch": 0.5127173899796473, "grad_norm": 1.9679419994354248, "learning_rate": 2.564720974983964e-06, "loss": 0.8228, "step": 42070 }, { "epoch": 0.5127783262037952, "grad_norm": 2.0275981426239014, "learning_rate": 2.564400256574728e-06, "loss": 0.8812, "step": 42075 }, { "epoch": 0.5128392624279429, "grad_norm": 2.0059750080108643, "learning_rate": 2.564079538165491e-06, "loss": 0.875, "step": 42080 }, { "epoch": 0.5129001986520907, "grad_norm": 2.057138442993164, "learning_rate": 2.563758819756254e-06, "loss": 0.8725, "step": 42085 }, { "epoch": 0.5129611348762385, "grad_norm": 2.1658613681793213, "learning_rate": 2.563438101347018e-06, "loss": 0.8633, "step": 42090 }, { "epoch": 0.5130220711003863, "grad_norm": 1.5765900611877441, "learning_rate": 2.563117382937781e-06, "loss": 0.8126, "step": 42095 }, { "epoch": 0.5130830073245342, "grad_norm": 1.8462512493133545, "learning_rate": 2.562796664528544e-06, "loss": 0.8789, "step": 42100 }, { "epoch": 0.513143943548682, "grad_norm": 1.9782603979110718, "learning_rate": 2.5624759461193078e-06, "loss": 0.8712, "step": 42105 }, { "epoch": 0.5132048797728298, "grad_norm": 1.8295141458511353, "learning_rate": 2.562155227710071e-06, "loss": 0.805, "step": 42110 }, { "epoch": 0.5132658159969775, "grad_norm": 1.7483264207839966, "learning_rate": 2.561834509300834e-06, "loss": 0.9061, "step": 42115 }, { "epoch": 0.5133267522211253, "grad_norm": 1.9339258670806885, "learning_rate": 2.5615137908915973e-06, "loss": 0.796, "step": 42120 }, { "epoch": 0.5133876884452732, "grad_norm": 2.005502223968506, "learning_rate": 2.5611930724823607e-06, "loss": 0.795, "step": 42125 }, { "epoch": 0.513448624669421, "grad_norm": 1.9328442811965942, "learning_rate": 2.560872354073124e-06, "loss": 0.8671, "step": 42130 }, { "epoch": 0.5135095608935688, "grad_norm": 1.7805813550949097, "learning_rate": 2.560551635663887e-06, "loss": 0.8453, "step": 42135 }, { "epoch": 0.5135704971177166, "grad_norm": 1.9902530908584595, "learning_rate": 2.5602309172546506e-06, "loss": 0.7731, "step": 42140 }, { "epoch": 0.5136314333418645, "grad_norm": 2.016472816467285, "learning_rate": 2.559910198845414e-06, "loss": 0.8396, "step": 42145 }, { "epoch": 0.5136923695660122, "grad_norm": 1.9775022268295288, "learning_rate": 2.559589480436177e-06, "loss": 0.8834, "step": 42150 }, { "epoch": 0.51375330579016, "grad_norm": 1.751802682876587, "learning_rate": 2.559268762026941e-06, "loss": 0.7905, "step": 42155 }, { "epoch": 0.5138142420143078, "grad_norm": 2.3689584732055664, "learning_rate": 2.558948043617704e-06, "loss": 0.8855, "step": 42160 }, { "epoch": 0.5138751782384556, "grad_norm": 1.9248552322387695, "learning_rate": 2.558627325208467e-06, "loss": 0.8074, "step": 42165 }, { "epoch": 0.5139361144626035, "grad_norm": 1.9302338361740112, "learning_rate": 2.558306606799231e-06, "loss": 0.8774, "step": 42170 }, { "epoch": 0.5139970506867513, "grad_norm": 2.0015909671783447, "learning_rate": 2.557985888389994e-06, "loss": 0.8882, "step": 42175 }, { "epoch": 0.5140579869108991, "grad_norm": 2.128654956817627, "learning_rate": 2.557665169980757e-06, "loss": 0.8187, "step": 42180 }, { "epoch": 0.5141189231350468, "grad_norm": 2.007481336593628, "learning_rate": 2.5573444515715207e-06, "loss": 0.8255, "step": 42185 }, { "epoch": 0.5141798593591946, "grad_norm": 1.8761615753173828, "learning_rate": 2.5570237331622837e-06, "loss": 0.8245, "step": 42190 }, { "epoch": 0.5142407955833425, "grad_norm": 2.0606155395507812, "learning_rate": 2.5567030147530467e-06, "loss": 0.7183, "step": 42195 }, { "epoch": 0.5143017318074903, "grad_norm": 2.733534336090088, "learning_rate": 2.55638229634381e-06, "loss": 0.8573, "step": 42200 }, { "epoch": 0.5143626680316381, "grad_norm": 2.267932176589966, "learning_rate": 2.5560615779345736e-06, "loss": 0.8761, "step": 42205 }, { "epoch": 0.5144236042557859, "grad_norm": 1.8677489757537842, "learning_rate": 2.555740859525337e-06, "loss": 0.8277, "step": 42210 }, { "epoch": 0.5144845404799337, "grad_norm": 2.0455594062805176, "learning_rate": 2.5554201411161e-06, "loss": 0.8769, "step": 42215 }, { "epoch": 0.5145454767040815, "grad_norm": 2.173762321472168, "learning_rate": 2.5550994227068635e-06, "loss": 0.8634, "step": 42220 }, { "epoch": 0.5146064129282293, "grad_norm": 1.6957159042358398, "learning_rate": 2.554778704297627e-06, "loss": 0.8092, "step": 42225 }, { "epoch": 0.5146673491523771, "grad_norm": 1.6494991779327393, "learning_rate": 2.55445798588839e-06, "loss": 0.7672, "step": 42230 }, { "epoch": 0.5147282853765249, "grad_norm": 2.082780122756958, "learning_rate": 2.554137267479154e-06, "loss": 0.8327, "step": 42235 }, { "epoch": 0.5147892216006728, "grad_norm": 1.6499240398406982, "learning_rate": 2.553816549069917e-06, "loss": 0.8409, "step": 42240 }, { "epoch": 0.5148501578248206, "grad_norm": 1.9509150981903076, "learning_rate": 2.55349583066068e-06, "loss": 0.8353, "step": 42245 }, { "epoch": 0.5149110940489684, "grad_norm": 1.7676409482955933, "learning_rate": 2.5531751122514437e-06, "loss": 0.7823, "step": 42250 }, { "epoch": 0.5149720302731161, "grad_norm": 1.978833794593811, "learning_rate": 2.5528543938422068e-06, "loss": 0.8505, "step": 42255 }, { "epoch": 0.5150329664972639, "grad_norm": 2.416996955871582, "learning_rate": 2.5525336754329698e-06, "loss": 0.8659, "step": 42260 }, { "epoch": 0.5150939027214118, "grad_norm": 1.880286693572998, "learning_rate": 2.5522129570237336e-06, "loss": 0.7419, "step": 42265 }, { "epoch": 0.5151548389455596, "grad_norm": 2.0324127674102783, "learning_rate": 2.5518922386144967e-06, "loss": 0.8838, "step": 42270 }, { "epoch": 0.5152157751697074, "grad_norm": 1.858963966369629, "learning_rate": 2.55157152020526e-06, "loss": 0.8251, "step": 42275 }, { "epoch": 0.5152767113938552, "grad_norm": 2.202352285385132, "learning_rate": 2.551250801796023e-06, "loss": 0.8157, "step": 42280 }, { "epoch": 0.515337647618003, "grad_norm": 2.140632390975952, "learning_rate": 2.5509300833867866e-06, "loss": 0.8, "step": 42285 }, { "epoch": 0.5153985838421508, "grad_norm": 2.29231595993042, "learning_rate": 2.55060936497755e-06, "loss": 0.8606, "step": 42290 }, { "epoch": 0.5154595200662986, "grad_norm": 1.9856680631637573, "learning_rate": 2.550288646568313e-06, "loss": 0.7554, "step": 42295 }, { "epoch": 0.5155204562904464, "grad_norm": 2.140911340713501, "learning_rate": 2.549967928159077e-06, "loss": 0.8432, "step": 42300 }, { "epoch": 0.5155813925145942, "grad_norm": 2.1491358280181885, "learning_rate": 2.54964720974984e-06, "loss": 0.8788, "step": 42305 }, { "epoch": 0.515642328738742, "grad_norm": 1.914910912513733, "learning_rate": 2.549326491340603e-06, "loss": 0.8178, "step": 42310 }, { "epoch": 0.5157032649628899, "grad_norm": 1.8277437686920166, "learning_rate": 2.5490057729313668e-06, "loss": 0.8399, "step": 42315 }, { "epoch": 0.5157642011870377, "grad_norm": 2.109267234802246, "learning_rate": 2.54868505452213e-06, "loss": 0.8664, "step": 42320 }, { "epoch": 0.5158251374111854, "grad_norm": 1.8551846742630005, "learning_rate": 2.548364336112893e-06, "loss": 0.7477, "step": 42325 }, { "epoch": 0.5158860736353332, "grad_norm": 2.4893245697021484, "learning_rate": 2.5480436177036567e-06, "loss": 0.8845, "step": 42330 }, { "epoch": 0.515947009859481, "grad_norm": 1.7800123691558838, "learning_rate": 2.5477228992944197e-06, "loss": 0.7978, "step": 42335 }, { "epoch": 0.5160079460836289, "grad_norm": 2.0541791915893555, "learning_rate": 2.5474021808851827e-06, "loss": 0.8324, "step": 42340 }, { "epoch": 0.5160688823077767, "grad_norm": 2.139235496520996, "learning_rate": 2.5470814624759466e-06, "loss": 0.8968, "step": 42345 }, { "epoch": 0.5161298185319245, "grad_norm": 2.180016040802002, "learning_rate": 2.5467607440667096e-06, "loss": 0.8101, "step": 42350 }, { "epoch": 0.5161907547560723, "grad_norm": 1.819854974746704, "learning_rate": 2.546440025657473e-06, "loss": 0.8499, "step": 42355 }, { "epoch": 0.5162516909802201, "grad_norm": 2.0619184970855713, "learning_rate": 2.5461193072482365e-06, "loss": 0.7968, "step": 42360 }, { "epoch": 0.5163126272043679, "grad_norm": 2.155437707901001, "learning_rate": 2.5457985888389995e-06, "loss": 0.8272, "step": 42365 }, { "epoch": 0.5163735634285157, "grad_norm": 2.061170816421509, "learning_rate": 2.545477870429763e-06, "loss": 0.8825, "step": 42370 }, { "epoch": 0.5164344996526635, "grad_norm": 1.8179373741149902, "learning_rate": 2.545157152020526e-06, "loss": 0.8098, "step": 42375 }, { "epoch": 0.5164954358768113, "grad_norm": 1.708769679069519, "learning_rate": 2.54483643361129e-06, "loss": 0.7973, "step": 42380 }, { "epoch": 0.5165563721009592, "grad_norm": 2.5922956466674805, "learning_rate": 2.544515715202053e-06, "loss": 0.9254, "step": 42385 }, { "epoch": 0.516617308325107, "grad_norm": 1.814318060874939, "learning_rate": 2.544194996792816e-06, "loss": 0.8511, "step": 42390 }, { "epoch": 0.5166782445492547, "grad_norm": 2.006324052810669, "learning_rate": 2.5438742783835797e-06, "loss": 0.8843, "step": 42395 }, { "epoch": 0.5167391807734025, "grad_norm": 1.949608564376831, "learning_rate": 2.5435535599743427e-06, "loss": 0.8057, "step": 42400 }, { "epoch": 0.5168001169975504, "grad_norm": 1.8716322183609009, "learning_rate": 2.5432328415651057e-06, "loss": 0.8807, "step": 42405 }, { "epoch": 0.5168610532216982, "grad_norm": 1.7710429430007935, "learning_rate": 2.5429121231558696e-06, "loss": 0.8392, "step": 42410 }, { "epoch": 0.516921989445846, "grad_norm": 1.9240787029266357, "learning_rate": 2.5425914047466326e-06, "loss": 0.8352, "step": 42415 }, { "epoch": 0.5169829256699938, "grad_norm": 2.0727784633636475, "learning_rate": 2.5422706863373956e-06, "loss": 0.8448, "step": 42420 }, { "epoch": 0.5170438618941416, "grad_norm": 2.0299365520477295, "learning_rate": 2.5419499679281595e-06, "loss": 0.7885, "step": 42425 }, { "epoch": 0.5171047981182894, "grad_norm": 1.8284413814544678, "learning_rate": 2.5416292495189225e-06, "loss": 0.847, "step": 42430 }, { "epoch": 0.5171657343424372, "grad_norm": 2.2908506393432617, "learning_rate": 2.541308531109686e-06, "loss": 0.8597, "step": 42435 }, { "epoch": 0.517226670566585, "grad_norm": 2.1386423110961914, "learning_rate": 2.5409878127004494e-06, "loss": 0.8864, "step": 42440 }, { "epoch": 0.5172876067907328, "grad_norm": 3.342971086502075, "learning_rate": 2.5406670942912124e-06, "loss": 0.8485, "step": 42445 }, { "epoch": 0.5173485430148806, "grad_norm": 2.2583186626434326, "learning_rate": 2.540346375881976e-06, "loss": 0.9301, "step": 42450 }, { "epoch": 0.5174094792390285, "grad_norm": 1.7803186178207397, "learning_rate": 2.540025657472739e-06, "loss": 0.8477, "step": 42455 }, { "epoch": 0.5174704154631763, "grad_norm": 2.596011161804199, "learning_rate": 2.5397049390635027e-06, "loss": 0.862, "step": 42460 }, { "epoch": 0.517531351687324, "grad_norm": 1.8884891271591187, "learning_rate": 2.5393842206542657e-06, "loss": 0.8383, "step": 42465 }, { "epoch": 0.5175922879114718, "grad_norm": 1.6750065088272095, "learning_rate": 2.5390635022450288e-06, "loss": 0.8082, "step": 42470 }, { "epoch": 0.5176532241356196, "grad_norm": 1.9824599027633667, "learning_rate": 2.5387427838357926e-06, "loss": 0.8504, "step": 42475 }, { "epoch": 0.5177141603597675, "grad_norm": 2.1239237785339355, "learning_rate": 2.5384220654265556e-06, "loss": 0.8775, "step": 42480 }, { "epoch": 0.5177750965839153, "grad_norm": 2.3252339363098145, "learning_rate": 2.5381013470173187e-06, "loss": 0.8444, "step": 42485 }, { "epoch": 0.5178360328080631, "grad_norm": 2.1902241706848145, "learning_rate": 2.5377806286080825e-06, "loss": 0.883, "step": 42490 }, { "epoch": 0.5178969690322109, "grad_norm": 2.022311210632324, "learning_rate": 2.5374599101988455e-06, "loss": 0.811, "step": 42495 }, { "epoch": 0.5179579052563587, "grad_norm": 1.8749080896377563, "learning_rate": 2.5371391917896086e-06, "loss": 0.8164, "step": 42500 }, { "epoch": 0.5180188414805065, "grad_norm": 1.906049370765686, "learning_rate": 2.5368184733803724e-06, "loss": 0.8135, "step": 42505 }, { "epoch": 0.5180797777046543, "grad_norm": 1.7853729724884033, "learning_rate": 2.5364977549711354e-06, "loss": 0.7639, "step": 42510 }, { "epoch": 0.5181407139288021, "grad_norm": 2.365442991256714, "learning_rate": 2.536177036561899e-06, "loss": 0.7786, "step": 42515 }, { "epoch": 0.5182016501529499, "grad_norm": 2.018589496612549, "learning_rate": 2.5358563181526623e-06, "loss": 0.7764, "step": 42520 }, { "epoch": 0.5182625863770978, "grad_norm": 1.7924764156341553, "learning_rate": 2.5355355997434258e-06, "loss": 0.8597, "step": 42525 }, { "epoch": 0.5183235226012456, "grad_norm": 1.8049248456954956, "learning_rate": 2.5352148813341888e-06, "loss": 0.7934, "step": 42530 }, { "epoch": 0.5183844588253933, "grad_norm": 2.0190556049346924, "learning_rate": 2.534894162924952e-06, "loss": 0.8148, "step": 42535 }, { "epoch": 0.5184453950495411, "grad_norm": 2.108053684234619, "learning_rate": 2.5345734445157157e-06, "loss": 0.891, "step": 42540 }, { "epoch": 0.518506331273689, "grad_norm": 2.0688703060150146, "learning_rate": 2.5342527261064787e-06, "loss": 0.8391, "step": 42545 }, { "epoch": 0.5185672674978368, "grad_norm": 1.893943190574646, "learning_rate": 2.5339320076972417e-06, "loss": 0.7475, "step": 42550 }, { "epoch": 0.5186282037219846, "grad_norm": 2.2409353256225586, "learning_rate": 2.5336112892880056e-06, "loss": 0.8343, "step": 42555 }, { "epoch": 0.5186891399461324, "grad_norm": 1.759711742401123, "learning_rate": 2.5332905708787686e-06, "loss": 0.8378, "step": 42560 }, { "epoch": 0.5187500761702802, "grad_norm": 1.8379874229431152, "learning_rate": 2.5329698524695316e-06, "loss": 0.846, "step": 42565 }, { "epoch": 0.518811012394428, "grad_norm": 1.8262358903884888, "learning_rate": 2.5326491340602955e-06, "loss": 0.7711, "step": 42570 }, { "epoch": 0.5188719486185758, "grad_norm": 2.133070707321167, "learning_rate": 2.5323284156510585e-06, "loss": 0.8323, "step": 42575 }, { "epoch": 0.5189328848427236, "grad_norm": 1.8641413450241089, "learning_rate": 2.532007697241822e-06, "loss": 0.8612, "step": 42580 }, { "epoch": 0.5189938210668714, "grad_norm": 1.9837273359298706, "learning_rate": 2.5316869788325853e-06, "loss": 0.863, "step": 42585 }, { "epoch": 0.5190547572910192, "grad_norm": 1.905570149421692, "learning_rate": 2.5313662604233484e-06, "loss": 0.8053, "step": 42590 }, { "epoch": 0.5191156935151671, "grad_norm": 1.9301409721374512, "learning_rate": 2.531045542014112e-06, "loss": 0.8376, "step": 42595 }, { "epoch": 0.5191766297393149, "grad_norm": 1.760134220123291, "learning_rate": 2.5307248236048752e-06, "loss": 0.8185, "step": 42600 }, { "epoch": 0.5192375659634626, "grad_norm": 1.8397704362869263, "learning_rate": 2.5304041051956387e-06, "loss": 0.782, "step": 42605 }, { "epoch": 0.5192985021876104, "grad_norm": 2.0806691646575928, "learning_rate": 2.5300833867864017e-06, "loss": 0.9375, "step": 42610 }, { "epoch": 0.5193594384117582, "grad_norm": 1.8671181201934814, "learning_rate": 2.529762668377165e-06, "loss": 0.851, "step": 42615 }, { "epoch": 0.5194203746359061, "grad_norm": 2.1243250370025635, "learning_rate": 2.5294419499679286e-06, "loss": 0.7269, "step": 42620 }, { "epoch": 0.5194813108600539, "grad_norm": 1.8763656616210938, "learning_rate": 2.5291212315586916e-06, "loss": 0.8417, "step": 42625 }, { "epoch": 0.5195422470842017, "grad_norm": 2.281937837600708, "learning_rate": 2.5288005131494546e-06, "loss": 0.9413, "step": 42630 }, { "epoch": 0.5196031833083495, "grad_norm": 1.9165077209472656, "learning_rate": 2.5284797947402185e-06, "loss": 0.8442, "step": 42635 }, { "epoch": 0.5196641195324972, "grad_norm": 2.3526194095611572, "learning_rate": 2.5281590763309815e-06, "loss": 0.8489, "step": 42640 }, { "epoch": 0.5197250557566451, "grad_norm": 1.6711235046386719, "learning_rate": 2.5278383579217445e-06, "loss": 0.8544, "step": 42645 }, { "epoch": 0.5197859919807929, "grad_norm": 1.9707286357879639, "learning_rate": 2.5275176395125084e-06, "loss": 0.8548, "step": 42650 }, { "epoch": 0.5198469282049407, "grad_norm": 1.865182876586914, "learning_rate": 2.5271969211032714e-06, "loss": 0.8957, "step": 42655 }, { "epoch": 0.5199078644290885, "grad_norm": 1.9744259119033813, "learning_rate": 2.526876202694035e-06, "loss": 0.8892, "step": 42660 }, { "epoch": 0.5199688006532364, "grad_norm": 2.9259755611419678, "learning_rate": 2.5265554842847983e-06, "loss": 0.8253, "step": 42665 }, { "epoch": 0.5200297368773841, "grad_norm": 1.8517264127731323, "learning_rate": 2.5262347658755613e-06, "loss": 0.8021, "step": 42670 }, { "epoch": 0.5200906731015319, "grad_norm": 1.6708189249038696, "learning_rate": 2.5259140474663247e-06, "loss": 0.8208, "step": 42675 }, { "epoch": 0.5201516093256797, "grad_norm": 2.1849639415740967, "learning_rate": 2.525593329057088e-06, "loss": 0.8203, "step": 42680 }, { "epoch": 0.5202125455498275, "grad_norm": 2.373952865600586, "learning_rate": 2.5252726106478516e-06, "loss": 0.9265, "step": 42685 }, { "epoch": 0.5202734817739754, "grad_norm": 2.347585678100586, "learning_rate": 2.5249518922386146e-06, "loss": 0.8056, "step": 42690 }, { "epoch": 0.5203344179981232, "grad_norm": 2.094113349914551, "learning_rate": 2.524631173829378e-06, "loss": 0.7746, "step": 42695 }, { "epoch": 0.520395354222271, "grad_norm": 1.9441697597503662, "learning_rate": 2.5243104554201415e-06, "loss": 0.8957, "step": 42700 }, { "epoch": 0.5204562904464187, "grad_norm": 2.1699886322021484, "learning_rate": 2.5239897370109045e-06, "loss": 0.8992, "step": 42705 }, { "epoch": 0.5205172266705665, "grad_norm": 1.7189490795135498, "learning_rate": 2.5236690186016675e-06, "loss": 0.855, "step": 42710 }, { "epoch": 0.5205781628947144, "grad_norm": 2.145909547805786, "learning_rate": 2.5233483001924314e-06, "loss": 0.8783, "step": 42715 }, { "epoch": 0.5206390991188622, "grad_norm": 2.166717529296875, "learning_rate": 2.5230275817831944e-06, "loss": 0.792, "step": 42720 }, { "epoch": 0.52070003534301, "grad_norm": 2.146455764770508, "learning_rate": 2.5227068633739574e-06, "loss": 0.8364, "step": 42725 }, { "epoch": 0.5207609715671578, "grad_norm": 2.2583634853363037, "learning_rate": 2.5223861449647213e-06, "loss": 0.7666, "step": 42730 }, { "epoch": 0.5208219077913057, "grad_norm": 2.7471067905426025, "learning_rate": 2.5220654265554843e-06, "loss": 0.8828, "step": 42735 }, { "epoch": 0.5208828440154534, "grad_norm": 1.8364567756652832, "learning_rate": 2.5217447081462478e-06, "loss": 0.8491, "step": 42740 }, { "epoch": 0.5209437802396012, "grad_norm": 1.739255428314209, "learning_rate": 2.521423989737011e-06, "loss": 0.8173, "step": 42745 }, { "epoch": 0.521004716463749, "grad_norm": 2.0739598274230957, "learning_rate": 2.5211032713277746e-06, "loss": 0.8371, "step": 42750 }, { "epoch": 0.5210656526878968, "grad_norm": 1.51584792137146, "learning_rate": 2.5207825529185377e-06, "loss": 0.8065, "step": 42755 }, { "epoch": 0.5211265889120447, "grad_norm": 1.976702094078064, "learning_rate": 2.520461834509301e-06, "loss": 0.8913, "step": 42760 }, { "epoch": 0.5211875251361925, "grad_norm": 2.1037704944610596, "learning_rate": 2.5201411161000645e-06, "loss": 0.8573, "step": 42765 }, { "epoch": 0.5212484613603403, "grad_norm": 1.8123345375061035, "learning_rate": 2.5198203976908276e-06, "loss": 0.7894, "step": 42770 }, { "epoch": 0.521309397584488, "grad_norm": 1.9003793001174927, "learning_rate": 2.5194996792815914e-06, "loss": 0.7557, "step": 42775 }, { "epoch": 0.5213703338086358, "grad_norm": 1.9290794134140015, "learning_rate": 2.5191789608723544e-06, "loss": 0.7612, "step": 42780 }, { "epoch": 0.5214312700327837, "grad_norm": 2.6991147994995117, "learning_rate": 2.5188582424631175e-06, "loss": 0.807, "step": 42785 }, { "epoch": 0.5214922062569315, "grad_norm": 1.9889106750488281, "learning_rate": 2.5185375240538805e-06, "loss": 0.7635, "step": 42790 }, { "epoch": 0.5215531424810793, "grad_norm": 1.9722814559936523, "learning_rate": 2.5182168056446443e-06, "loss": 0.8363, "step": 42795 }, { "epoch": 0.5216140787052271, "grad_norm": 2.103182792663574, "learning_rate": 2.5178960872354074e-06, "loss": 0.849, "step": 42800 }, { "epoch": 0.521675014929375, "grad_norm": 2.057771682739258, "learning_rate": 2.517575368826171e-06, "loss": 0.8657, "step": 42805 }, { "epoch": 0.5217359511535227, "grad_norm": 1.9628565311431885, "learning_rate": 2.5172546504169342e-06, "loss": 0.8747, "step": 42810 }, { "epoch": 0.5217968873776705, "grad_norm": 2.090785264968872, "learning_rate": 2.5169339320076972e-06, "loss": 0.7457, "step": 42815 }, { "epoch": 0.5218578236018183, "grad_norm": 2.0074427127838135, "learning_rate": 2.5166132135984607e-06, "loss": 0.878, "step": 42820 }, { "epoch": 0.5219187598259661, "grad_norm": 1.7795568704605103, "learning_rate": 2.516292495189224e-06, "loss": 0.8045, "step": 42825 }, { "epoch": 0.521979696050114, "grad_norm": 1.695859670639038, "learning_rate": 2.5159717767799876e-06, "loss": 0.7968, "step": 42830 }, { "epoch": 0.5220406322742618, "grad_norm": 1.6808521747589111, "learning_rate": 2.5156510583707506e-06, "loss": 0.8801, "step": 42835 }, { "epoch": 0.5221015684984096, "grad_norm": 1.8596553802490234, "learning_rate": 2.515330339961514e-06, "loss": 0.8007, "step": 42840 }, { "epoch": 0.5221625047225573, "grad_norm": 1.95406973361969, "learning_rate": 2.5150096215522775e-06, "loss": 0.8623, "step": 42845 }, { "epoch": 0.5222234409467051, "grad_norm": 1.8766826391220093, "learning_rate": 2.5146889031430405e-06, "loss": 0.8012, "step": 42850 }, { "epoch": 0.522284377170853, "grad_norm": 2.205198049545288, "learning_rate": 2.5143681847338043e-06, "loss": 0.8285, "step": 42855 }, { "epoch": 0.5223453133950008, "grad_norm": 1.8537510633468628, "learning_rate": 2.5140474663245674e-06, "loss": 0.8139, "step": 42860 }, { "epoch": 0.5224062496191486, "grad_norm": 1.8805745840072632, "learning_rate": 2.5137267479153304e-06, "loss": 0.8727, "step": 42865 }, { "epoch": 0.5224671858432964, "grad_norm": 2.113525390625, "learning_rate": 2.5134060295060934e-06, "loss": 0.8761, "step": 42870 }, { "epoch": 0.5225281220674443, "grad_norm": 1.732590913772583, "learning_rate": 2.5130853110968573e-06, "loss": 0.8367, "step": 42875 }, { "epoch": 0.522589058291592, "grad_norm": 1.6957275867462158, "learning_rate": 2.5127645926876203e-06, "loss": 0.7774, "step": 42880 }, { "epoch": 0.5226499945157398, "grad_norm": 1.688210368156433, "learning_rate": 2.5124438742783837e-06, "loss": 0.8507, "step": 42885 }, { "epoch": 0.5227109307398876, "grad_norm": 1.846855640411377, "learning_rate": 2.512123155869147e-06, "loss": 0.8568, "step": 42890 }, { "epoch": 0.5227718669640354, "grad_norm": 1.9352972507476807, "learning_rate": 2.51180243745991e-06, "loss": 0.8112, "step": 42895 }, { "epoch": 0.5228328031881833, "grad_norm": 2.133082628250122, "learning_rate": 2.5114817190506736e-06, "loss": 0.7783, "step": 42900 }, { "epoch": 0.5228937394123311, "grad_norm": 1.697553277015686, "learning_rate": 2.511161000641437e-06, "loss": 0.8452, "step": 42905 }, { "epoch": 0.5229546756364789, "grad_norm": 1.9275133609771729, "learning_rate": 2.5108402822322005e-06, "loss": 0.8893, "step": 42910 }, { "epoch": 0.5230156118606266, "grad_norm": 1.9929407835006714, "learning_rate": 2.5105195638229635e-06, "loss": 0.828, "step": 42915 }, { "epoch": 0.5230765480847744, "grad_norm": 1.8781753778457642, "learning_rate": 2.510198845413727e-06, "loss": 0.8016, "step": 42920 }, { "epoch": 0.5231374843089223, "grad_norm": 2.1112780570983887, "learning_rate": 2.5098781270044904e-06, "loss": 0.7943, "step": 42925 }, { "epoch": 0.5231984205330701, "grad_norm": 1.916269063949585, "learning_rate": 2.5095574085952534e-06, "loss": 0.8686, "step": 42930 }, { "epoch": 0.5232593567572179, "grad_norm": 1.8040478229522705, "learning_rate": 2.5092366901860173e-06, "loss": 0.8218, "step": 42935 }, { "epoch": 0.5233202929813657, "grad_norm": 1.7928357124328613, "learning_rate": 2.5089159717767803e-06, "loss": 0.7497, "step": 42940 }, { "epoch": 0.5233812292055136, "grad_norm": 2.134925127029419, "learning_rate": 2.5085952533675433e-06, "loss": 0.7932, "step": 42945 }, { "epoch": 0.5234421654296613, "grad_norm": 2.2675421237945557, "learning_rate": 2.508274534958307e-06, "loss": 0.887, "step": 42950 }, { "epoch": 0.5235031016538091, "grad_norm": 2.21004581451416, "learning_rate": 2.50795381654907e-06, "loss": 0.841, "step": 42955 }, { "epoch": 0.5235640378779569, "grad_norm": 1.9828358888626099, "learning_rate": 2.507633098139833e-06, "loss": 0.8143, "step": 42960 }, { "epoch": 0.5236249741021047, "grad_norm": 2.032153367996216, "learning_rate": 2.5073123797305966e-06, "loss": 0.7825, "step": 42965 }, { "epoch": 0.5236859103262526, "grad_norm": 2.123561143875122, "learning_rate": 2.50699166132136e-06, "loss": 0.8172, "step": 42970 }, { "epoch": 0.5237468465504004, "grad_norm": 1.9335410594940186, "learning_rate": 2.5066709429121235e-06, "loss": 0.7452, "step": 42975 }, { "epoch": 0.5238077827745482, "grad_norm": 2.2186367511749268, "learning_rate": 2.5063502245028865e-06, "loss": 0.7772, "step": 42980 }, { "epoch": 0.5238687189986959, "grad_norm": 1.9955435991287231, "learning_rate": 2.50602950609365e-06, "loss": 0.8503, "step": 42985 }, { "epoch": 0.5239296552228437, "grad_norm": 1.963159441947937, "learning_rate": 2.5057087876844134e-06, "loss": 0.8609, "step": 42990 }, { "epoch": 0.5239905914469916, "grad_norm": 2.1254005432128906, "learning_rate": 2.5053880692751764e-06, "loss": 0.8452, "step": 42995 }, { "epoch": 0.5240515276711394, "grad_norm": 2.0542447566986084, "learning_rate": 2.5050673508659403e-06, "loss": 0.7962, "step": 43000 }, { "epoch": 0.5241124638952872, "grad_norm": 1.9348522424697876, "learning_rate": 2.5047466324567033e-06, "loss": 0.8454, "step": 43005 }, { "epoch": 0.524173400119435, "grad_norm": 1.864548921585083, "learning_rate": 2.5044259140474663e-06, "loss": 0.8109, "step": 43010 }, { "epoch": 0.5242343363435829, "grad_norm": 2.144023895263672, "learning_rate": 2.50410519563823e-06, "loss": 0.891, "step": 43015 }, { "epoch": 0.5242952725677306, "grad_norm": 1.950821042060852, "learning_rate": 2.5037844772289932e-06, "loss": 0.805, "step": 43020 }, { "epoch": 0.5243562087918784, "grad_norm": 1.763805627822876, "learning_rate": 2.5034637588197562e-06, "loss": 0.7529, "step": 43025 }, { "epoch": 0.5244171450160262, "grad_norm": 2.1553761959075928, "learning_rate": 2.50314304041052e-06, "loss": 0.8552, "step": 43030 }, { "epoch": 0.524478081240174, "grad_norm": 1.8092095851898193, "learning_rate": 2.502822322001283e-06, "loss": 0.8308, "step": 43035 }, { "epoch": 0.5245390174643219, "grad_norm": 1.76266348361969, "learning_rate": 2.502501603592046e-06, "loss": 0.891, "step": 43040 }, { "epoch": 0.5245999536884697, "grad_norm": 1.7026207447052002, "learning_rate": 2.5021808851828096e-06, "loss": 0.8498, "step": 43045 }, { "epoch": 0.5246608899126175, "grad_norm": 1.9248073101043701, "learning_rate": 2.501860166773573e-06, "loss": 0.8386, "step": 43050 }, { "epoch": 0.5247218261367652, "grad_norm": 1.9879601001739502, "learning_rate": 2.5015394483643365e-06, "loss": 0.8764, "step": 43055 }, { "epoch": 0.524782762360913, "grad_norm": 2.217595338821411, "learning_rate": 2.5012187299550995e-06, "loss": 0.8792, "step": 43060 }, { "epoch": 0.5248436985850609, "grad_norm": 2.040924549102783, "learning_rate": 2.500898011545863e-06, "loss": 0.7741, "step": 43065 }, { "epoch": 0.5249046348092087, "grad_norm": 2.1328928470611572, "learning_rate": 2.5005772931366264e-06, "loss": 0.9556, "step": 43070 }, { "epoch": 0.5249655710333565, "grad_norm": 1.8266584873199463, "learning_rate": 2.5002565747273894e-06, "loss": 0.7662, "step": 43075 }, { "epoch": 0.5250265072575043, "grad_norm": 1.847266435623169, "learning_rate": 2.499935856318153e-06, "loss": 0.7941, "step": 43080 }, { "epoch": 0.5250874434816521, "grad_norm": 1.9810794591903687, "learning_rate": 2.4996151379089162e-06, "loss": 0.8169, "step": 43085 }, { "epoch": 0.5251483797057999, "grad_norm": 2.0757336616516113, "learning_rate": 2.4992944194996797e-06, "loss": 0.8151, "step": 43090 }, { "epoch": 0.5252093159299477, "grad_norm": 1.7760100364685059, "learning_rate": 2.4989737010904427e-06, "loss": 0.7956, "step": 43095 }, { "epoch": 0.5252702521540955, "grad_norm": 1.8635531663894653, "learning_rate": 2.498652982681206e-06, "loss": 0.8763, "step": 43100 }, { "epoch": 0.5253311883782433, "grad_norm": 1.9783369302749634, "learning_rate": 2.498332264271969e-06, "loss": 0.8279, "step": 43105 }, { "epoch": 0.5253921246023912, "grad_norm": 2.420306921005249, "learning_rate": 2.4980115458627326e-06, "loss": 0.9072, "step": 43110 }, { "epoch": 0.525453060826539, "grad_norm": 2.02223801612854, "learning_rate": 2.497690827453496e-06, "loss": 0.8018, "step": 43115 }, { "epoch": 0.5255139970506868, "grad_norm": 2.20119309425354, "learning_rate": 2.497370109044259e-06, "loss": 0.8738, "step": 43120 }, { "epoch": 0.5255749332748345, "grad_norm": 1.7812963724136353, "learning_rate": 2.4970493906350225e-06, "loss": 0.7744, "step": 43125 }, { "epoch": 0.5256358694989823, "grad_norm": 1.9514844417572021, "learning_rate": 2.496728672225786e-06, "loss": 0.8806, "step": 43130 }, { "epoch": 0.5256968057231302, "grad_norm": 1.92830491065979, "learning_rate": 2.4964079538165494e-06, "loss": 0.8128, "step": 43135 }, { "epoch": 0.525757741947278, "grad_norm": 1.9927599430084229, "learning_rate": 2.496087235407313e-06, "loss": 0.8199, "step": 43140 }, { "epoch": 0.5258186781714258, "grad_norm": 1.8004186153411865, "learning_rate": 2.495766516998076e-06, "loss": 0.8047, "step": 43145 }, { "epoch": 0.5258796143955736, "grad_norm": 2.0124382972717285, "learning_rate": 2.4954457985888393e-06, "loss": 0.8725, "step": 43150 }, { "epoch": 0.5259405506197214, "grad_norm": 2.101715326309204, "learning_rate": 2.4951250801796027e-06, "loss": 0.8273, "step": 43155 }, { "epoch": 0.5260014868438692, "grad_norm": 1.784160852432251, "learning_rate": 2.4948043617703657e-06, "loss": 0.7971, "step": 43160 }, { "epoch": 0.526062423068017, "grad_norm": 2.0881001949310303, "learning_rate": 2.494483643361129e-06, "loss": 0.853, "step": 43165 }, { "epoch": 0.5261233592921648, "grad_norm": 1.8604663610458374, "learning_rate": 2.4941629249518926e-06, "loss": 0.8121, "step": 43170 }, { "epoch": 0.5261842955163126, "grad_norm": 2.154315948486328, "learning_rate": 2.4938422065426556e-06, "loss": 0.8784, "step": 43175 }, { "epoch": 0.5262452317404605, "grad_norm": 1.7223753929138184, "learning_rate": 2.493521488133419e-06, "loss": 0.7576, "step": 43180 }, { "epoch": 0.5263061679646083, "grad_norm": 1.8795371055603027, "learning_rate": 2.493200769724182e-06, "loss": 0.82, "step": 43185 }, { "epoch": 0.5263671041887561, "grad_norm": 3.314631462097168, "learning_rate": 2.4928800513149455e-06, "loss": 0.7862, "step": 43190 }, { "epoch": 0.5264280404129038, "grad_norm": 1.817152738571167, "learning_rate": 2.492559332905709e-06, "loss": 0.8101, "step": 43195 }, { "epoch": 0.5264889766370516, "grad_norm": 1.9805965423583984, "learning_rate": 2.492238614496472e-06, "loss": 0.8372, "step": 43200 }, { "epoch": 0.5265499128611995, "grad_norm": 2.044799566268921, "learning_rate": 2.4919178960872354e-06, "loss": 0.8518, "step": 43205 }, { "epoch": 0.5266108490853473, "grad_norm": 1.7815520763397217, "learning_rate": 2.491597177677999e-06, "loss": 0.7582, "step": 43210 }, { "epoch": 0.5266717853094951, "grad_norm": 1.720495581626892, "learning_rate": 2.4912764592687623e-06, "loss": 0.8029, "step": 43215 }, { "epoch": 0.5267327215336429, "grad_norm": 1.925215721130371, "learning_rate": 2.4909557408595257e-06, "loss": 0.8538, "step": 43220 }, { "epoch": 0.5267936577577907, "grad_norm": 1.7632304430007935, "learning_rate": 2.490635022450289e-06, "loss": 0.8508, "step": 43225 }, { "epoch": 0.5268545939819385, "grad_norm": 2.575253486633301, "learning_rate": 2.490314304041052e-06, "loss": 0.8451, "step": 43230 }, { "epoch": 0.5269155302060863, "grad_norm": 2.188591957092285, "learning_rate": 2.4899935856318156e-06, "loss": 0.8495, "step": 43235 }, { "epoch": 0.5269764664302341, "grad_norm": 2.0989465713500977, "learning_rate": 2.4896728672225787e-06, "loss": 0.8681, "step": 43240 }, { "epoch": 0.5270374026543819, "grad_norm": 2.0662271976470947, "learning_rate": 2.489352148813342e-06, "loss": 0.8121, "step": 43245 }, { "epoch": 0.5270983388785297, "grad_norm": 2.134495258331299, "learning_rate": 2.4890314304041055e-06, "loss": 0.8722, "step": 43250 }, { "epoch": 0.5271592751026776, "grad_norm": 2.1930108070373535, "learning_rate": 2.4887107119948686e-06, "loss": 0.8132, "step": 43255 }, { "epoch": 0.5272202113268254, "grad_norm": 2.3044426441192627, "learning_rate": 2.488389993585632e-06, "loss": 0.8465, "step": 43260 }, { "epoch": 0.5272811475509731, "grad_norm": 1.8613466024398804, "learning_rate": 2.4880692751763954e-06, "loss": 0.7901, "step": 43265 }, { "epoch": 0.5273420837751209, "grad_norm": 2.01265025138855, "learning_rate": 2.4877485567671585e-06, "loss": 0.8743, "step": 43270 }, { "epoch": 0.5274030199992688, "grad_norm": 1.7836940288543701, "learning_rate": 2.487427838357922e-06, "loss": 0.7859, "step": 43275 }, { "epoch": 0.5274639562234166, "grad_norm": 2.129936695098877, "learning_rate": 2.4871071199486853e-06, "loss": 0.8011, "step": 43280 }, { "epoch": 0.5275248924475644, "grad_norm": 1.9914789199829102, "learning_rate": 2.4867864015394484e-06, "loss": 0.8503, "step": 43285 }, { "epoch": 0.5275858286717122, "grad_norm": 1.8866318464279175, "learning_rate": 2.486465683130212e-06, "loss": 0.7862, "step": 43290 }, { "epoch": 0.52764676489586, "grad_norm": 1.7212169170379639, "learning_rate": 2.4861449647209752e-06, "loss": 0.8893, "step": 43295 }, { "epoch": 0.5277077011200078, "grad_norm": 1.7184498310089111, "learning_rate": 2.4858242463117387e-06, "loss": 0.8342, "step": 43300 }, { "epoch": 0.5277686373441556, "grad_norm": 1.8102654218673706, "learning_rate": 2.485503527902502e-06, "loss": 0.8017, "step": 43305 }, { "epoch": 0.5278295735683034, "grad_norm": 2.101630449295044, "learning_rate": 2.485182809493265e-06, "loss": 0.7884, "step": 43310 }, { "epoch": 0.5278905097924512, "grad_norm": 1.8670639991760254, "learning_rate": 2.4848620910840286e-06, "loss": 0.7578, "step": 43315 }, { "epoch": 0.527951446016599, "grad_norm": 1.831080436706543, "learning_rate": 2.4845413726747916e-06, "loss": 0.858, "step": 43320 }, { "epoch": 0.5280123822407469, "grad_norm": 1.819984793663025, "learning_rate": 2.484220654265555e-06, "loss": 0.8223, "step": 43325 }, { "epoch": 0.5280733184648947, "grad_norm": 1.981200098991394, "learning_rate": 2.4838999358563185e-06, "loss": 0.9052, "step": 43330 }, { "epoch": 0.5281342546890424, "grad_norm": 1.948327660560608, "learning_rate": 2.4835792174470815e-06, "loss": 0.8046, "step": 43335 }, { "epoch": 0.5281951909131902, "grad_norm": 2.180525541305542, "learning_rate": 2.483258499037845e-06, "loss": 0.8885, "step": 43340 }, { "epoch": 0.528256127137338, "grad_norm": 2.0526771545410156, "learning_rate": 2.4829377806286084e-06, "loss": 0.8199, "step": 43345 }, { "epoch": 0.5283170633614859, "grad_norm": 1.729134202003479, "learning_rate": 2.4826170622193714e-06, "loss": 0.8353, "step": 43350 }, { "epoch": 0.5283779995856337, "grad_norm": 2.10957407951355, "learning_rate": 2.482296343810135e-06, "loss": 0.8294, "step": 43355 }, { "epoch": 0.5284389358097815, "grad_norm": 2.0207085609436035, "learning_rate": 2.4819756254008983e-06, "loss": 0.8625, "step": 43360 }, { "epoch": 0.5284998720339293, "grad_norm": 1.8024505376815796, "learning_rate": 2.4816549069916617e-06, "loss": 0.7603, "step": 43365 }, { "epoch": 0.528560808258077, "grad_norm": 2.004882335662842, "learning_rate": 2.4813341885824247e-06, "loss": 0.864, "step": 43370 }, { "epoch": 0.5286217444822249, "grad_norm": 2.440077304840088, "learning_rate": 2.481013470173188e-06, "loss": 0.8548, "step": 43375 }, { "epoch": 0.5286826807063727, "grad_norm": 2.5766916275024414, "learning_rate": 2.4806927517639516e-06, "loss": 0.7572, "step": 43380 }, { "epoch": 0.5287436169305205, "grad_norm": 1.9747710227966309, "learning_rate": 2.480372033354715e-06, "loss": 0.7992, "step": 43385 }, { "epoch": 0.5288045531546683, "grad_norm": 1.7532670497894287, "learning_rate": 2.480051314945478e-06, "loss": 0.8574, "step": 43390 }, { "epoch": 0.5288654893788162, "grad_norm": 1.9922850131988525, "learning_rate": 2.4797305965362415e-06, "loss": 0.8354, "step": 43395 }, { "epoch": 0.528926425602964, "grad_norm": 1.8968098163604736, "learning_rate": 2.4794098781270045e-06, "loss": 0.8043, "step": 43400 }, { "epoch": 0.5289873618271117, "grad_norm": 1.7311803102493286, "learning_rate": 2.479089159717768e-06, "loss": 0.8387, "step": 43405 }, { "epoch": 0.5290482980512595, "grad_norm": 2.0146443843841553, "learning_rate": 2.4787684413085314e-06, "loss": 0.8081, "step": 43410 }, { "epoch": 0.5291092342754073, "grad_norm": 1.8393645286560059, "learning_rate": 2.4784477228992944e-06, "loss": 0.8608, "step": 43415 }, { "epoch": 0.5291701704995552, "grad_norm": 1.9625929594039917, "learning_rate": 2.478127004490058e-06, "loss": 0.8089, "step": 43420 }, { "epoch": 0.529231106723703, "grad_norm": 2.2065274715423584, "learning_rate": 2.4778062860808213e-06, "loss": 0.8307, "step": 43425 }, { "epoch": 0.5292920429478508, "grad_norm": 2.147827386856079, "learning_rate": 2.4774855676715843e-06, "loss": 0.8622, "step": 43430 }, { "epoch": 0.5293529791719986, "grad_norm": 2.0946972370147705, "learning_rate": 2.4771648492623478e-06, "loss": 0.8315, "step": 43435 }, { "epoch": 0.5294139153961464, "grad_norm": 2.01163911819458, "learning_rate": 2.476844130853111e-06, "loss": 0.8057, "step": 43440 }, { "epoch": 0.5294748516202942, "grad_norm": 1.6976286172866821, "learning_rate": 2.4765234124438746e-06, "loss": 0.8594, "step": 43445 }, { "epoch": 0.529535787844442, "grad_norm": 1.7334083318710327, "learning_rate": 2.476202694034638e-06, "loss": 0.8626, "step": 43450 }, { "epoch": 0.5295967240685898, "grad_norm": 1.8844735622406006, "learning_rate": 2.475881975625401e-06, "loss": 0.8271, "step": 43455 }, { "epoch": 0.5296576602927376, "grad_norm": 2.5468220710754395, "learning_rate": 2.4755612572161645e-06, "loss": 0.8261, "step": 43460 }, { "epoch": 0.5297185965168855, "grad_norm": 1.9276020526885986, "learning_rate": 2.475240538806928e-06, "loss": 0.7471, "step": 43465 }, { "epoch": 0.5297795327410333, "grad_norm": 2.018348455429077, "learning_rate": 2.474919820397691e-06, "loss": 0.8385, "step": 43470 }, { "epoch": 0.529840468965181, "grad_norm": 2.1173300743103027, "learning_rate": 2.4745991019884544e-06, "loss": 0.8193, "step": 43475 }, { "epoch": 0.5299014051893288, "grad_norm": 1.9293912649154663, "learning_rate": 2.4742783835792174e-06, "loss": 0.8204, "step": 43480 }, { "epoch": 0.5299623414134766, "grad_norm": 1.920993447303772, "learning_rate": 2.473957665169981e-06, "loss": 0.7749, "step": 43485 }, { "epoch": 0.5300232776376245, "grad_norm": 1.8741099834442139, "learning_rate": 2.4736369467607443e-06, "loss": 0.8265, "step": 43490 }, { "epoch": 0.5300842138617723, "grad_norm": 2.2854340076446533, "learning_rate": 2.4733162283515073e-06, "loss": 0.8232, "step": 43495 }, { "epoch": 0.5301451500859201, "grad_norm": 1.7807055711746216, "learning_rate": 2.4729955099422708e-06, "loss": 0.8124, "step": 43500 }, { "epoch": 0.5302060863100679, "grad_norm": 1.9645562171936035, "learning_rate": 2.4726747915330342e-06, "loss": 0.8816, "step": 43505 }, { "epoch": 0.5302670225342156, "grad_norm": 2.0937368869781494, "learning_rate": 2.4723540731237972e-06, "loss": 0.845, "step": 43510 }, { "epoch": 0.5303279587583635, "grad_norm": 2.1128482818603516, "learning_rate": 2.4720333547145607e-06, "loss": 0.8776, "step": 43515 }, { "epoch": 0.5303888949825113, "grad_norm": 1.7771832942962646, "learning_rate": 2.471712636305324e-06, "loss": 0.8433, "step": 43520 }, { "epoch": 0.5304498312066591, "grad_norm": 2.098694324493408, "learning_rate": 2.4713919178960876e-06, "loss": 0.8771, "step": 43525 }, { "epoch": 0.5305107674308069, "grad_norm": 1.9633359909057617, "learning_rate": 2.471071199486851e-06, "loss": 0.8741, "step": 43530 }, { "epoch": 0.5305717036549548, "grad_norm": 1.9979621171951294, "learning_rate": 2.470750481077614e-06, "loss": 0.7939, "step": 43535 }, { "epoch": 0.5306326398791026, "grad_norm": 1.8447647094726562, "learning_rate": 2.4704297626683775e-06, "loss": 0.8107, "step": 43540 }, { "epoch": 0.5306935761032503, "grad_norm": 1.8238639831542969, "learning_rate": 2.470109044259141e-06, "loss": 0.843, "step": 43545 }, { "epoch": 0.5307545123273981, "grad_norm": 1.8412452936172485, "learning_rate": 2.469788325849904e-06, "loss": 0.8268, "step": 43550 }, { "epoch": 0.5308154485515459, "grad_norm": 1.9891616106033325, "learning_rate": 2.4694676074406674e-06, "loss": 0.8707, "step": 43555 }, { "epoch": 0.5308763847756938, "grad_norm": 1.7858744859695435, "learning_rate": 2.469146889031431e-06, "loss": 0.8329, "step": 43560 }, { "epoch": 0.5309373209998416, "grad_norm": 1.865412712097168, "learning_rate": 2.468826170622194e-06, "loss": 0.8202, "step": 43565 }, { "epoch": 0.5309982572239894, "grad_norm": 2.0978293418884277, "learning_rate": 2.4685054522129573e-06, "loss": 0.8346, "step": 43570 }, { "epoch": 0.5310591934481372, "grad_norm": 2.150520086288452, "learning_rate": 2.4681847338037203e-06, "loss": 0.7776, "step": 43575 }, { "epoch": 0.531120129672285, "grad_norm": 1.9666756391525269, "learning_rate": 2.4678640153944837e-06, "loss": 0.8342, "step": 43580 }, { "epoch": 0.5311810658964328, "grad_norm": 1.8218679428100586, "learning_rate": 2.467543296985247e-06, "loss": 0.8733, "step": 43585 }, { "epoch": 0.5312420021205806, "grad_norm": 1.9847848415374756, "learning_rate": 2.4672225785760106e-06, "loss": 0.8338, "step": 43590 }, { "epoch": 0.5313029383447284, "grad_norm": 1.8283549547195435, "learning_rate": 2.4669018601667736e-06, "loss": 0.8386, "step": 43595 }, { "epoch": 0.5313638745688762, "grad_norm": 1.7289811372756958, "learning_rate": 2.466581141757537e-06, "loss": 0.8582, "step": 43600 }, { "epoch": 0.5314248107930241, "grad_norm": 1.8501428365707397, "learning_rate": 2.4662604233483005e-06, "loss": 0.8072, "step": 43605 }, { "epoch": 0.5314857470171719, "grad_norm": 2.616274356842041, "learning_rate": 2.465939704939064e-06, "loss": 0.7876, "step": 43610 }, { "epoch": 0.5315466832413196, "grad_norm": 1.9847873449325562, "learning_rate": 2.465618986529827e-06, "loss": 0.8344, "step": 43615 }, { "epoch": 0.5316076194654674, "grad_norm": 2.0431017875671387, "learning_rate": 2.4652982681205904e-06, "loss": 0.8065, "step": 43620 }, { "epoch": 0.5316685556896152, "grad_norm": 2.0311105251312256, "learning_rate": 2.464977549711354e-06, "loss": 0.9066, "step": 43625 }, { "epoch": 0.5317294919137631, "grad_norm": 2.093296527862549, "learning_rate": 2.464656831302117e-06, "loss": 0.8513, "step": 43630 }, { "epoch": 0.5317904281379109, "grad_norm": 1.8734185695648193, "learning_rate": 2.4643361128928803e-06, "loss": 0.8344, "step": 43635 }, { "epoch": 0.5318513643620587, "grad_norm": 1.6776612997055054, "learning_rate": 2.4640153944836437e-06, "loss": 0.7486, "step": 43640 }, { "epoch": 0.5319123005862064, "grad_norm": 1.91927170753479, "learning_rate": 2.4636946760744067e-06, "loss": 0.7615, "step": 43645 }, { "epoch": 0.5319732368103542, "grad_norm": 2.074826240539551, "learning_rate": 2.46337395766517e-06, "loss": 0.7239, "step": 43650 }, { "epoch": 0.5320341730345021, "grad_norm": 1.9152789115905762, "learning_rate": 2.463053239255933e-06, "loss": 0.8598, "step": 43655 }, { "epoch": 0.5320951092586499, "grad_norm": 1.800171136856079, "learning_rate": 2.4627325208466966e-06, "loss": 0.7712, "step": 43660 }, { "epoch": 0.5321560454827977, "grad_norm": 2.3607609272003174, "learning_rate": 2.46241180243746e-06, "loss": 0.8052, "step": 43665 }, { "epoch": 0.5322169817069455, "grad_norm": 1.8800287246704102, "learning_rate": 2.4620910840282235e-06, "loss": 0.8795, "step": 43670 }, { "epoch": 0.5322779179310934, "grad_norm": 2.457437753677368, "learning_rate": 2.461770365618987e-06, "loss": 0.7669, "step": 43675 }, { "epoch": 0.5323388541552411, "grad_norm": 1.9407691955566406, "learning_rate": 2.46144964720975e-06, "loss": 0.8266, "step": 43680 }, { "epoch": 0.5323997903793889, "grad_norm": 1.8970582485198975, "learning_rate": 2.4611289288005134e-06, "loss": 0.8274, "step": 43685 }, { "epoch": 0.5324607266035367, "grad_norm": 1.7072447538375854, "learning_rate": 2.460808210391277e-06, "loss": 0.9053, "step": 43690 }, { "epoch": 0.5325216628276845, "grad_norm": 1.720872402191162, "learning_rate": 2.46048749198204e-06, "loss": 0.8506, "step": 43695 }, { "epoch": 0.5325825990518324, "grad_norm": 1.8518494367599487, "learning_rate": 2.4601667735728033e-06, "loss": 0.802, "step": 43700 }, { "epoch": 0.5326435352759802, "grad_norm": 1.9453353881835938, "learning_rate": 2.4598460551635668e-06, "loss": 0.8849, "step": 43705 }, { "epoch": 0.532704471500128, "grad_norm": 2.1432487964630127, "learning_rate": 2.4595253367543298e-06, "loss": 0.8146, "step": 43710 }, { "epoch": 0.5327654077242757, "grad_norm": 1.995392918586731, "learning_rate": 2.459204618345093e-06, "loss": 0.836, "step": 43715 }, { "epoch": 0.5328263439484235, "grad_norm": 1.8062115907669067, "learning_rate": 2.4588838999358566e-06, "loss": 0.8771, "step": 43720 }, { "epoch": 0.5328872801725714, "grad_norm": 1.868938684463501, "learning_rate": 2.4585631815266197e-06, "loss": 0.8134, "step": 43725 }, { "epoch": 0.5329482163967192, "grad_norm": 1.6240636110305786, "learning_rate": 2.458242463117383e-06, "loss": 0.8043, "step": 43730 }, { "epoch": 0.533009152620867, "grad_norm": 1.7970502376556396, "learning_rate": 2.457921744708146e-06, "loss": 0.8132, "step": 43735 }, { "epoch": 0.5330700888450148, "grad_norm": 1.9197407960891724, "learning_rate": 2.4576010262989096e-06, "loss": 0.9016, "step": 43740 }, { "epoch": 0.5331310250691627, "grad_norm": 2.1519722938537598, "learning_rate": 2.457280307889673e-06, "loss": 0.7937, "step": 43745 }, { "epoch": 0.5331919612933104, "grad_norm": 2.1353228092193604, "learning_rate": 2.4569595894804364e-06, "loss": 0.837, "step": 43750 }, { "epoch": 0.5332528975174582, "grad_norm": 2.5542514324188232, "learning_rate": 2.4566388710712e-06, "loss": 0.9506, "step": 43755 }, { "epoch": 0.533313833741606, "grad_norm": 2.191236734390259, "learning_rate": 2.456318152661963e-06, "loss": 0.8938, "step": 43760 }, { "epoch": 0.5333747699657538, "grad_norm": 1.8805080652236938, "learning_rate": 2.4559974342527263e-06, "loss": 0.8758, "step": 43765 }, { "epoch": 0.5334357061899017, "grad_norm": 2.2958240509033203, "learning_rate": 2.4556767158434898e-06, "loss": 0.839, "step": 43770 }, { "epoch": 0.5334966424140495, "grad_norm": 1.9302817583084106, "learning_rate": 2.455355997434253e-06, "loss": 0.8508, "step": 43775 }, { "epoch": 0.5335575786381973, "grad_norm": 2.4810962677001953, "learning_rate": 2.4550352790250162e-06, "loss": 0.7673, "step": 43780 }, { "epoch": 0.533618514862345, "grad_norm": 2.0367472171783447, "learning_rate": 2.4547145606157797e-06, "loss": 0.8116, "step": 43785 }, { "epoch": 0.5336794510864928, "grad_norm": 2.3421409130096436, "learning_rate": 2.4543938422065427e-06, "loss": 0.931, "step": 43790 }, { "epoch": 0.5337403873106407, "grad_norm": 2.263845682144165, "learning_rate": 2.454073123797306e-06, "loss": 0.8808, "step": 43795 }, { "epoch": 0.5338013235347885, "grad_norm": 2.0088589191436768, "learning_rate": 2.4537524053880696e-06, "loss": 0.8194, "step": 43800 }, { "epoch": 0.5338622597589363, "grad_norm": 2.4942452907562256, "learning_rate": 2.4534316869788326e-06, "loss": 0.8633, "step": 43805 }, { "epoch": 0.5339231959830841, "grad_norm": 1.969865322113037, "learning_rate": 2.453110968569596e-06, "loss": 0.867, "step": 43810 }, { "epoch": 0.533984132207232, "grad_norm": 1.9915109872817993, "learning_rate": 2.4527902501603595e-06, "loss": 0.7824, "step": 43815 }, { "epoch": 0.5340450684313797, "grad_norm": 1.885481357574463, "learning_rate": 2.4524695317511225e-06, "loss": 0.8803, "step": 43820 }, { "epoch": 0.5341060046555275, "grad_norm": 1.9925068616867065, "learning_rate": 2.452148813341886e-06, "loss": 0.8242, "step": 43825 }, { "epoch": 0.5341669408796753, "grad_norm": 2.1267480850219727, "learning_rate": 2.4518280949326494e-06, "loss": 0.8314, "step": 43830 }, { "epoch": 0.5342278771038231, "grad_norm": 1.951528787612915, "learning_rate": 2.451507376523413e-06, "loss": 0.8482, "step": 43835 }, { "epoch": 0.534288813327971, "grad_norm": 1.8009119033813477, "learning_rate": 2.4511866581141763e-06, "loss": 0.8728, "step": 43840 }, { "epoch": 0.5343497495521188, "grad_norm": 1.9717985391616821, "learning_rate": 2.4508659397049393e-06, "loss": 0.8549, "step": 43845 }, { "epoch": 0.5344106857762666, "grad_norm": 1.9023412466049194, "learning_rate": 2.4505452212957027e-06, "loss": 0.8367, "step": 43850 }, { "epoch": 0.5344716220004143, "grad_norm": 2.141627311706543, "learning_rate": 2.450224502886466e-06, "loss": 0.8635, "step": 43855 }, { "epoch": 0.5345325582245621, "grad_norm": 2.092092514038086, "learning_rate": 2.449903784477229e-06, "loss": 0.7483, "step": 43860 }, { "epoch": 0.53459349444871, "grad_norm": 1.7551438808441162, "learning_rate": 2.4495830660679926e-06, "loss": 0.7707, "step": 43865 }, { "epoch": 0.5346544306728578, "grad_norm": 2.023601770401001, "learning_rate": 2.4492623476587556e-06, "loss": 0.8603, "step": 43870 }, { "epoch": 0.5347153668970056, "grad_norm": 1.855661392211914, "learning_rate": 2.448941629249519e-06, "loss": 0.8242, "step": 43875 }, { "epoch": 0.5347763031211534, "grad_norm": 1.914083480834961, "learning_rate": 2.4486209108402825e-06, "loss": 0.8636, "step": 43880 }, { "epoch": 0.5348372393453013, "grad_norm": 1.8596168756484985, "learning_rate": 2.4483001924310455e-06, "loss": 0.8315, "step": 43885 }, { "epoch": 0.534898175569449, "grad_norm": 2.1063718795776367, "learning_rate": 2.447979474021809e-06, "loss": 0.812, "step": 43890 }, { "epoch": 0.5349591117935968, "grad_norm": 2.1035115718841553, "learning_rate": 2.4476587556125724e-06, "loss": 0.8567, "step": 43895 }, { "epoch": 0.5350200480177446, "grad_norm": 2.0504229068756104, "learning_rate": 2.4473380372033354e-06, "loss": 0.827, "step": 43900 }, { "epoch": 0.5350809842418924, "grad_norm": 2.4903414249420166, "learning_rate": 2.447017318794099e-06, "loss": 0.881, "step": 43905 }, { "epoch": 0.5351419204660403, "grad_norm": 2.0011537075042725, "learning_rate": 2.4466966003848623e-06, "loss": 0.8145, "step": 43910 }, { "epoch": 0.5352028566901881, "grad_norm": 1.6259863376617432, "learning_rate": 2.4463758819756257e-06, "loss": 0.8377, "step": 43915 }, { "epoch": 0.5352637929143359, "grad_norm": 1.757636547088623, "learning_rate": 2.446055163566389e-06, "loss": 0.8667, "step": 43920 }, { "epoch": 0.5353247291384836, "grad_norm": 1.571339726448059, "learning_rate": 2.445734445157152e-06, "loss": 0.7359, "step": 43925 }, { "epoch": 0.5353856653626314, "grad_norm": 2.205068349838257, "learning_rate": 2.4454137267479156e-06, "loss": 0.891, "step": 43930 }, { "epoch": 0.5354466015867793, "grad_norm": 1.747280240058899, "learning_rate": 2.445093008338679e-06, "loss": 0.8541, "step": 43935 }, { "epoch": 0.5355075378109271, "grad_norm": 2.512716770172119, "learning_rate": 2.444772289929442e-06, "loss": 0.9251, "step": 43940 }, { "epoch": 0.5355684740350749, "grad_norm": 3.237633228302002, "learning_rate": 2.4444515715202055e-06, "loss": 0.7853, "step": 43945 }, { "epoch": 0.5356294102592227, "grad_norm": 1.9487831592559814, "learning_rate": 2.4441308531109685e-06, "loss": 0.8176, "step": 43950 }, { "epoch": 0.5356903464833705, "grad_norm": 1.8528605699539185, "learning_rate": 2.443810134701732e-06, "loss": 0.7408, "step": 43955 }, { "epoch": 0.5357512827075183, "grad_norm": 2.2767257690429688, "learning_rate": 2.4434894162924954e-06, "loss": 0.8744, "step": 43960 }, { "epoch": 0.5358122189316661, "grad_norm": 1.9132963418960571, "learning_rate": 2.4431686978832584e-06, "loss": 0.7976, "step": 43965 }, { "epoch": 0.5358731551558139, "grad_norm": 1.8623144626617432, "learning_rate": 2.442847979474022e-06, "loss": 0.817, "step": 43970 }, { "epoch": 0.5359340913799617, "grad_norm": 2.3918423652648926, "learning_rate": 2.4425272610647853e-06, "loss": 0.7588, "step": 43975 }, { "epoch": 0.5359950276041096, "grad_norm": 1.9989242553710938, "learning_rate": 2.4422065426555488e-06, "loss": 0.8742, "step": 43980 }, { "epoch": 0.5360559638282574, "grad_norm": 1.6371484994888306, "learning_rate": 2.4418858242463118e-06, "loss": 0.7454, "step": 43985 }, { "epoch": 0.5361169000524052, "grad_norm": 2.1876392364501953, "learning_rate": 2.4415651058370752e-06, "loss": 0.8642, "step": 43990 }, { "epoch": 0.5361778362765529, "grad_norm": 1.7838560342788696, "learning_rate": 2.4412443874278387e-06, "loss": 0.831, "step": 43995 }, { "epoch": 0.5362387725007007, "grad_norm": 2.0052099227905273, "learning_rate": 2.440923669018602e-06, "loss": 0.8624, "step": 44000 }, { "epoch": 0.5362997087248486, "grad_norm": 1.9679266214370728, "learning_rate": 2.440602950609365e-06, "loss": 0.8543, "step": 44005 }, { "epoch": 0.5363606449489964, "grad_norm": 1.9266204833984375, "learning_rate": 2.4402822322001286e-06, "loss": 0.8456, "step": 44010 }, { "epoch": 0.5364215811731442, "grad_norm": 2.110044479370117, "learning_rate": 2.439961513790892e-06, "loss": 0.816, "step": 44015 }, { "epoch": 0.536482517397292, "grad_norm": 1.972279667854309, "learning_rate": 2.439640795381655e-06, "loss": 0.8767, "step": 44020 }, { "epoch": 0.5365434536214398, "grad_norm": 2.2911717891693115, "learning_rate": 2.4393200769724185e-06, "loss": 0.8702, "step": 44025 }, { "epoch": 0.5366043898455876, "grad_norm": 1.9534783363342285, "learning_rate": 2.4389993585631815e-06, "loss": 0.8402, "step": 44030 }, { "epoch": 0.5366653260697354, "grad_norm": 1.6828052997589111, "learning_rate": 2.438678640153945e-06, "loss": 0.8216, "step": 44035 }, { "epoch": 0.5367262622938832, "grad_norm": 1.9769153594970703, "learning_rate": 2.4383579217447084e-06, "loss": 0.7842, "step": 44040 }, { "epoch": 0.536787198518031, "grad_norm": 2.007617235183716, "learning_rate": 2.4380372033354714e-06, "loss": 0.7956, "step": 44045 }, { "epoch": 0.5368481347421789, "grad_norm": 1.7748159170150757, "learning_rate": 2.437716484926235e-06, "loss": 0.7897, "step": 44050 }, { "epoch": 0.5369090709663267, "grad_norm": 2.3308944702148438, "learning_rate": 2.4373957665169983e-06, "loss": 0.8334, "step": 44055 }, { "epoch": 0.5369700071904745, "grad_norm": 1.720077633857727, "learning_rate": 2.4370750481077617e-06, "loss": 0.833, "step": 44060 }, { "epoch": 0.5370309434146222, "grad_norm": 1.876116394996643, "learning_rate": 2.436754329698525e-06, "loss": 0.8627, "step": 44065 }, { "epoch": 0.53709187963877, "grad_norm": 1.7600936889648438, "learning_rate": 2.436433611289288e-06, "loss": 0.8007, "step": 44070 }, { "epoch": 0.5371528158629179, "grad_norm": 1.8402750492095947, "learning_rate": 2.4361128928800516e-06, "loss": 0.8274, "step": 44075 }, { "epoch": 0.5372137520870657, "grad_norm": 2.12544584274292, "learning_rate": 2.435792174470815e-06, "loss": 0.7868, "step": 44080 }, { "epoch": 0.5372746883112135, "grad_norm": 2.186856508255005, "learning_rate": 2.435471456061578e-06, "loss": 0.8605, "step": 44085 }, { "epoch": 0.5373356245353613, "grad_norm": 1.9254106283187866, "learning_rate": 2.4351507376523415e-06, "loss": 0.8598, "step": 44090 }, { "epoch": 0.5373965607595091, "grad_norm": 1.8613265752792358, "learning_rate": 2.434830019243105e-06, "loss": 0.7541, "step": 44095 }, { "epoch": 0.5374574969836569, "grad_norm": 1.9451582431793213, "learning_rate": 2.434509300833868e-06, "loss": 0.8492, "step": 44100 }, { "epoch": 0.5375184332078047, "grad_norm": 2.1276190280914307, "learning_rate": 2.4341885824246314e-06, "loss": 0.8457, "step": 44105 }, { "epoch": 0.5375793694319525, "grad_norm": 2.0897903442382812, "learning_rate": 2.433867864015395e-06, "loss": 0.8249, "step": 44110 }, { "epoch": 0.5376403056561003, "grad_norm": 1.817711591720581, "learning_rate": 2.433547145606158e-06, "loss": 0.8176, "step": 44115 }, { "epoch": 0.5377012418802481, "grad_norm": 1.7214940786361694, "learning_rate": 2.4332264271969213e-06, "loss": 0.8013, "step": 44120 }, { "epoch": 0.537762178104396, "grad_norm": 1.9017671346664429, "learning_rate": 2.4329057087876843e-06, "loss": 0.8198, "step": 44125 }, { "epoch": 0.5378231143285438, "grad_norm": 1.74502432346344, "learning_rate": 2.4325849903784477e-06, "loss": 0.7911, "step": 44130 }, { "epoch": 0.5378840505526915, "grad_norm": 1.849159836769104, "learning_rate": 2.432264271969211e-06, "loss": 0.8174, "step": 44135 }, { "epoch": 0.5379449867768393, "grad_norm": 1.8672157526016235, "learning_rate": 2.4319435535599746e-06, "loss": 0.7896, "step": 44140 }, { "epoch": 0.5380059230009872, "grad_norm": 3.2319366931915283, "learning_rate": 2.431622835150738e-06, "loss": 0.9108, "step": 44145 }, { "epoch": 0.538066859225135, "grad_norm": 1.8728169202804565, "learning_rate": 2.4313021167415015e-06, "loss": 0.8256, "step": 44150 }, { "epoch": 0.5381277954492828, "grad_norm": 2.127161979675293, "learning_rate": 2.4309813983322645e-06, "loss": 0.7911, "step": 44155 }, { "epoch": 0.5381887316734306, "grad_norm": 1.7501025199890137, "learning_rate": 2.430660679923028e-06, "loss": 0.8175, "step": 44160 }, { "epoch": 0.5382496678975784, "grad_norm": 1.8210853338241577, "learning_rate": 2.430339961513791e-06, "loss": 0.8295, "step": 44165 }, { "epoch": 0.5383106041217262, "grad_norm": 2.2400593757629395, "learning_rate": 2.4300192431045544e-06, "loss": 0.8112, "step": 44170 }, { "epoch": 0.538371540345874, "grad_norm": 2.245389461517334, "learning_rate": 2.429698524695318e-06, "loss": 0.8506, "step": 44175 }, { "epoch": 0.5384324765700218, "grad_norm": 1.8382655382156372, "learning_rate": 2.429377806286081e-06, "loss": 0.8222, "step": 44180 }, { "epoch": 0.5384934127941696, "grad_norm": 1.9860411882400513, "learning_rate": 2.4290570878768443e-06, "loss": 0.892, "step": 44185 }, { "epoch": 0.5385543490183174, "grad_norm": 1.9764366149902344, "learning_rate": 2.4287363694676078e-06, "loss": 0.8085, "step": 44190 }, { "epoch": 0.5386152852424653, "grad_norm": 2.2023818492889404, "learning_rate": 2.4284156510583708e-06, "loss": 0.8206, "step": 44195 }, { "epoch": 0.5386762214666131, "grad_norm": 1.7836878299713135, "learning_rate": 2.428094932649134e-06, "loss": 0.8144, "step": 44200 }, { "epoch": 0.5387371576907608, "grad_norm": 1.9177565574645996, "learning_rate": 2.4277742142398977e-06, "loss": 0.8365, "step": 44205 }, { "epoch": 0.5387980939149086, "grad_norm": 1.910717248916626, "learning_rate": 2.4274534958306607e-06, "loss": 0.8491, "step": 44210 }, { "epoch": 0.5388590301390564, "grad_norm": 2.035201072692871, "learning_rate": 2.427132777421424e-06, "loss": 0.7836, "step": 44215 }, { "epoch": 0.5389199663632043, "grad_norm": 2.010037422180176, "learning_rate": 2.4268120590121875e-06, "loss": 0.8658, "step": 44220 }, { "epoch": 0.5389809025873521, "grad_norm": 2.033686876296997, "learning_rate": 2.426491340602951e-06, "loss": 0.8075, "step": 44225 }, { "epoch": 0.5390418388114999, "grad_norm": 1.7190078496932983, "learning_rate": 2.4261706221937144e-06, "loss": 0.7428, "step": 44230 }, { "epoch": 0.5391027750356477, "grad_norm": 2.3933820724487305, "learning_rate": 2.4258499037844774e-06, "loss": 0.8581, "step": 44235 }, { "epoch": 0.5391637112597955, "grad_norm": 1.7495354413986206, "learning_rate": 2.425529185375241e-06, "loss": 0.8003, "step": 44240 }, { "epoch": 0.5392246474839433, "grad_norm": 2.0669450759887695, "learning_rate": 2.425208466966004e-06, "loss": 0.7796, "step": 44245 }, { "epoch": 0.5392855837080911, "grad_norm": 1.9434494972229004, "learning_rate": 2.4248877485567673e-06, "loss": 0.8449, "step": 44250 }, { "epoch": 0.5393465199322389, "grad_norm": 1.917825698852539, "learning_rate": 2.4245670301475308e-06, "loss": 0.8626, "step": 44255 }, { "epoch": 0.5394074561563867, "grad_norm": 2.565852165222168, "learning_rate": 2.424246311738294e-06, "loss": 0.9203, "step": 44260 }, { "epoch": 0.5394683923805346, "grad_norm": 1.9485565423965454, "learning_rate": 2.4239255933290572e-06, "loss": 0.8245, "step": 44265 }, { "epoch": 0.5395293286046824, "grad_norm": 2.1245667934417725, "learning_rate": 2.4236048749198207e-06, "loss": 0.7994, "step": 44270 }, { "epoch": 0.5395902648288301, "grad_norm": 1.9420766830444336, "learning_rate": 2.4232841565105837e-06, "loss": 0.9105, "step": 44275 }, { "epoch": 0.5396512010529779, "grad_norm": 1.8331377506256104, "learning_rate": 2.422963438101347e-06, "loss": 0.7823, "step": 44280 }, { "epoch": 0.5397121372771257, "grad_norm": 1.8426587581634521, "learning_rate": 2.4226427196921106e-06, "loss": 0.7849, "step": 44285 }, { "epoch": 0.5397730735012736, "grad_norm": 2.0408029556274414, "learning_rate": 2.422322001282874e-06, "loss": 0.8072, "step": 44290 }, { "epoch": 0.5398340097254214, "grad_norm": 1.6474945545196533, "learning_rate": 2.422001282873637e-06, "loss": 0.805, "step": 44295 }, { "epoch": 0.5398949459495692, "grad_norm": 1.7738926410675049, "learning_rate": 2.4216805644644005e-06, "loss": 0.8425, "step": 44300 }, { "epoch": 0.539955882173717, "grad_norm": 2.5538887977600098, "learning_rate": 2.421359846055164e-06, "loss": 0.8196, "step": 44305 }, { "epoch": 0.5400168183978648, "grad_norm": 1.7409265041351318, "learning_rate": 2.4210391276459274e-06, "loss": 0.8129, "step": 44310 }, { "epoch": 0.5400777546220126, "grad_norm": 2.444383144378662, "learning_rate": 2.4207184092366904e-06, "loss": 0.8499, "step": 44315 }, { "epoch": 0.5401386908461604, "grad_norm": 1.7636847496032715, "learning_rate": 2.420397690827454e-06, "loss": 0.852, "step": 44320 }, { "epoch": 0.5401996270703082, "grad_norm": 1.808373212814331, "learning_rate": 2.420076972418217e-06, "loss": 0.8181, "step": 44325 }, { "epoch": 0.540260563294456, "grad_norm": 2.1188769340515137, "learning_rate": 2.4197562540089803e-06, "loss": 0.807, "step": 44330 }, { "epoch": 0.5403214995186039, "grad_norm": 1.9004303216934204, "learning_rate": 2.4194355355997437e-06, "loss": 0.8351, "step": 44335 }, { "epoch": 0.5403824357427517, "grad_norm": 2.164530038833618, "learning_rate": 2.4191148171905067e-06, "loss": 0.8217, "step": 44340 }, { "epoch": 0.5404433719668994, "grad_norm": 1.918946623802185, "learning_rate": 2.41879409878127e-06, "loss": 0.8651, "step": 44345 }, { "epoch": 0.5405043081910472, "grad_norm": 2.046358823776245, "learning_rate": 2.4184733803720336e-06, "loss": 0.8766, "step": 44350 }, { "epoch": 0.540565244415195, "grad_norm": 1.969779372215271, "learning_rate": 2.4181526619627966e-06, "loss": 0.851, "step": 44355 }, { "epoch": 0.5406261806393429, "grad_norm": 2.0053608417510986, "learning_rate": 2.41783194355356e-06, "loss": 0.8638, "step": 44360 }, { "epoch": 0.5406871168634907, "grad_norm": 2.173549175262451, "learning_rate": 2.4175112251443235e-06, "loss": 0.8744, "step": 44365 }, { "epoch": 0.5407480530876385, "grad_norm": 1.7408450841903687, "learning_rate": 2.417190506735087e-06, "loss": 0.8443, "step": 44370 }, { "epoch": 0.5408089893117863, "grad_norm": 1.824690580368042, "learning_rate": 2.4168697883258504e-06, "loss": 0.8078, "step": 44375 }, { "epoch": 0.540869925535934, "grad_norm": 1.9143046140670776, "learning_rate": 2.4165490699166134e-06, "loss": 0.8226, "step": 44380 }, { "epoch": 0.5409308617600819, "grad_norm": 2.0401954650878906, "learning_rate": 2.416228351507377e-06, "loss": 0.8859, "step": 44385 }, { "epoch": 0.5409917979842297, "grad_norm": 1.8543379306793213, "learning_rate": 2.4159076330981403e-06, "loss": 0.8027, "step": 44390 }, { "epoch": 0.5410527342083775, "grad_norm": 2.094975471496582, "learning_rate": 2.4155869146889033e-06, "loss": 0.8481, "step": 44395 }, { "epoch": 0.5411136704325253, "grad_norm": 2.3049988746643066, "learning_rate": 2.4152661962796667e-06, "loss": 0.8692, "step": 44400 }, { "epoch": 0.5411746066566732, "grad_norm": 1.9822582006454468, "learning_rate": 2.4149454778704298e-06, "loss": 0.8523, "step": 44405 }, { "epoch": 0.541235542880821, "grad_norm": 2.244333028793335, "learning_rate": 2.414624759461193e-06, "loss": 0.7952, "step": 44410 }, { "epoch": 0.5412964791049687, "grad_norm": 1.98778235912323, "learning_rate": 2.4143040410519566e-06, "loss": 0.877, "step": 44415 }, { "epoch": 0.5413574153291165, "grad_norm": 1.7903162240982056, "learning_rate": 2.4139833226427197e-06, "loss": 0.8196, "step": 44420 }, { "epoch": 0.5414183515532643, "grad_norm": 1.8705512285232544, "learning_rate": 2.413662604233483e-06, "loss": 0.8212, "step": 44425 }, { "epoch": 0.5414792877774122, "grad_norm": 1.9715979099273682, "learning_rate": 2.4133418858242465e-06, "loss": 0.7964, "step": 44430 }, { "epoch": 0.54154022400156, "grad_norm": 1.9881386756896973, "learning_rate": 2.4130211674150096e-06, "loss": 0.8151, "step": 44435 }, { "epoch": 0.5416011602257078, "grad_norm": 2.311579465866089, "learning_rate": 2.412700449005773e-06, "loss": 0.8951, "step": 44440 }, { "epoch": 0.5416620964498556, "grad_norm": 2.070671558380127, "learning_rate": 2.4123797305965364e-06, "loss": 0.8132, "step": 44445 }, { "epoch": 0.5417230326740033, "grad_norm": 2.5433475971221924, "learning_rate": 2.4120590121873e-06, "loss": 0.8491, "step": 44450 }, { "epoch": 0.5417839688981512, "grad_norm": 1.850093960762024, "learning_rate": 2.4117382937780633e-06, "loss": 0.7457, "step": 44455 }, { "epoch": 0.541844905122299, "grad_norm": 1.9219623804092407, "learning_rate": 2.4114175753688263e-06, "loss": 0.7783, "step": 44460 }, { "epoch": 0.5419058413464468, "grad_norm": 2.056739330291748, "learning_rate": 2.4110968569595898e-06, "loss": 0.8318, "step": 44465 }, { "epoch": 0.5419667775705946, "grad_norm": 2.1200358867645264, "learning_rate": 2.410776138550353e-06, "loss": 0.7721, "step": 44470 }, { "epoch": 0.5420277137947425, "grad_norm": 1.8625431060791016, "learning_rate": 2.4104554201411162e-06, "loss": 0.8578, "step": 44475 }, { "epoch": 0.5420886500188903, "grad_norm": 2.186014413833618, "learning_rate": 2.4101347017318797e-06, "loss": 0.9211, "step": 44480 }, { "epoch": 0.542149586243038, "grad_norm": 1.8655520677566528, "learning_rate": 2.409813983322643e-06, "loss": 0.8396, "step": 44485 }, { "epoch": 0.5422105224671858, "grad_norm": 2.1891350746154785, "learning_rate": 2.409493264913406e-06, "loss": 0.7874, "step": 44490 }, { "epoch": 0.5422714586913336, "grad_norm": 1.7682191133499146, "learning_rate": 2.4091725465041696e-06, "loss": 0.8457, "step": 44495 }, { "epoch": 0.5423323949154815, "grad_norm": 1.739391565322876, "learning_rate": 2.4088518280949326e-06, "loss": 0.8559, "step": 44500 }, { "epoch": 0.5423933311396293, "grad_norm": 2.113161325454712, "learning_rate": 2.408531109685696e-06, "loss": 0.8435, "step": 44505 }, { "epoch": 0.5424542673637771, "grad_norm": 1.5351519584655762, "learning_rate": 2.4082103912764595e-06, "loss": 0.8708, "step": 44510 }, { "epoch": 0.5425152035879249, "grad_norm": 1.8689743280410767, "learning_rate": 2.407889672867223e-06, "loss": 0.8058, "step": 44515 }, { "epoch": 0.5425761398120726, "grad_norm": 2.0857691764831543, "learning_rate": 2.407568954457986e-06, "loss": 0.7861, "step": 44520 }, { "epoch": 0.5426370760362205, "grad_norm": 1.891930341720581, "learning_rate": 2.4072482360487494e-06, "loss": 0.9294, "step": 44525 }, { "epoch": 0.5426980122603683, "grad_norm": 1.9877902269363403, "learning_rate": 2.406927517639513e-06, "loss": 0.8554, "step": 44530 }, { "epoch": 0.5427589484845161, "grad_norm": 2.003490686416626, "learning_rate": 2.4066067992302762e-06, "loss": 0.8097, "step": 44535 }, { "epoch": 0.5428198847086639, "grad_norm": 2.189455032348633, "learning_rate": 2.4062860808210393e-06, "loss": 0.8277, "step": 44540 }, { "epoch": 0.5428808209328118, "grad_norm": 2.1030237674713135, "learning_rate": 2.4059653624118027e-06, "loss": 0.8563, "step": 44545 }, { "epoch": 0.5429417571569596, "grad_norm": 1.8354451656341553, "learning_rate": 2.405644644002566e-06, "loss": 0.7761, "step": 44550 }, { "epoch": 0.5430026933811073, "grad_norm": 2.185328245162964, "learning_rate": 2.405323925593329e-06, "loss": 0.8537, "step": 44555 }, { "epoch": 0.5430636296052551, "grad_norm": 1.8177261352539062, "learning_rate": 2.4050032071840926e-06, "loss": 0.8247, "step": 44560 }, { "epoch": 0.5431245658294029, "grad_norm": 2.1720187664031982, "learning_rate": 2.404682488774856e-06, "loss": 0.8395, "step": 44565 }, { "epoch": 0.5431855020535508, "grad_norm": 2.2998099327087402, "learning_rate": 2.404361770365619e-06, "loss": 0.7774, "step": 44570 }, { "epoch": 0.5432464382776986, "grad_norm": 2.3158395290374756, "learning_rate": 2.4040410519563825e-06, "loss": 0.8536, "step": 44575 }, { "epoch": 0.5433073745018464, "grad_norm": 2.46889591217041, "learning_rate": 2.4037203335471455e-06, "loss": 0.8729, "step": 44580 }, { "epoch": 0.5433683107259941, "grad_norm": 2.3908755779266357, "learning_rate": 2.403399615137909e-06, "loss": 0.7417, "step": 44585 }, { "epoch": 0.5434292469501419, "grad_norm": 1.6731858253479004, "learning_rate": 2.4030788967286724e-06, "loss": 0.7134, "step": 44590 }, { "epoch": 0.5434901831742898, "grad_norm": 2.236943483352661, "learning_rate": 2.402758178319436e-06, "loss": 0.8762, "step": 44595 }, { "epoch": 0.5435511193984376, "grad_norm": 1.6514571905136108, "learning_rate": 2.402437459910199e-06, "loss": 0.8967, "step": 44600 }, { "epoch": 0.5436120556225854, "grad_norm": 2.2514026165008545, "learning_rate": 2.4021167415009623e-06, "loss": 0.8715, "step": 44605 }, { "epoch": 0.5436729918467332, "grad_norm": 1.6915189027786255, "learning_rate": 2.4017960230917257e-06, "loss": 0.794, "step": 44610 }, { "epoch": 0.5437339280708811, "grad_norm": 2.003457546234131, "learning_rate": 2.401475304682489e-06, "loss": 0.8439, "step": 44615 }, { "epoch": 0.5437948642950288, "grad_norm": 2.0206844806671143, "learning_rate": 2.401154586273252e-06, "loss": 0.7908, "step": 44620 }, { "epoch": 0.5438558005191766, "grad_norm": 1.789897084236145, "learning_rate": 2.4008338678640156e-06, "loss": 0.8557, "step": 44625 }, { "epoch": 0.5439167367433244, "grad_norm": 3.121842384338379, "learning_rate": 2.400513149454779e-06, "loss": 0.8202, "step": 44630 }, { "epoch": 0.5439776729674722, "grad_norm": 1.9232752323150635, "learning_rate": 2.400192431045542e-06, "loss": 0.8568, "step": 44635 }, { "epoch": 0.5440386091916201, "grad_norm": 2.0438854694366455, "learning_rate": 2.3998717126363055e-06, "loss": 0.7746, "step": 44640 }, { "epoch": 0.5440995454157679, "grad_norm": 1.7952687740325928, "learning_rate": 2.399550994227069e-06, "loss": 0.7727, "step": 44645 }, { "epoch": 0.5441604816399157, "grad_norm": 2.252828359603882, "learning_rate": 2.399230275817832e-06, "loss": 0.8289, "step": 44650 }, { "epoch": 0.5442214178640634, "grad_norm": 2.0402119159698486, "learning_rate": 2.3989095574085954e-06, "loss": 0.7896, "step": 44655 }, { "epoch": 0.5442823540882112, "grad_norm": 1.865458607673645, "learning_rate": 2.3985888389993584e-06, "loss": 0.8333, "step": 44660 }, { "epoch": 0.5443432903123591, "grad_norm": 2.206587553024292, "learning_rate": 2.398268120590122e-06, "loss": 0.8496, "step": 44665 }, { "epoch": 0.5444042265365069, "grad_norm": 2.696171998977661, "learning_rate": 2.3979474021808853e-06, "loss": 0.8979, "step": 44670 }, { "epoch": 0.5444651627606547, "grad_norm": 1.901319980621338, "learning_rate": 2.3976266837716488e-06, "loss": 0.8083, "step": 44675 }, { "epoch": 0.5445260989848025, "grad_norm": 2.3537745475769043, "learning_rate": 2.397305965362412e-06, "loss": 0.8293, "step": 44680 }, { "epoch": 0.5445870352089504, "grad_norm": 2.21470046043396, "learning_rate": 2.3969852469531752e-06, "loss": 0.8066, "step": 44685 }, { "epoch": 0.5446479714330981, "grad_norm": 1.9286811351776123, "learning_rate": 2.3966645285439387e-06, "loss": 0.8643, "step": 44690 }, { "epoch": 0.5447089076572459, "grad_norm": 2.188324213027954, "learning_rate": 2.396343810134702e-06, "loss": 0.7864, "step": 44695 }, { "epoch": 0.5447698438813937, "grad_norm": 2.038377523422241, "learning_rate": 2.396023091725465e-06, "loss": 0.8482, "step": 44700 }, { "epoch": 0.5448307801055415, "grad_norm": 2.1148812770843506, "learning_rate": 2.3957023733162286e-06, "loss": 0.7881, "step": 44705 }, { "epoch": 0.5448917163296894, "grad_norm": 1.8857858180999756, "learning_rate": 2.395381654906992e-06, "loss": 0.883, "step": 44710 }, { "epoch": 0.5449526525538372, "grad_norm": 1.8752095699310303, "learning_rate": 2.395060936497755e-06, "loss": 0.8094, "step": 44715 }, { "epoch": 0.545013588777985, "grad_norm": 2.1306369304656982, "learning_rate": 2.3947402180885184e-06, "loss": 0.8743, "step": 44720 }, { "epoch": 0.5450745250021327, "grad_norm": 1.8536770343780518, "learning_rate": 2.394419499679282e-06, "loss": 0.8453, "step": 44725 }, { "epoch": 0.5451354612262805, "grad_norm": 2.313767671585083, "learning_rate": 2.394098781270045e-06, "loss": 0.8565, "step": 44730 }, { "epoch": 0.5451963974504284, "grad_norm": 2.1150457859039307, "learning_rate": 2.3937780628608083e-06, "loss": 0.8024, "step": 44735 }, { "epoch": 0.5452573336745762, "grad_norm": 1.9014718532562256, "learning_rate": 2.3934573444515718e-06, "loss": 0.7974, "step": 44740 }, { "epoch": 0.545318269898724, "grad_norm": 1.9811800718307495, "learning_rate": 2.393136626042335e-06, "loss": 0.8285, "step": 44745 }, { "epoch": 0.5453792061228718, "grad_norm": 2.012563467025757, "learning_rate": 2.3928159076330982e-06, "loss": 0.902, "step": 44750 }, { "epoch": 0.5454401423470197, "grad_norm": 1.6928484439849854, "learning_rate": 2.3924951892238617e-06, "loss": 0.7713, "step": 44755 }, { "epoch": 0.5455010785711674, "grad_norm": 2.1041572093963623, "learning_rate": 2.392174470814625e-06, "loss": 0.8493, "step": 44760 }, { "epoch": 0.5455620147953152, "grad_norm": 1.747228980064392, "learning_rate": 2.3918537524053886e-06, "loss": 0.8566, "step": 44765 }, { "epoch": 0.545622951019463, "grad_norm": 1.818568468093872, "learning_rate": 2.3915330339961516e-06, "loss": 0.8769, "step": 44770 }, { "epoch": 0.5456838872436108, "grad_norm": 2.0343236923217773, "learning_rate": 2.391212315586915e-06, "loss": 0.8496, "step": 44775 }, { "epoch": 0.5457448234677587, "grad_norm": 2.5306615829467773, "learning_rate": 2.3908915971776785e-06, "loss": 0.7644, "step": 44780 }, { "epoch": 0.5458057596919065, "grad_norm": 1.8605087995529175, "learning_rate": 2.3905708787684415e-06, "loss": 0.8119, "step": 44785 }, { "epoch": 0.5458666959160543, "grad_norm": 2.0497379302978516, "learning_rate": 2.390250160359205e-06, "loss": 0.8108, "step": 44790 }, { "epoch": 0.545927632140202, "grad_norm": 1.767964243888855, "learning_rate": 2.389929441949968e-06, "loss": 0.8002, "step": 44795 }, { "epoch": 0.5459885683643498, "grad_norm": 1.9501994848251343, "learning_rate": 2.3896087235407314e-06, "loss": 0.7935, "step": 44800 }, { "epoch": 0.5460495045884977, "grad_norm": 1.9635937213897705, "learning_rate": 2.389288005131495e-06, "loss": 0.8361, "step": 44805 }, { "epoch": 0.5461104408126455, "grad_norm": 2.187986135482788, "learning_rate": 2.388967286722258e-06, "loss": 0.7913, "step": 44810 }, { "epoch": 0.5461713770367933, "grad_norm": 2.115250825881958, "learning_rate": 2.3886465683130213e-06, "loss": 0.8188, "step": 44815 }, { "epoch": 0.5462323132609411, "grad_norm": 2.004318952560425, "learning_rate": 2.3883258499037847e-06, "loss": 0.8165, "step": 44820 }, { "epoch": 0.546293249485089, "grad_norm": 1.9077719449996948, "learning_rate": 2.3880051314945477e-06, "loss": 0.8371, "step": 44825 }, { "epoch": 0.5463541857092367, "grad_norm": 1.9765524864196777, "learning_rate": 2.387684413085311e-06, "loss": 0.8637, "step": 44830 }, { "epoch": 0.5464151219333845, "grad_norm": 2.062939405441284, "learning_rate": 2.3873636946760746e-06, "loss": 0.8499, "step": 44835 }, { "epoch": 0.5464760581575323, "grad_norm": 2.1437621116638184, "learning_rate": 2.387042976266838e-06, "loss": 0.7992, "step": 44840 }, { "epoch": 0.5465369943816801, "grad_norm": 2.324113607406616, "learning_rate": 2.3867222578576015e-06, "loss": 0.9415, "step": 44845 }, { "epoch": 0.546597930605828, "grad_norm": 2.2380218505859375, "learning_rate": 2.3864015394483645e-06, "loss": 0.891, "step": 44850 }, { "epoch": 0.5466588668299758, "grad_norm": 2.0938501358032227, "learning_rate": 2.386080821039128e-06, "loss": 0.9156, "step": 44855 }, { "epoch": 0.5467198030541236, "grad_norm": 1.8407620191574097, "learning_rate": 2.3857601026298914e-06, "loss": 0.7903, "step": 44860 }, { "epoch": 0.5467807392782713, "grad_norm": 1.9804415702819824, "learning_rate": 2.3854393842206544e-06, "loss": 0.8135, "step": 44865 }, { "epoch": 0.5468416755024191, "grad_norm": 2.077700138092041, "learning_rate": 2.385118665811418e-06, "loss": 0.8334, "step": 44870 }, { "epoch": 0.546902611726567, "grad_norm": 2.3125100135803223, "learning_rate": 2.384797947402181e-06, "loss": 0.8552, "step": 44875 }, { "epoch": 0.5469635479507148, "grad_norm": 2.2769808769226074, "learning_rate": 2.3844772289929443e-06, "loss": 0.9225, "step": 44880 }, { "epoch": 0.5470244841748626, "grad_norm": 2.469587802886963, "learning_rate": 2.3841565105837077e-06, "loss": 0.889, "step": 44885 }, { "epoch": 0.5470854203990104, "grad_norm": 1.6908661127090454, "learning_rate": 2.3838357921744708e-06, "loss": 0.8563, "step": 44890 }, { "epoch": 0.5471463566231582, "grad_norm": 2.117936372756958, "learning_rate": 2.383515073765234e-06, "loss": 0.8207, "step": 44895 }, { "epoch": 0.547207292847306, "grad_norm": 2.188603401184082, "learning_rate": 2.3831943553559976e-06, "loss": 0.7814, "step": 44900 }, { "epoch": 0.5472682290714538, "grad_norm": 2.043278455734253, "learning_rate": 2.382873636946761e-06, "loss": 0.8, "step": 44905 }, { "epoch": 0.5473291652956016, "grad_norm": 1.9306049346923828, "learning_rate": 2.382552918537524e-06, "loss": 0.8575, "step": 44910 }, { "epoch": 0.5473901015197494, "grad_norm": 2.55590558052063, "learning_rate": 2.3822322001282875e-06, "loss": 0.8642, "step": 44915 }, { "epoch": 0.5474510377438973, "grad_norm": 2.078172206878662, "learning_rate": 2.381911481719051e-06, "loss": 0.7834, "step": 44920 }, { "epoch": 0.5475119739680451, "grad_norm": 2.0882837772369385, "learning_rate": 2.3815907633098144e-06, "loss": 0.7939, "step": 44925 }, { "epoch": 0.5475729101921929, "grad_norm": 2.0913643836975098, "learning_rate": 2.3812700449005774e-06, "loss": 0.824, "step": 44930 }, { "epoch": 0.5476338464163406, "grad_norm": 2.577578067779541, "learning_rate": 2.380949326491341e-06, "loss": 0.798, "step": 44935 }, { "epoch": 0.5476947826404884, "grad_norm": 1.9097295999526978, "learning_rate": 2.3806286080821043e-06, "loss": 0.9106, "step": 44940 }, { "epoch": 0.5477557188646363, "grad_norm": 1.83187997341156, "learning_rate": 2.3803078896728673e-06, "loss": 0.8665, "step": 44945 }, { "epoch": 0.5478166550887841, "grad_norm": 1.7224810123443604, "learning_rate": 2.3799871712636308e-06, "loss": 0.7694, "step": 44950 }, { "epoch": 0.5478775913129319, "grad_norm": 1.7476097345352173, "learning_rate": 2.379666452854394e-06, "loss": 0.8046, "step": 44955 }, { "epoch": 0.5479385275370797, "grad_norm": 1.809086799621582, "learning_rate": 2.3793457344451572e-06, "loss": 0.7572, "step": 44960 }, { "epoch": 0.5479994637612275, "grad_norm": 1.7721498012542725, "learning_rate": 2.3790250160359207e-06, "loss": 0.8252, "step": 44965 }, { "epoch": 0.5480603999853753, "grad_norm": 1.902985692024231, "learning_rate": 2.3787042976266837e-06, "loss": 0.8346, "step": 44970 }, { "epoch": 0.5481213362095231, "grad_norm": 2.679889678955078, "learning_rate": 2.378383579217447e-06, "loss": 0.8118, "step": 44975 }, { "epoch": 0.5481822724336709, "grad_norm": 2.2465620040893555, "learning_rate": 2.3780628608082106e-06, "loss": 0.8166, "step": 44980 }, { "epoch": 0.5482432086578187, "grad_norm": 2.0161678791046143, "learning_rate": 2.377742142398974e-06, "loss": 0.8372, "step": 44985 }, { "epoch": 0.5483041448819665, "grad_norm": 1.777413010597229, "learning_rate": 2.3774214239897374e-06, "loss": 0.8229, "step": 44990 }, { "epoch": 0.5483650811061144, "grad_norm": 1.824742078781128, "learning_rate": 2.3771007055805005e-06, "loss": 0.8172, "step": 44995 }, { "epoch": 0.5484260173302622, "grad_norm": 2.442366123199463, "learning_rate": 2.376779987171264e-06, "loss": 0.8822, "step": 45000 }, { "epoch": 0.5484869535544099, "grad_norm": 2.115379571914673, "learning_rate": 2.3764592687620273e-06, "loss": 0.8608, "step": 45005 }, { "epoch": 0.5485478897785577, "grad_norm": 2.244786262512207, "learning_rate": 2.3761385503527904e-06, "loss": 0.7942, "step": 45010 }, { "epoch": 0.5486088260027056, "grad_norm": 2.071171998977661, "learning_rate": 2.375817831943554e-06, "loss": 0.8464, "step": 45015 }, { "epoch": 0.5486697622268534, "grad_norm": 2.144298791885376, "learning_rate": 2.3754971135343172e-06, "loss": 0.863, "step": 45020 }, { "epoch": 0.5487306984510012, "grad_norm": 2.003093719482422, "learning_rate": 2.3751763951250803e-06, "loss": 0.8719, "step": 45025 }, { "epoch": 0.548791634675149, "grad_norm": 1.8760226964950562, "learning_rate": 2.3748556767158437e-06, "loss": 0.798, "step": 45030 }, { "epoch": 0.5488525708992968, "grad_norm": 2.006289482116699, "learning_rate": 2.374534958306607e-06, "loss": 0.8914, "step": 45035 }, { "epoch": 0.5489135071234446, "grad_norm": 2.198354721069336, "learning_rate": 2.37421423989737e-06, "loss": 0.7651, "step": 45040 }, { "epoch": 0.5489744433475924, "grad_norm": 1.9200564622879028, "learning_rate": 2.3738935214881336e-06, "loss": 0.773, "step": 45045 }, { "epoch": 0.5490353795717402, "grad_norm": 2.2400777339935303, "learning_rate": 2.3735728030788966e-06, "loss": 0.7785, "step": 45050 }, { "epoch": 0.549096315795888, "grad_norm": 1.9646015167236328, "learning_rate": 2.37325208466966e-06, "loss": 0.8847, "step": 45055 }, { "epoch": 0.5491572520200358, "grad_norm": 1.5650101900100708, "learning_rate": 2.3729313662604235e-06, "loss": 0.841, "step": 45060 }, { "epoch": 0.5492181882441837, "grad_norm": 1.963114857673645, "learning_rate": 2.372610647851187e-06, "loss": 0.7517, "step": 45065 }, { "epoch": 0.5492791244683315, "grad_norm": 1.9488136768341064, "learning_rate": 2.3722899294419504e-06, "loss": 0.8455, "step": 45070 }, { "epoch": 0.5493400606924792, "grad_norm": 2.060657262802124, "learning_rate": 2.3719692110327134e-06, "loss": 0.74, "step": 45075 }, { "epoch": 0.549400996916627, "grad_norm": 1.9292110204696655, "learning_rate": 2.371648492623477e-06, "loss": 0.854, "step": 45080 }, { "epoch": 0.5494619331407748, "grad_norm": 2.004021406173706, "learning_rate": 2.3713277742142403e-06, "loss": 0.8202, "step": 45085 }, { "epoch": 0.5495228693649227, "grad_norm": 2.126999616622925, "learning_rate": 2.3710070558050033e-06, "loss": 0.8591, "step": 45090 }, { "epoch": 0.5495838055890705, "grad_norm": 2.025890827178955, "learning_rate": 2.3706863373957667e-06, "loss": 0.8505, "step": 45095 }, { "epoch": 0.5496447418132183, "grad_norm": 2.0073206424713135, "learning_rate": 2.37036561898653e-06, "loss": 0.8133, "step": 45100 }, { "epoch": 0.5497056780373661, "grad_norm": 2.185197114944458, "learning_rate": 2.370044900577293e-06, "loss": 0.8938, "step": 45105 }, { "epoch": 0.5497666142615139, "grad_norm": 2.1908020973205566, "learning_rate": 2.3697241821680566e-06, "loss": 0.786, "step": 45110 }, { "epoch": 0.5498275504856617, "grad_norm": 1.9731578826904297, "learning_rate": 2.36940346375882e-06, "loss": 0.8501, "step": 45115 }, { "epoch": 0.5498884867098095, "grad_norm": 2.168766975402832, "learning_rate": 2.369082745349583e-06, "loss": 0.7671, "step": 45120 }, { "epoch": 0.5499494229339573, "grad_norm": 1.9155356884002686, "learning_rate": 2.3687620269403465e-06, "loss": 0.8277, "step": 45125 }, { "epoch": 0.5500103591581051, "grad_norm": 1.8346432447433472, "learning_rate": 2.36844130853111e-06, "loss": 0.8239, "step": 45130 }, { "epoch": 0.550071295382253, "grad_norm": 2.02860951423645, "learning_rate": 2.368120590121873e-06, "loss": 0.8262, "step": 45135 }, { "epoch": 0.5501322316064008, "grad_norm": 1.9640998840332031, "learning_rate": 2.3677998717126364e-06, "loss": 0.7944, "step": 45140 }, { "epoch": 0.5501931678305485, "grad_norm": 2.148817777633667, "learning_rate": 2.3674791533034e-06, "loss": 0.842, "step": 45145 }, { "epoch": 0.5502541040546963, "grad_norm": 2.0748021602630615, "learning_rate": 2.3671584348941633e-06, "loss": 0.8308, "step": 45150 }, { "epoch": 0.5503150402788441, "grad_norm": 1.6171183586120605, "learning_rate": 2.3668377164849267e-06, "loss": 0.7568, "step": 45155 }, { "epoch": 0.550375976502992, "grad_norm": 1.9205647706985474, "learning_rate": 2.3665169980756898e-06, "loss": 0.8113, "step": 45160 }, { "epoch": 0.5504369127271398, "grad_norm": 1.819831132888794, "learning_rate": 2.366196279666453e-06, "loss": 0.866, "step": 45165 }, { "epoch": 0.5504978489512876, "grad_norm": 1.9471062421798706, "learning_rate": 2.3658755612572162e-06, "loss": 0.786, "step": 45170 }, { "epoch": 0.5505587851754354, "grad_norm": 2.1332452297210693, "learning_rate": 2.3655548428479797e-06, "loss": 0.7652, "step": 45175 }, { "epoch": 0.5506197213995832, "grad_norm": 2.089087724685669, "learning_rate": 2.365234124438743e-06, "loss": 0.8737, "step": 45180 }, { "epoch": 0.550680657623731, "grad_norm": 1.7820261716842651, "learning_rate": 2.364913406029506e-06, "loss": 0.7277, "step": 45185 }, { "epoch": 0.5507415938478788, "grad_norm": 1.820586085319519, "learning_rate": 2.3645926876202696e-06, "loss": 0.8368, "step": 45190 }, { "epoch": 0.5508025300720266, "grad_norm": 1.997816801071167, "learning_rate": 2.364271969211033e-06, "loss": 0.8295, "step": 45195 }, { "epoch": 0.5508634662961744, "grad_norm": 1.7415306568145752, "learning_rate": 2.363951250801796e-06, "loss": 0.9178, "step": 45200 }, { "epoch": 0.5509244025203223, "grad_norm": 2.1395375728607178, "learning_rate": 2.3636305323925595e-06, "loss": 0.8403, "step": 45205 }, { "epoch": 0.5509853387444701, "grad_norm": 1.9337642192840576, "learning_rate": 2.363309813983323e-06, "loss": 0.8619, "step": 45210 }, { "epoch": 0.5510462749686178, "grad_norm": 1.9165089130401611, "learning_rate": 2.3629890955740863e-06, "loss": 0.8172, "step": 45215 }, { "epoch": 0.5511072111927656, "grad_norm": 1.8942867517471313, "learning_rate": 2.3626683771648493e-06, "loss": 0.8604, "step": 45220 }, { "epoch": 0.5511681474169134, "grad_norm": 2.165660858154297, "learning_rate": 2.362347658755613e-06, "loss": 0.8648, "step": 45225 }, { "epoch": 0.5512290836410613, "grad_norm": 1.8928642272949219, "learning_rate": 2.3620269403463762e-06, "loss": 0.8343, "step": 45230 }, { "epoch": 0.5512900198652091, "grad_norm": 1.7832984924316406, "learning_rate": 2.3617062219371397e-06, "loss": 0.8698, "step": 45235 }, { "epoch": 0.5513509560893569, "grad_norm": 1.8080023527145386, "learning_rate": 2.3613855035279027e-06, "loss": 0.8937, "step": 45240 }, { "epoch": 0.5514118923135047, "grad_norm": 2.0883214473724365, "learning_rate": 2.361064785118666e-06, "loss": 0.8233, "step": 45245 }, { "epoch": 0.5514728285376524, "grad_norm": 2.0057475566864014, "learning_rate": 2.360744066709429e-06, "loss": 0.7797, "step": 45250 }, { "epoch": 0.5515337647618003, "grad_norm": 1.912360429763794, "learning_rate": 2.3604233483001926e-06, "loss": 0.8016, "step": 45255 }, { "epoch": 0.5515947009859481, "grad_norm": 1.937327265739441, "learning_rate": 2.360102629890956e-06, "loss": 0.8367, "step": 45260 }, { "epoch": 0.5516556372100959, "grad_norm": 1.8837482929229736, "learning_rate": 2.359781911481719e-06, "loss": 0.8329, "step": 45265 }, { "epoch": 0.5517165734342437, "grad_norm": 2.0138487815856934, "learning_rate": 2.3594611930724825e-06, "loss": 0.8638, "step": 45270 }, { "epoch": 0.5517775096583916, "grad_norm": 2.0771074295043945, "learning_rate": 2.359140474663246e-06, "loss": 0.8187, "step": 45275 }, { "epoch": 0.5518384458825394, "grad_norm": 2.059281349182129, "learning_rate": 2.358819756254009e-06, "loss": 0.8452, "step": 45280 }, { "epoch": 0.5518993821066871, "grad_norm": 1.9214543104171753, "learning_rate": 2.3584990378447724e-06, "loss": 0.8694, "step": 45285 }, { "epoch": 0.5519603183308349, "grad_norm": 1.9627978801727295, "learning_rate": 2.358178319435536e-06, "loss": 0.8403, "step": 45290 }, { "epoch": 0.5520212545549827, "grad_norm": 1.9016369581222534, "learning_rate": 2.3578576010262993e-06, "loss": 0.8205, "step": 45295 }, { "epoch": 0.5520821907791306, "grad_norm": 2.142676830291748, "learning_rate": 2.3575368826170623e-06, "loss": 0.8559, "step": 45300 }, { "epoch": 0.5521431270032784, "grad_norm": 2.032181978225708, "learning_rate": 2.3572161642078257e-06, "loss": 0.8456, "step": 45305 }, { "epoch": 0.5522040632274262, "grad_norm": 1.7717938423156738, "learning_rate": 2.356895445798589e-06, "loss": 0.7947, "step": 45310 }, { "epoch": 0.552264999451574, "grad_norm": 1.9322688579559326, "learning_rate": 2.3565747273893526e-06, "loss": 0.8043, "step": 45315 }, { "epoch": 0.5523259356757217, "grad_norm": 1.660785436630249, "learning_rate": 2.3562540089801156e-06, "loss": 0.8347, "step": 45320 }, { "epoch": 0.5523868718998696, "grad_norm": 2.5727498531341553, "learning_rate": 2.355933290570879e-06, "loss": 0.913, "step": 45325 }, { "epoch": 0.5524478081240174, "grad_norm": 1.9988038539886475, "learning_rate": 2.3556125721616425e-06, "loss": 0.7825, "step": 45330 }, { "epoch": 0.5525087443481652, "grad_norm": 1.9035428762435913, "learning_rate": 2.3552918537524055e-06, "loss": 0.8205, "step": 45335 }, { "epoch": 0.552569680572313, "grad_norm": 2.098348379135132, "learning_rate": 2.354971135343169e-06, "loss": 0.853, "step": 45340 }, { "epoch": 0.5526306167964609, "grad_norm": 1.713773488998413, "learning_rate": 2.354650416933932e-06, "loss": 0.7343, "step": 45345 }, { "epoch": 0.5526915530206087, "grad_norm": 2.2294886112213135, "learning_rate": 2.3543296985246954e-06, "loss": 0.8328, "step": 45350 }, { "epoch": 0.5527524892447564, "grad_norm": 1.861138105392456, "learning_rate": 2.354008980115459e-06, "loss": 0.7338, "step": 45355 }, { "epoch": 0.5528134254689042, "grad_norm": 1.8978995084762573, "learning_rate": 2.353688261706222e-06, "loss": 0.8266, "step": 45360 }, { "epoch": 0.552874361693052, "grad_norm": 1.9976359605789185, "learning_rate": 2.3533675432969853e-06, "loss": 0.8007, "step": 45365 }, { "epoch": 0.5529352979171999, "grad_norm": 1.9572604894638062, "learning_rate": 2.3530468248877487e-06, "loss": 0.8974, "step": 45370 }, { "epoch": 0.5529962341413477, "grad_norm": 2.281951427459717, "learning_rate": 2.352726106478512e-06, "loss": 0.7697, "step": 45375 }, { "epoch": 0.5530571703654955, "grad_norm": 1.9850176572799683, "learning_rate": 2.3524053880692756e-06, "loss": 0.7893, "step": 45380 }, { "epoch": 0.5531181065896433, "grad_norm": 2.2052462100982666, "learning_rate": 2.3520846696600386e-06, "loss": 0.7723, "step": 45385 }, { "epoch": 0.553179042813791, "grad_norm": 1.9237812757492065, "learning_rate": 2.351763951250802e-06, "loss": 0.76, "step": 45390 }, { "epoch": 0.5532399790379389, "grad_norm": 2.1481809616088867, "learning_rate": 2.3514432328415655e-06, "loss": 0.8425, "step": 45395 }, { "epoch": 0.5533009152620867, "grad_norm": 1.9482978582382202, "learning_rate": 2.3511225144323285e-06, "loss": 0.847, "step": 45400 }, { "epoch": 0.5533618514862345, "grad_norm": 1.7403877973556519, "learning_rate": 2.350801796023092e-06, "loss": 0.8933, "step": 45405 }, { "epoch": 0.5534227877103823, "grad_norm": 1.9217745065689087, "learning_rate": 2.3504810776138554e-06, "loss": 0.8614, "step": 45410 }, { "epoch": 0.5534837239345302, "grad_norm": 2.71423077583313, "learning_rate": 2.3501603592046184e-06, "loss": 0.866, "step": 45415 }, { "epoch": 0.553544660158678, "grad_norm": 2.0659775733947754, "learning_rate": 2.349839640795382e-06, "loss": 0.8251, "step": 45420 }, { "epoch": 0.5536055963828257, "grad_norm": 1.7636300325393677, "learning_rate": 2.349518922386145e-06, "loss": 0.8327, "step": 45425 }, { "epoch": 0.5536665326069735, "grad_norm": 1.8702661991119385, "learning_rate": 2.3491982039769083e-06, "loss": 0.852, "step": 45430 }, { "epoch": 0.5537274688311213, "grad_norm": 2.184025764465332, "learning_rate": 2.3488774855676718e-06, "loss": 0.8876, "step": 45435 }, { "epoch": 0.5537884050552692, "grad_norm": 1.9927482604980469, "learning_rate": 2.348556767158435e-06, "loss": 0.7687, "step": 45440 }, { "epoch": 0.553849341279417, "grad_norm": 2.469670295715332, "learning_rate": 2.3482360487491982e-06, "loss": 0.8384, "step": 45445 }, { "epoch": 0.5539102775035648, "grad_norm": 1.7981723546981812, "learning_rate": 2.3479153303399617e-06, "loss": 0.8464, "step": 45450 }, { "epoch": 0.5539712137277126, "grad_norm": 2.0437490940093994, "learning_rate": 2.347594611930725e-06, "loss": 0.793, "step": 45455 }, { "epoch": 0.5540321499518603, "grad_norm": 2.064957857131958, "learning_rate": 2.3472738935214886e-06, "loss": 0.878, "step": 45460 }, { "epoch": 0.5540930861760082, "grad_norm": 1.6879456043243408, "learning_rate": 2.3469531751122516e-06, "loss": 0.8061, "step": 45465 }, { "epoch": 0.554154022400156, "grad_norm": 2.0718533992767334, "learning_rate": 2.346632456703015e-06, "loss": 0.7743, "step": 45470 }, { "epoch": 0.5542149586243038, "grad_norm": 1.655989646911621, "learning_rate": 2.3463117382937785e-06, "loss": 0.8168, "step": 45475 }, { "epoch": 0.5542758948484516, "grad_norm": 2.2748241424560547, "learning_rate": 2.3459910198845415e-06, "loss": 0.8696, "step": 45480 }, { "epoch": 0.5543368310725995, "grad_norm": 1.9784088134765625, "learning_rate": 2.345670301475305e-06, "loss": 0.8189, "step": 45485 }, { "epoch": 0.5543977672967473, "grad_norm": 2.0703537464141846, "learning_rate": 2.3453495830660683e-06, "loss": 0.8414, "step": 45490 }, { "epoch": 0.554458703520895, "grad_norm": 1.9936974048614502, "learning_rate": 2.3450288646568314e-06, "loss": 0.7848, "step": 45495 }, { "epoch": 0.5545196397450428, "grad_norm": 1.891466736793518, "learning_rate": 2.344708146247595e-06, "loss": 0.8317, "step": 45500 }, { "epoch": 0.5545805759691906, "grad_norm": 1.8109955787658691, "learning_rate": 2.344387427838358e-06, "loss": 0.8051, "step": 45505 }, { "epoch": 0.5546415121933385, "grad_norm": 1.8901103734970093, "learning_rate": 2.3440667094291213e-06, "loss": 0.8301, "step": 45510 }, { "epoch": 0.5547024484174863, "grad_norm": 2.133192777633667, "learning_rate": 2.3437459910198847e-06, "loss": 0.8027, "step": 45515 }, { "epoch": 0.5547633846416341, "grad_norm": 2.3075149059295654, "learning_rate": 2.343425272610648e-06, "loss": 0.8987, "step": 45520 }, { "epoch": 0.5548243208657819, "grad_norm": 2.211170196533203, "learning_rate": 2.343104554201411e-06, "loss": 0.8362, "step": 45525 }, { "epoch": 0.5548852570899296, "grad_norm": 2.0212838649749756, "learning_rate": 2.3427838357921746e-06, "loss": 0.8114, "step": 45530 }, { "epoch": 0.5549461933140775, "grad_norm": 1.9113285541534424, "learning_rate": 2.342463117382938e-06, "loss": 0.8275, "step": 45535 }, { "epoch": 0.5550071295382253, "grad_norm": 2.332505464553833, "learning_rate": 2.3421423989737015e-06, "loss": 0.8, "step": 45540 }, { "epoch": 0.5550680657623731, "grad_norm": 2.013991355895996, "learning_rate": 2.3418216805644645e-06, "loss": 0.7669, "step": 45545 }, { "epoch": 0.5551290019865209, "grad_norm": 2.2162463665008545, "learning_rate": 2.341500962155228e-06, "loss": 0.8751, "step": 45550 }, { "epoch": 0.5551899382106688, "grad_norm": 2.3040523529052734, "learning_rate": 2.3411802437459914e-06, "loss": 0.8504, "step": 45555 }, { "epoch": 0.5552508744348165, "grad_norm": 2.3011891841888428, "learning_rate": 2.3408595253367544e-06, "loss": 0.8117, "step": 45560 }, { "epoch": 0.5553118106589643, "grad_norm": 1.7260819673538208, "learning_rate": 2.340538806927518e-06, "loss": 0.821, "step": 45565 }, { "epoch": 0.5553727468831121, "grad_norm": 2.233825206756592, "learning_rate": 2.3402180885182813e-06, "loss": 0.8257, "step": 45570 }, { "epoch": 0.5554336831072599, "grad_norm": 2.4867210388183594, "learning_rate": 2.3398973701090443e-06, "loss": 0.8286, "step": 45575 }, { "epoch": 0.5554946193314078, "grad_norm": 2.405010223388672, "learning_rate": 2.3395766516998077e-06, "loss": 0.8713, "step": 45580 }, { "epoch": 0.5555555555555556, "grad_norm": 1.7491576671600342, "learning_rate": 2.3392559332905707e-06, "loss": 0.7891, "step": 45585 }, { "epoch": 0.5556164917797034, "grad_norm": 2.393885374069214, "learning_rate": 2.338935214881334e-06, "loss": 0.8071, "step": 45590 }, { "epoch": 0.5556774280038511, "grad_norm": 1.8492356538772583, "learning_rate": 2.3386144964720976e-06, "loss": 0.8776, "step": 45595 }, { "epoch": 0.5557383642279989, "grad_norm": 1.844120740890503, "learning_rate": 2.338293778062861e-06, "loss": 0.7967, "step": 45600 }, { "epoch": 0.5557993004521468, "grad_norm": 2.188528537750244, "learning_rate": 2.3379730596536245e-06, "loss": 0.863, "step": 45605 }, { "epoch": 0.5558602366762946, "grad_norm": 1.8752681016921997, "learning_rate": 2.3376523412443875e-06, "loss": 0.8954, "step": 45610 }, { "epoch": 0.5559211729004424, "grad_norm": 2.0268421173095703, "learning_rate": 2.337331622835151e-06, "loss": 0.8216, "step": 45615 }, { "epoch": 0.5559821091245902, "grad_norm": 2.4244701862335205, "learning_rate": 2.3370109044259144e-06, "loss": 0.9069, "step": 45620 }, { "epoch": 0.556043045348738, "grad_norm": 1.6459155082702637, "learning_rate": 2.336690186016678e-06, "loss": 0.7877, "step": 45625 }, { "epoch": 0.5561039815728858, "grad_norm": 1.8745485544204712, "learning_rate": 2.336369467607441e-06, "loss": 0.8972, "step": 45630 }, { "epoch": 0.5561649177970336, "grad_norm": 2.17378830909729, "learning_rate": 2.3360487491982043e-06, "loss": 0.8844, "step": 45635 }, { "epoch": 0.5562258540211814, "grad_norm": 2.0446603298187256, "learning_rate": 2.3357280307889673e-06, "loss": 0.7547, "step": 45640 }, { "epoch": 0.5562867902453292, "grad_norm": 2.006091594696045, "learning_rate": 2.3354073123797308e-06, "loss": 0.808, "step": 45645 }, { "epoch": 0.556347726469477, "grad_norm": 2.0685653686523438, "learning_rate": 2.335086593970494e-06, "loss": 0.8459, "step": 45650 }, { "epoch": 0.5564086626936249, "grad_norm": 1.752044916152954, "learning_rate": 2.3347658755612572e-06, "loss": 0.7962, "step": 45655 }, { "epoch": 0.5564695989177727, "grad_norm": 2.0031497478485107, "learning_rate": 2.3344451571520207e-06, "loss": 0.8418, "step": 45660 }, { "epoch": 0.5565305351419204, "grad_norm": 1.8617980480194092, "learning_rate": 2.334124438742784e-06, "loss": 0.8241, "step": 45665 }, { "epoch": 0.5565914713660682, "grad_norm": 1.8856744766235352, "learning_rate": 2.333803720333547e-06, "loss": 0.873, "step": 45670 }, { "epoch": 0.5566524075902161, "grad_norm": 2.0511868000030518, "learning_rate": 2.3334830019243106e-06, "loss": 0.8848, "step": 45675 }, { "epoch": 0.5567133438143639, "grad_norm": 2.104290723800659, "learning_rate": 2.333162283515074e-06, "loss": 0.9012, "step": 45680 }, { "epoch": 0.5567742800385117, "grad_norm": 2.0414113998413086, "learning_rate": 2.3328415651058374e-06, "loss": 0.8083, "step": 45685 }, { "epoch": 0.5568352162626595, "grad_norm": 1.7842403650283813, "learning_rate": 2.332520846696601e-06, "loss": 0.8208, "step": 45690 }, { "epoch": 0.5568961524868073, "grad_norm": 2.203873634338379, "learning_rate": 2.332200128287364e-06, "loss": 0.8659, "step": 45695 }, { "epoch": 0.5569570887109551, "grad_norm": 1.7245581150054932, "learning_rate": 2.3318794098781273e-06, "loss": 0.8454, "step": 45700 }, { "epoch": 0.5570180249351029, "grad_norm": 2.1759450435638428, "learning_rate": 2.3315586914688908e-06, "loss": 0.8135, "step": 45705 }, { "epoch": 0.5570789611592507, "grad_norm": 1.8938111066818237, "learning_rate": 2.331237973059654e-06, "loss": 0.8565, "step": 45710 }, { "epoch": 0.5571398973833985, "grad_norm": 1.7157851457595825, "learning_rate": 2.3309172546504172e-06, "loss": 0.8356, "step": 45715 }, { "epoch": 0.5572008336075464, "grad_norm": 1.9822313785552979, "learning_rate": 2.3305965362411802e-06, "loss": 0.8264, "step": 45720 }, { "epoch": 0.5572617698316942, "grad_norm": 2.257880449295044, "learning_rate": 2.3302758178319437e-06, "loss": 0.8862, "step": 45725 }, { "epoch": 0.557322706055842, "grad_norm": 2.040886163711548, "learning_rate": 2.329955099422707e-06, "loss": 0.815, "step": 45730 }, { "epoch": 0.5573836422799897, "grad_norm": 1.8861817121505737, "learning_rate": 2.32963438101347e-06, "loss": 0.8108, "step": 45735 }, { "epoch": 0.5574445785041375, "grad_norm": 2.2180285453796387, "learning_rate": 2.3293136626042336e-06, "loss": 0.7931, "step": 45740 }, { "epoch": 0.5575055147282854, "grad_norm": 1.836790680885315, "learning_rate": 2.328992944194997e-06, "loss": 0.8665, "step": 45745 }, { "epoch": 0.5575664509524332, "grad_norm": 1.8363134860992432, "learning_rate": 2.32867222578576e-06, "loss": 0.8479, "step": 45750 }, { "epoch": 0.557627387176581, "grad_norm": 2.047185182571411, "learning_rate": 2.3283515073765235e-06, "loss": 0.8107, "step": 45755 }, { "epoch": 0.5576883234007288, "grad_norm": 2.2124826908111572, "learning_rate": 2.328030788967287e-06, "loss": 0.8429, "step": 45760 }, { "epoch": 0.5577492596248766, "grad_norm": 1.981054663658142, "learning_rate": 2.3277100705580504e-06, "loss": 0.8181, "step": 45765 }, { "epoch": 0.5578101958490244, "grad_norm": 2.0017552375793457, "learning_rate": 2.327389352148814e-06, "loss": 0.7947, "step": 45770 }, { "epoch": 0.5578711320731722, "grad_norm": 2.1525039672851562, "learning_rate": 2.327068633739577e-06, "loss": 0.8065, "step": 45775 }, { "epoch": 0.55793206829732, "grad_norm": 1.8179516792297363, "learning_rate": 2.3267479153303403e-06, "loss": 0.7734, "step": 45780 }, { "epoch": 0.5579930045214678, "grad_norm": 1.8277044296264648, "learning_rate": 2.3264271969211037e-06, "loss": 0.8626, "step": 45785 }, { "epoch": 0.5580539407456157, "grad_norm": 1.912933349609375, "learning_rate": 2.3261064785118667e-06, "loss": 0.7765, "step": 45790 }, { "epoch": 0.5581148769697635, "grad_norm": 2.6821322441101074, "learning_rate": 2.32578576010263e-06, "loss": 0.8118, "step": 45795 }, { "epoch": 0.5581758131939113, "grad_norm": 2.0293173789978027, "learning_rate": 2.325465041693393e-06, "loss": 0.8388, "step": 45800 }, { "epoch": 0.558236749418059, "grad_norm": 1.862445592880249, "learning_rate": 2.3251443232841566e-06, "loss": 0.8499, "step": 45805 }, { "epoch": 0.5582976856422068, "grad_norm": 1.9230847358703613, "learning_rate": 2.32482360487492e-06, "loss": 0.8067, "step": 45810 }, { "epoch": 0.5583586218663547, "grad_norm": 2.078502893447876, "learning_rate": 2.324502886465683e-06, "loss": 0.9047, "step": 45815 }, { "epoch": 0.5584195580905025, "grad_norm": 2.5739314556121826, "learning_rate": 2.3241821680564465e-06, "loss": 0.8262, "step": 45820 }, { "epoch": 0.5584804943146503, "grad_norm": 1.8587825298309326, "learning_rate": 2.32386144964721e-06, "loss": 0.8468, "step": 45825 }, { "epoch": 0.5585414305387981, "grad_norm": 1.8633599281311035, "learning_rate": 2.3235407312379734e-06, "loss": 0.8072, "step": 45830 }, { "epoch": 0.558602366762946, "grad_norm": 1.995674967765808, "learning_rate": 2.3232200128287364e-06, "loss": 0.7697, "step": 45835 }, { "epoch": 0.5586633029870937, "grad_norm": 1.90800940990448, "learning_rate": 2.3228992944195e-06, "loss": 0.7885, "step": 45840 }, { "epoch": 0.5587242392112415, "grad_norm": 1.9959734678268433, "learning_rate": 2.3225785760102633e-06, "loss": 0.8797, "step": 45845 }, { "epoch": 0.5587851754353893, "grad_norm": 1.921814203262329, "learning_rate": 2.3222578576010267e-06, "loss": 0.8144, "step": 45850 }, { "epoch": 0.5588461116595371, "grad_norm": 2.199120283126831, "learning_rate": 2.3219371391917897e-06, "loss": 0.791, "step": 45855 }, { "epoch": 0.558907047883685, "grad_norm": 1.9080479145050049, "learning_rate": 2.321616420782553e-06, "loss": 0.8329, "step": 45860 }, { "epoch": 0.5589679841078328, "grad_norm": 1.7753783464431763, "learning_rate": 2.3212957023733166e-06, "loss": 0.8487, "step": 45865 }, { "epoch": 0.5590289203319806, "grad_norm": 1.9925284385681152, "learning_rate": 2.3209749839640796e-06, "loss": 0.7671, "step": 45870 }, { "epoch": 0.5590898565561283, "grad_norm": 2.64959979057312, "learning_rate": 2.320654265554843e-06, "loss": 0.7929, "step": 45875 }, { "epoch": 0.5591507927802761, "grad_norm": 2.1483538150787354, "learning_rate": 2.320333547145606e-06, "loss": 0.892, "step": 45880 }, { "epoch": 0.559211729004424, "grad_norm": 2.4289474487304688, "learning_rate": 2.3200128287363695e-06, "loss": 0.8431, "step": 45885 }, { "epoch": 0.5592726652285718, "grad_norm": 2.2277565002441406, "learning_rate": 2.319692110327133e-06, "loss": 0.789, "step": 45890 }, { "epoch": 0.5593336014527196, "grad_norm": 2.070136070251465, "learning_rate": 2.319371391917896e-06, "loss": 0.8435, "step": 45895 }, { "epoch": 0.5593945376768674, "grad_norm": 2.036796808242798, "learning_rate": 2.3190506735086594e-06, "loss": 0.8242, "step": 45900 }, { "epoch": 0.5594554739010152, "grad_norm": 2.1553447246551514, "learning_rate": 2.318729955099423e-06, "loss": 0.8789, "step": 45905 }, { "epoch": 0.559516410125163, "grad_norm": 2.0220212936401367, "learning_rate": 2.3184092366901863e-06, "loss": 0.7904, "step": 45910 }, { "epoch": 0.5595773463493108, "grad_norm": 1.9757788181304932, "learning_rate": 2.3180885182809498e-06, "loss": 0.8277, "step": 45915 }, { "epoch": 0.5596382825734586, "grad_norm": 2.128582715988159, "learning_rate": 2.3177677998717128e-06, "loss": 0.8334, "step": 45920 }, { "epoch": 0.5596992187976064, "grad_norm": 2.0485377311706543, "learning_rate": 2.3174470814624762e-06, "loss": 0.7971, "step": 45925 }, { "epoch": 0.5597601550217542, "grad_norm": 1.4757658243179321, "learning_rate": 2.3171263630532397e-06, "loss": 0.7527, "step": 45930 }, { "epoch": 0.5598210912459021, "grad_norm": 2.349268674850464, "learning_rate": 2.3168056446440027e-06, "loss": 0.8035, "step": 45935 }, { "epoch": 0.5598820274700499, "grad_norm": 2.070854902267456, "learning_rate": 2.316484926234766e-06, "loss": 0.8672, "step": 45940 }, { "epoch": 0.5599429636941976, "grad_norm": 1.9752931594848633, "learning_rate": 2.3161642078255296e-06, "loss": 0.7964, "step": 45945 }, { "epoch": 0.5600038999183454, "grad_norm": 1.6945172548294067, "learning_rate": 2.3158434894162926e-06, "loss": 0.7993, "step": 45950 }, { "epoch": 0.5600648361424932, "grad_norm": 1.8497984409332275, "learning_rate": 2.315522771007056e-06, "loss": 0.856, "step": 45955 }, { "epoch": 0.5601257723666411, "grad_norm": 2.3780744075775146, "learning_rate": 2.3152020525978195e-06, "loss": 0.8705, "step": 45960 }, { "epoch": 0.5601867085907889, "grad_norm": 2.0708041191101074, "learning_rate": 2.3148813341885825e-06, "loss": 0.866, "step": 45965 }, { "epoch": 0.5602476448149367, "grad_norm": 1.9924300909042358, "learning_rate": 2.314560615779346e-06, "loss": 0.8481, "step": 45970 }, { "epoch": 0.5603085810390845, "grad_norm": 1.623091220855713, "learning_rate": 2.314239897370109e-06, "loss": 0.7924, "step": 45975 }, { "epoch": 0.5603695172632323, "grad_norm": 2.1217892169952393, "learning_rate": 2.3139191789608724e-06, "loss": 0.8521, "step": 45980 }, { "epoch": 0.5604304534873801, "grad_norm": 1.9309536218643188, "learning_rate": 2.313598460551636e-06, "loss": 0.9024, "step": 45985 }, { "epoch": 0.5604913897115279, "grad_norm": 1.7467058897018433, "learning_rate": 2.3132777421423992e-06, "loss": 0.7482, "step": 45990 }, { "epoch": 0.5605523259356757, "grad_norm": 2.093552350997925, "learning_rate": 2.3129570237331627e-06, "loss": 0.8216, "step": 45995 }, { "epoch": 0.5606132621598235, "grad_norm": 1.85309898853302, "learning_rate": 2.3126363053239257e-06, "loss": 0.9429, "step": 46000 }, { "epoch": 0.5606741983839714, "grad_norm": 2.0391407012939453, "learning_rate": 2.312315586914689e-06, "loss": 0.8328, "step": 46005 }, { "epoch": 0.5607351346081192, "grad_norm": 1.9873470067977905, "learning_rate": 2.3119948685054526e-06, "loss": 0.8511, "step": 46010 }, { "epoch": 0.5607960708322669, "grad_norm": 2.186208486557007, "learning_rate": 2.3116741500962156e-06, "loss": 0.813, "step": 46015 }, { "epoch": 0.5608570070564147, "grad_norm": 1.9341856241226196, "learning_rate": 2.311353431686979e-06, "loss": 0.8091, "step": 46020 }, { "epoch": 0.5609179432805625, "grad_norm": 1.9332664012908936, "learning_rate": 2.3110327132777425e-06, "loss": 0.7903, "step": 46025 }, { "epoch": 0.5609788795047104, "grad_norm": 1.7767478227615356, "learning_rate": 2.3107119948685055e-06, "loss": 0.7544, "step": 46030 }, { "epoch": 0.5610398157288582, "grad_norm": 1.9226512908935547, "learning_rate": 2.310391276459269e-06, "loss": 0.8921, "step": 46035 }, { "epoch": 0.561100751953006, "grad_norm": 1.9550065994262695, "learning_rate": 2.3100705580500324e-06, "loss": 0.8069, "step": 46040 }, { "epoch": 0.5611616881771538, "grad_norm": 2.053330659866333, "learning_rate": 2.3097498396407954e-06, "loss": 0.8587, "step": 46045 }, { "epoch": 0.5612226244013016, "grad_norm": 2.3813722133636475, "learning_rate": 2.309429121231559e-06, "loss": 0.7901, "step": 46050 }, { "epoch": 0.5612835606254494, "grad_norm": 1.8603229522705078, "learning_rate": 2.3091084028223223e-06, "loss": 0.7989, "step": 46055 }, { "epoch": 0.5613444968495972, "grad_norm": 1.7768183946609497, "learning_rate": 2.3087876844130853e-06, "loss": 0.8449, "step": 46060 }, { "epoch": 0.561405433073745, "grad_norm": 2.0424323081970215, "learning_rate": 2.3084669660038487e-06, "loss": 0.8524, "step": 46065 }, { "epoch": 0.5614663692978928, "grad_norm": 2.4318244457244873, "learning_rate": 2.308146247594612e-06, "loss": 0.7876, "step": 46070 }, { "epoch": 0.5615273055220407, "grad_norm": 1.8756470680236816, "learning_rate": 2.3078255291853756e-06, "loss": 0.8195, "step": 46075 }, { "epoch": 0.5615882417461885, "grad_norm": 1.9590121507644653, "learning_rate": 2.307504810776139e-06, "loss": 0.8697, "step": 46080 }, { "epoch": 0.5616491779703362, "grad_norm": 1.891204595565796, "learning_rate": 2.307184092366902e-06, "loss": 0.8843, "step": 46085 }, { "epoch": 0.561710114194484, "grad_norm": 2.467160224914551, "learning_rate": 2.3068633739576655e-06, "loss": 0.7847, "step": 46090 }, { "epoch": 0.5617710504186318, "grad_norm": 2.1393332481384277, "learning_rate": 2.3065426555484285e-06, "loss": 0.8724, "step": 46095 }, { "epoch": 0.5618319866427797, "grad_norm": 1.845903754234314, "learning_rate": 2.306221937139192e-06, "loss": 0.813, "step": 46100 }, { "epoch": 0.5618929228669275, "grad_norm": 2.5813188552856445, "learning_rate": 2.3059012187299554e-06, "loss": 0.8362, "step": 46105 }, { "epoch": 0.5619538590910753, "grad_norm": 2.036275625228882, "learning_rate": 2.3055805003207184e-06, "loss": 0.9091, "step": 46110 }, { "epoch": 0.5620147953152231, "grad_norm": 2.6594784259796143, "learning_rate": 2.305259781911482e-06, "loss": 0.804, "step": 46115 }, { "epoch": 0.5620757315393708, "grad_norm": 2.0103952884674072, "learning_rate": 2.3049390635022453e-06, "loss": 0.8256, "step": 46120 }, { "epoch": 0.5621366677635187, "grad_norm": 1.8368957042694092, "learning_rate": 2.3046183450930083e-06, "loss": 0.7923, "step": 46125 }, { "epoch": 0.5621976039876665, "grad_norm": 1.7955875396728516, "learning_rate": 2.3042976266837718e-06, "loss": 0.7757, "step": 46130 }, { "epoch": 0.5622585402118143, "grad_norm": 1.7040798664093018, "learning_rate": 2.303976908274535e-06, "loss": 0.8498, "step": 46135 }, { "epoch": 0.5623194764359621, "grad_norm": 1.9001320600509644, "learning_rate": 2.3036561898652982e-06, "loss": 0.8064, "step": 46140 }, { "epoch": 0.56238041266011, "grad_norm": 2.214207172393799, "learning_rate": 2.3033354714560617e-06, "loss": 0.7539, "step": 46145 }, { "epoch": 0.5624413488842578, "grad_norm": 1.885612964630127, "learning_rate": 2.303014753046825e-06, "loss": 0.8125, "step": 46150 }, { "epoch": 0.5625022851084055, "grad_norm": 2.1140167713165283, "learning_rate": 2.3026940346375885e-06, "loss": 0.8462, "step": 46155 }, { "epoch": 0.5625632213325533, "grad_norm": 1.956043004989624, "learning_rate": 2.302373316228352e-06, "loss": 0.815, "step": 46160 }, { "epoch": 0.5626241575567011, "grad_norm": 1.7110246419906616, "learning_rate": 2.302052597819115e-06, "loss": 0.846, "step": 46165 }, { "epoch": 0.562685093780849, "grad_norm": 1.796484112739563, "learning_rate": 2.3017318794098784e-06, "loss": 0.8669, "step": 46170 }, { "epoch": 0.5627460300049968, "grad_norm": 2.129969358444214, "learning_rate": 2.3014111610006415e-06, "loss": 0.8537, "step": 46175 }, { "epoch": 0.5628069662291446, "grad_norm": 1.9609014987945557, "learning_rate": 2.301090442591405e-06, "loss": 0.8177, "step": 46180 }, { "epoch": 0.5628679024532924, "grad_norm": 2.163426399230957, "learning_rate": 2.3007697241821683e-06, "loss": 0.8796, "step": 46185 }, { "epoch": 0.5629288386774401, "grad_norm": 1.9954220056533813, "learning_rate": 2.3004490057729314e-06, "loss": 0.8684, "step": 46190 }, { "epoch": 0.562989774901588, "grad_norm": 2.128127098083496, "learning_rate": 2.300128287363695e-06, "loss": 0.8392, "step": 46195 }, { "epoch": 0.5630507111257358, "grad_norm": 2.4468255043029785, "learning_rate": 2.2998075689544582e-06, "loss": 0.8141, "step": 46200 }, { "epoch": 0.5631116473498836, "grad_norm": 1.9217050075531006, "learning_rate": 2.2994868505452213e-06, "loss": 0.8643, "step": 46205 }, { "epoch": 0.5631725835740314, "grad_norm": 1.9375988245010376, "learning_rate": 2.2991661321359847e-06, "loss": 0.7812, "step": 46210 }, { "epoch": 0.5632335197981793, "grad_norm": 1.956657886505127, "learning_rate": 2.298845413726748e-06, "loss": 0.8467, "step": 46215 }, { "epoch": 0.5632944560223271, "grad_norm": 2.0234224796295166, "learning_rate": 2.2985246953175116e-06, "loss": 0.8562, "step": 46220 }, { "epoch": 0.5633553922464748, "grad_norm": 1.7558059692382812, "learning_rate": 2.2982039769082746e-06, "loss": 0.9034, "step": 46225 }, { "epoch": 0.5634163284706226, "grad_norm": 1.8254423141479492, "learning_rate": 2.297883258499038e-06, "loss": 0.825, "step": 46230 }, { "epoch": 0.5634772646947704, "grad_norm": 2.005756139755249, "learning_rate": 2.2975625400898015e-06, "loss": 0.8273, "step": 46235 }, { "epoch": 0.5635382009189183, "grad_norm": 2.070197343826294, "learning_rate": 2.297241821680565e-06, "loss": 0.9221, "step": 46240 }, { "epoch": 0.5635991371430661, "grad_norm": 1.9372584819793701, "learning_rate": 2.296921103271328e-06, "loss": 0.7727, "step": 46245 }, { "epoch": 0.5636600733672139, "grad_norm": 2.605046272277832, "learning_rate": 2.2966003848620914e-06, "loss": 0.8298, "step": 46250 }, { "epoch": 0.5637210095913617, "grad_norm": 1.9598592519760132, "learning_rate": 2.296279666452855e-06, "loss": 0.8515, "step": 46255 }, { "epoch": 0.5637819458155094, "grad_norm": 1.9304627180099487, "learning_rate": 2.295958948043618e-06, "loss": 0.7807, "step": 46260 }, { "epoch": 0.5638428820396573, "grad_norm": 1.6955804824829102, "learning_rate": 2.2956382296343813e-06, "loss": 0.7966, "step": 46265 }, { "epoch": 0.5639038182638051, "grad_norm": 2.1477408409118652, "learning_rate": 2.2953175112251443e-06, "loss": 0.8075, "step": 46270 }, { "epoch": 0.5639647544879529, "grad_norm": 1.925000786781311, "learning_rate": 2.2949967928159077e-06, "loss": 0.7779, "step": 46275 }, { "epoch": 0.5640256907121007, "grad_norm": 2.12357234954834, "learning_rate": 2.294676074406671e-06, "loss": 0.7851, "step": 46280 }, { "epoch": 0.5640866269362486, "grad_norm": 2.2070164680480957, "learning_rate": 2.294355355997434e-06, "loss": 0.8558, "step": 46285 }, { "epoch": 0.5641475631603964, "grad_norm": 2.047746181488037, "learning_rate": 2.2940346375881976e-06, "loss": 0.8395, "step": 46290 }, { "epoch": 0.5642084993845441, "grad_norm": 2.1503093242645264, "learning_rate": 2.293713919178961e-06, "loss": 0.8642, "step": 46295 }, { "epoch": 0.5642694356086919, "grad_norm": 2.127110719680786, "learning_rate": 2.2933932007697245e-06, "loss": 0.8891, "step": 46300 }, { "epoch": 0.5643303718328397, "grad_norm": 1.7623636722564697, "learning_rate": 2.293072482360488e-06, "loss": 0.8371, "step": 46305 }, { "epoch": 0.5643913080569876, "grad_norm": 2.0810651779174805, "learning_rate": 2.292751763951251e-06, "loss": 0.8176, "step": 46310 }, { "epoch": 0.5644522442811354, "grad_norm": 2.0342602729797363, "learning_rate": 2.2924310455420144e-06, "loss": 0.8817, "step": 46315 }, { "epoch": 0.5645131805052832, "grad_norm": 2.148902654647827, "learning_rate": 2.292110327132778e-06, "loss": 0.912, "step": 46320 }, { "epoch": 0.564574116729431, "grad_norm": 1.7688908576965332, "learning_rate": 2.291789608723541e-06, "loss": 0.8367, "step": 46325 }, { "epoch": 0.5646350529535787, "grad_norm": 2.0807549953460693, "learning_rate": 2.2914688903143043e-06, "loss": 0.8084, "step": 46330 }, { "epoch": 0.5646959891777266, "grad_norm": 1.8965797424316406, "learning_rate": 2.2911481719050677e-06, "loss": 0.806, "step": 46335 }, { "epoch": 0.5647569254018744, "grad_norm": 2.014427900314331, "learning_rate": 2.2908274534958308e-06, "loss": 0.8263, "step": 46340 }, { "epoch": 0.5648178616260222, "grad_norm": 2.066563606262207, "learning_rate": 2.290506735086594e-06, "loss": 0.768, "step": 46345 }, { "epoch": 0.56487879785017, "grad_norm": 1.831536889076233, "learning_rate": 2.290186016677357e-06, "loss": 0.8176, "step": 46350 }, { "epoch": 0.5649397340743179, "grad_norm": 2.4189212322235107, "learning_rate": 2.2898652982681206e-06, "loss": 0.8064, "step": 46355 }, { "epoch": 0.5650006702984657, "grad_norm": 1.9912997484207153, "learning_rate": 2.289544579858884e-06, "loss": 0.7876, "step": 46360 }, { "epoch": 0.5650616065226134, "grad_norm": 1.6816257238388062, "learning_rate": 2.289223861449647e-06, "loss": 0.8408, "step": 46365 }, { "epoch": 0.5651225427467612, "grad_norm": 2.0010275840759277, "learning_rate": 2.2889031430404105e-06, "loss": 0.8414, "step": 46370 }, { "epoch": 0.565183478970909, "grad_norm": 1.6961404085159302, "learning_rate": 2.288582424631174e-06, "loss": 0.8138, "step": 46375 }, { "epoch": 0.5652444151950569, "grad_norm": 1.6583163738250732, "learning_rate": 2.2882617062219374e-06, "loss": 0.8842, "step": 46380 }, { "epoch": 0.5653053514192047, "grad_norm": 2.2582480907440186, "learning_rate": 2.287940987812701e-06, "loss": 0.8204, "step": 46385 }, { "epoch": 0.5653662876433525, "grad_norm": 2.0074350833892822, "learning_rate": 2.287620269403464e-06, "loss": 0.8514, "step": 46390 }, { "epoch": 0.5654272238675003, "grad_norm": 1.9159612655639648, "learning_rate": 2.2872995509942273e-06, "loss": 0.8439, "step": 46395 }, { "epoch": 0.565488160091648, "grad_norm": 1.9399174451828003, "learning_rate": 2.2869788325849908e-06, "loss": 0.8417, "step": 46400 }, { "epoch": 0.5655490963157959, "grad_norm": 1.9107091426849365, "learning_rate": 2.2866581141757538e-06, "loss": 0.8468, "step": 46405 }, { "epoch": 0.5656100325399437, "grad_norm": 1.881735920906067, "learning_rate": 2.2863373957665172e-06, "loss": 0.8291, "step": 46410 }, { "epoch": 0.5656709687640915, "grad_norm": 1.813093662261963, "learning_rate": 2.2860166773572807e-06, "loss": 0.7998, "step": 46415 }, { "epoch": 0.5657319049882393, "grad_norm": 1.861638069152832, "learning_rate": 2.2856959589480437e-06, "loss": 0.8686, "step": 46420 }, { "epoch": 0.5657928412123872, "grad_norm": 1.5957573652267456, "learning_rate": 2.285375240538807e-06, "loss": 0.8526, "step": 46425 }, { "epoch": 0.565853777436535, "grad_norm": 1.9957282543182373, "learning_rate": 2.28505452212957e-06, "loss": 0.8197, "step": 46430 }, { "epoch": 0.5659147136606827, "grad_norm": 1.7192587852478027, "learning_rate": 2.2847338037203336e-06, "loss": 0.8516, "step": 46435 }, { "epoch": 0.5659756498848305, "grad_norm": 1.7046990394592285, "learning_rate": 2.284413085311097e-06, "loss": 0.7867, "step": 46440 }, { "epoch": 0.5660365861089783, "grad_norm": 1.8955531120300293, "learning_rate": 2.2840923669018605e-06, "loss": 0.8032, "step": 46445 }, { "epoch": 0.5660975223331262, "grad_norm": 2.1931300163269043, "learning_rate": 2.2837716484926235e-06, "loss": 0.8474, "step": 46450 }, { "epoch": 0.566158458557274, "grad_norm": 2.037426710128784, "learning_rate": 2.283450930083387e-06, "loss": 0.804, "step": 46455 }, { "epoch": 0.5662193947814218, "grad_norm": 1.9349594116210938, "learning_rate": 2.2831302116741504e-06, "loss": 0.8789, "step": 46460 }, { "epoch": 0.5662803310055696, "grad_norm": 1.873466968536377, "learning_rate": 2.282809493264914e-06, "loss": 0.8334, "step": 46465 }, { "epoch": 0.5663412672297173, "grad_norm": 2.008112907409668, "learning_rate": 2.282488774855677e-06, "loss": 0.7951, "step": 46470 }, { "epoch": 0.5664022034538652, "grad_norm": 1.764153003692627, "learning_rate": 2.2821680564464403e-06, "loss": 0.8828, "step": 46475 }, { "epoch": 0.566463139678013, "grad_norm": 2.159061908721924, "learning_rate": 2.2818473380372037e-06, "loss": 0.8364, "step": 46480 }, { "epoch": 0.5665240759021608, "grad_norm": 1.687476396560669, "learning_rate": 2.2815266196279667e-06, "loss": 0.8024, "step": 46485 }, { "epoch": 0.5665850121263086, "grad_norm": 1.9714158773422241, "learning_rate": 2.28120590121873e-06, "loss": 0.8537, "step": 46490 }, { "epoch": 0.5666459483504565, "grad_norm": 1.9048794507980347, "learning_rate": 2.2808851828094936e-06, "loss": 0.7558, "step": 46495 }, { "epoch": 0.5667068845746042, "grad_norm": 1.9570257663726807, "learning_rate": 2.2805644644002566e-06, "loss": 0.8373, "step": 46500 }, { "epoch": 0.566767820798752, "grad_norm": 1.8737916946411133, "learning_rate": 2.28024374599102e-06, "loss": 0.8139, "step": 46505 }, { "epoch": 0.5668287570228998, "grad_norm": 2.0130064487457275, "learning_rate": 2.279923027581783e-06, "loss": 0.8463, "step": 46510 }, { "epoch": 0.5668896932470476, "grad_norm": 2.024451732635498, "learning_rate": 2.2796023091725465e-06, "loss": 0.8468, "step": 46515 }, { "epoch": 0.5669506294711955, "grad_norm": 1.654944896697998, "learning_rate": 2.27928159076331e-06, "loss": 0.8438, "step": 46520 }, { "epoch": 0.5670115656953433, "grad_norm": 2.016745090484619, "learning_rate": 2.2789608723540734e-06, "loss": 0.8145, "step": 46525 }, { "epoch": 0.5670725019194911, "grad_norm": 2.0045220851898193, "learning_rate": 2.278640153944837e-06, "loss": 0.8647, "step": 46530 }, { "epoch": 0.5671334381436388, "grad_norm": 2.0354995727539062, "learning_rate": 2.2783194355356e-06, "loss": 0.847, "step": 46535 }, { "epoch": 0.5671943743677866, "grad_norm": 2.056988477706909, "learning_rate": 2.2779987171263633e-06, "loss": 0.8141, "step": 46540 }, { "epoch": 0.5672553105919345, "grad_norm": 1.696816086769104, "learning_rate": 2.2776779987171267e-06, "loss": 0.8456, "step": 46545 }, { "epoch": 0.5673162468160823, "grad_norm": 1.8098399639129639, "learning_rate": 2.27735728030789e-06, "loss": 0.8436, "step": 46550 }, { "epoch": 0.5673771830402301, "grad_norm": 2.2188634872436523, "learning_rate": 2.277036561898653e-06, "loss": 0.7656, "step": 46555 }, { "epoch": 0.5674381192643779, "grad_norm": 2.412104606628418, "learning_rate": 2.2767158434894166e-06, "loss": 0.839, "step": 46560 }, { "epoch": 0.5674990554885257, "grad_norm": 2.361426830291748, "learning_rate": 2.2763951250801796e-06, "loss": 0.8635, "step": 46565 }, { "epoch": 0.5675599917126735, "grad_norm": 1.7974401712417603, "learning_rate": 2.276074406670943e-06, "loss": 0.8081, "step": 46570 }, { "epoch": 0.5676209279368213, "grad_norm": 1.8506168127059937, "learning_rate": 2.2757536882617065e-06, "loss": 0.7632, "step": 46575 }, { "epoch": 0.5676818641609691, "grad_norm": 2.3242807388305664, "learning_rate": 2.2754329698524695e-06, "loss": 0.8308, "step": 46580 }, { "epoch": 0.5677428003851169, "grad_norm": 2.2750637531280518, "learning_rate": 2.275112251443233e-06, "loss": 0.8021, "step": 46585 }, { "epoch": 0.5678037366092648, "grad_norm": 1.7640470266342163, "learning_rate": 2.2747915330339964e-06, "loss": 0.9029, "step": 46590 }, { "epoch": 0.5678646728334126, "grad_norm": 2.0553689002990723, "learning_rate": 2.2744708146247594e-06, "loss": 0.8391, "step": 46595 }, { "epoch": 0.5679256090575604, "grad_norm": 2.2630951404571533, "learning_rate": 2.274150096215523e-06, "loss": 0.8861, "step": 46600 }, { "epoch": 0.5679865452817081, "grad_norm": 2.169945478439331, "learning_rate": 2.2738293778062863e-06, "loss": 0.8569, "step": 46605 }, { "epoch": 0.5680474815058559, "grad_norm": 2.1394202709198, "learning_rate": 2.2735086593970498e-06, "loss": 0.8763, "step": 46610 }, { "epoch": 0.5681084177300038, "grad_norm": 1.9546117782592773, "learning_rate": 2.2731879409878128e-06, "loss": 0.7979, "step": 46615 }, { "epoch": 0.5681693539541516, "grad_norm": 1.81418776512146, "learning_rate": 2.272867222578576e-06, "loss": 0.7911, "step": 46620 }, { "epoch": 0.5682302901782994, "grad_norm": 2.358163833618164, "learning_rate": 2.2725465041693396e-06, "loss": 0.7883, "step": 46625 }, { "epoch": 0.5682912264024472, "grad_norm": 2.1472771167755127, "learning_rate": 2.272225785760103e-06, "loss": 0.805, "step": 46630 }, { "epoch": 0.568352162626595, "grad_norm": 2.0586533546447754, "learning_rate": 2.271905067350866e-06, "loss": 0.8661, "step": 46635 }, { "epoch": 0.5684130988507428, "grad_norm": 2.404695510864258, "learning_rate": 2.2715843489416295e-06, "loss": 0.8613, "step": 46640 }, { "epoch": 0.5684740350748906, "grad_norm": 2.193721294403076, "learning_rate": 2.2712636305323926e-06, "loss": 0.8292, "step": 46645 }, { "epoch": 0.5685349712990384, "grad_norm": 1.9868897199630737, "learning_rate": 2.270942912123156e-06, "loss": 0.7702, "step": 46650 }, { "epoch": 0.5685959075231862, "grad_norm": 1.9441888332366943, "learning_rate": 2.2706221937139194e-06, "loss": 0.8141, "step": 46655 }, { "epoch": 0.568656843747334, "grad_norm": 1.9544696807861328, "learning_rate": 2.2703014753046825e-06, "loss": 0.8552, "step": 46660 }, { "epoch": 0.5687177799714819, "grad_norm": 2.202800750732422, "learning_rate": 2.269980756895446e-06, "loss": 0.7904, "step": 46665 }, { "epoch": 0.5687787161956297, "grad_norm": 2.209453821182251, "learning_rate": 2.2696600384862093e-06, "loss": 0.8489, "step": 46670 }, { "epoch": 0.5688396524197774, "grad_norm": 2.0029306411743164, "learning_rate": 2.2693393200769724e-06, "loss": 0.8171, "step": 46675 }, { "epoch": 0.5689005886439252, "grad_norm": 2.3885068893432617, "learning_rate": 2.269018601667736e-06, "loss": 0.8556, "step": 46680 }, { "epoch": 0.568961524868073, "grad_norm": 2.529125213623047, "learning_rate": 2.2686978832584992e-06, "loss": 0.8775, "step": 46685 }, { "epoch": 0.5690224610922209, "grad_norm": 2.059690475463867, "learning_rate": 2.2683771648492627e-06, "loss": 0.8374, "step": 46690 }, { "epoch": 0.5690833973163687, "grad_norm": 1.7653664350509644, "learning_rate": 2.268056446440026e-06, "loss": 0.8418, "step": 46695 }, { "epoch": 0.5691443335405165, "grad_norm": 1.8594163656234741, "learning_rate": 2.267735728030789e-06, "loss": 0.817, "step": 46700 }, { "epoch": 0.5692052697646643, "grad_norm": 1.7720266580581665, "learning_rate": 2.2674150096215526e-06, "loss": 0.8356, "step": 46705 }, { "epoch": 0.5692662059888121, "grad_norm": 1.851218819618225, "learning_rate": 2.267094291212316e-06, "loss": 0.8611, "step": 46710 }, { "epoch": 0.5693271422129599, "grad_norm": 2.045295238494873, "learning_rate": 2.266773572803079e-06, "loss": 0.7993, "step": 46715 }, { "epoch": 0.5693880784371077, "grad_norm": 2.042210102081299, "learning_rate": 2.2664528543938425e-06, "loss": 0.9208, "step": 46720 }, { "epoch": 0.5694490146612555, "grad_norm": 2.0723907947540283, "learning_rate": 2.2661321359846055e-06, "loss": 0.8577, "step": 46725 }, { "epoch": 0.5695099508854033, "grad_norm": 2.043646812438965, "learning_rate": 2.265811417575369e-06, "loss": 0.829, "step": 46730 }, { "epoch": 0.5695708871095512, "grad_norm": 2.137298345565796, "learning_rate": 2.2654906991661324e-06, "loss": 0.8326, "step": 46735 }, { "epoch": 0.569631823333699, "grad_norm": 1.8671880960464478, "learning_rate": 2.2651699807568954e-06, "loss": 0.9115, "step": 46740 }, { "epoch": 0.5696927595578467, "grad_norm": 1.9234105348587036, "learning_rate": 2.264849262347659e-06, "loss": 0.828, "step": 46745 }, { "epoch": 0.5697536957819945, "grad_norm": 2.1192119121551514, "learning_rate": 2.2645285439384223e-06, "loss": 0.7965, "step": 46750 }, { "epoch": 0.5698146320061424, "grad_norm": 2.1220977306365967, "learning_rate": 2.2642078255291857e-06, "loss": 0.865, "step": 46755 }, { "epoch": 0.5698755682302902, "grad_norm": 2.3696062564849854, "learning_rate": 2.2638871071199487e-06, "loss": 0.7973, "step": 46760 }, { "epoch": 0.569936504454438, "grad_norm": 1.9479420185089111, "learning_rate": 2.263566388710712e-06, "loss": 0.8136, "step": 46765 }, { "epoch": 0.5699974406785858, "grad_norm": 1.8303611278533936, "learning_rate": 2.2632456703014756e-06, "loss": 0.8468, "step": 46770 }, { "epoch": 0.5700583769027336, "grad_norm": 1.9872710704803467, "learning_rate": 2.262924951892239e-06, "loss": 0.9045, "step": 46775 }, { "epoch": 0.5701193131268814, "grad_norm": 1.958810567855835, "learning_rate": 2.262604233483002e-06, "loss": 0.8528, "step": 46780 }, { "epoch": 0.5701802493510292, "grad_norm": 1.8808687925338745, "learning_rate": 2.2622835150737655e-06, "loss": 0.8159, "step": 46785 }, { "epoch": 0.570241185575177, "grad_norm": 2.093061923980713, "learning_rate": 2.261962796664529e-06, "loss": 1.0478, "step": 46790 }, { "epoch": 0.5703021217993248, "grad_norm": 1.8567081689834595, "learning_rate": 2.261642078255292e-06, "loss": 0.8584, "step": 46795 }, { "epoch": 0.5703630580234726, "grad_norm": 2.0501630306243896, "learning_rate": 2.2613213598460554e-06, "loss": 0.7812, "step": 46800 }, { "epoch": 0.5704239942476205, "grad_norm": 2.1112897396087646, "learning_rate": 2.2610006414368184e-06, "loss": 0.8503, "step": 46805 }, { "epoch": 0.5704849304717683, "grad_norm": 2.1236915588378906, "learning_rate": 2.260679923027582e-06, "loss": 0.8568, "step": 46810 }, { "epoch": 0.570545866695916, "grad_norm": 1.9572209119796753, "learning_rate": 2.2603592046183453e-06, "loss": 0.8254, "step": 46815 }, { "epoch": 0.5706068029200638, "grad_norm": 1.9125964641571045, "learning_rate": 2.2600384862091083e-06, "loss": 0.9044, "step": 46820 }, { "epoch": 0.5706677391442116, "grad_norm": 1.833236575126648, "learning_rate": 2.2597177677998718e-06, "loss": 0.7845, "step": 46825 }, { "epoch": 0.5707286753683595, "grad_norm": 2.0851290225982666, "learning_rate": 2.259397049390635e-06, "loss": 0.7929, "step": 46830 }, { "epoch": 0.5707896115925073, "grad_norm": 2.087736129760742, "learning_rate": 2.2590763309813986e-06, "loss": 0.8516, "step": 46835 }, { "epoch": 0.5708505478166551, "grad_norm": 1.9428207874298096, "learning_rate": 2.2587556125721617e-06, "loss": 0.8446, "step": 46840 }, { "epoch": 0.5709114840408029, "grad_norm": 2.0875041484832764, "learning_rate": 2.258434894162925e-06, "loss": 0.8748, "step": 46845 }, { "epoch": 0.5709724202649507, "grad_norm": 2.223236083984375, "learning_rate": 2.2581141757536885e-06, "loss": 0.8576, "step": 46850 }, { "epoch": 0.5710333564890985, "grad_norm": 1.7241617441177368, "learning_rate": 2.257793457344452e-06, "loss": 0.8683, "step": 46855 }, { "epoch": 0.5710942927132463, "grad_norm": 1.9940779209136963, "learning_rate": 2.257472738935215e-06, "loss": 0.8582, "step": 46860 }, { "epoch": 0.5711552289373941, "grad_norm": 1.703643560409546, "learning_rate": 2.2571520205259784e-06, "loss": 0.8575, "step": 46865 }, { "epoch": 0.5712161651615419, "grad_norm": 1.770564079284668, "learning_rate": 2.256831302116742e-06, "loss": 0.7881, "step": 46870 }, { "epoch": 0.5712771013856898, "grad_norm": 1.9269888401031494, "learning_rate": 2.256510583707505e-06, "loss": 0.8021, "step": 46875 }, { "epoch": 0.5713380376098376, "grad_norm": 2.0019731521606445, "learning_rate": 2.2561898652982683e-06, "loss": 0.7606, "step": 46880 }, { "epoch": 0.5713989738339853, "grad_norm": 2.3251993656158447, "learning_rate": 2.2558691468890318e-06, "loss": 0.8663, "step": 46885 }, { "epoch": 0.5714599100581331, "grad_norm": 1.8722705841064453, "learning_rate": 2.2555484284797948e-06, "loss": 0.9208, "step": 46890 }, { "epoch": 0.571520846282281, "grad_norm": 1.8621435165405273, "learning_rate": 2.2552277100705582e-06, "loss": 0.8115, "step": 46895 }, { "epoch": 0.5715817825064288, "grad_norm": 1.9809327125549316, "learning_rate": 2.2549069916613212e-06, "loss": 0.7676, "step": 46900 }, { "epoch": 0.5716427187305766, "grad_norm": 2.161660671234131, "learning_rate": 2.2545862732520847e-06, "loss": 0.9184, "step": 46905 }, { "epoch": 0.5717036549547244, "grad_norm": 2.1581928730010986, "learning_rate": 2.254265554842848e-06, "loss": 0.8678, "step": 46910 }, { "epoch": 0.5717645911788722, "grad_norm": 2.036961793899536, "learning_rate": 2.2539448364336116e-06, "loss": 0.836, "step": 46915 }, { "epoch": 0.57182552740302, "grad_norm": 1.8288297653198242, "learning_rate": 2.253624118024375e-06, "loss": 0.8095, "step": 46920 }, { "epoch": 0.5718864636271678, "grad_norm": 2.2744510173797607, "learning_rate": 2.253303399615138e-06, "loss": 0.8411, "step": 46925 }, { "epoch": 0.5719473998513156, "grad_norm": 1.9021780490875244, "learning_rate": 2.2529826812059015e-06, "loss": 0.8458, "step": 46930 }, { "epoch": 0.5720083360754634, "grad_norm": 2.2052054405212402, "learning_rate": 2.252661962796665e-06, "loss": 0.7987, "step": 46935 }, { "epoch": 0.5720692722996112, "grad_norm": 2.4707212448120117, "learning_rate": 2.252341244387428e-06, "loss": 0.8402, "step": 46940 }, { "epoch": 0.5721302085237591, "grad_norm": 2.0091686248779297, "learning_rate": 2.2520205259781914e-06, "loss": 0.7801, "step": 46945 }, { "epoch": 0.5721911447479069, "grad_norm": 2.662595272064209, "learning_rate": 2.251699807568955e-06, "loss": 0.9143, "step": 46950 }, { "epoch": 0.5722520809720546, "grad_norm": 2.017106533050537, "learning_rate": 2.251379089159718e-06, "loss": 0.7987, "step": 46955 }, { "epoch": 0.5723130171962024, "grad_norm": 1.6721469163894653, "learning_rate": 2.2510583707504813e-06, "loss": 0.7942, "step": 46960 }, { "epoch": 0.5723739534203502, "grad_norm": 1.9399025440216064, "learning_rate": 2.2507376523412447e-06, "loss": 0.8785, "step": 46965 }, { "epoch": 0.5724348896444981, "grad_norm": 1.9971392154693604, "learning_rate": 2.2504169339320077e-06, "loss": 0.832, "step": 46970 }, { "epoch": 0.5724958258686459, "grad_norm": 2.0071942806243896, "learning_rate": 2.250096215522771e-06, "loss": 0.7571, "step": 46975 }, { "epoch": 0.5725567620927937, "grad_norm": 1.7709094285964966, "learning_rate": 2.249775497113534e-06, "loss": 0.764, "step": 46980 }, { "epoch": 0.5726176983169415, "grad_norm": 1.9704328775405884, "learning_rate": 2.2494547787042976e-06, "loss": 0.8233, "step": 46985 }, { "epoch": 0.5726786345410892, "grad_norm": 1.7019670009613037, "learning_rate": 2.249134060295061e-06, "loss": 0.8684, "step": 46990 }, { "epoch": 0.5727395707652371, "grad_norm": 1.8983043432235718, "learning_rate": 2.2488133418858245e-06, "loss": 0.7966, "step": 46995 }, { "epoch": 0.5728005069893849, "grad_norm": 1.7508488893508911, "learning_rate": 2.248492623476588e-06, "loss": 0.8545, "step": 47000 }, { "epoch": 0.5728614432135327, "grad_norm": 1.603040099143982, "learning_rate": 2.2481719050673514e-06, "loss": 0.7626, "step": 47005 }, { "epoch": 0.5729223794376805, "grad_norm": 2.2118868827819824, "learning_rate": 2.2478511866581144e-06, "loss": 0.8313, "step": 47010 }, { "epoch": 0.5729833156618284, "grad_norm": 2.0103495121002197, "learning_rate": 2.247530468248878e-06, "loss": 0.885, "step": 47015 }, { "epoch": 0.5730442518859762, "grad_norm": 2.13848614692688, "learning_rate": 2.247209749839641e-06, "loss": 0.9468, "step": 47020 }, { "epoch": 0.5731051881101239, "grad_norm": 1.864274263381958, "learning_rate": 2.2468890314304043e-06, "loss": 0.828, "step": 47025 }, { "epoch": 0.5731661243342717, "grad_norm": 2.3885080814361572, "learning_rate": 2.2465683130211677e-06, "loss": 0.8031, "step": 47030 }, { "epoch": 0.5732270605584195, "grad_norm": 1.8019083738327026, "learning_rate": 2.2462475946119307e-06, "loss": 0.8717, "step": 47035 }, { "epoch": 0.5732879967825674, "grad_norm": 1.7757906913757324, "learning_rate": 2.245926876202694e-06, "loss": 0.8951, "step": 47040 }, { "epoch": 0.5733489330067152, "grad_norm": 1.6696476936340332, "learning_rate": 2.2456061577934576e-06, "loss": 0.8493, "step": 47045 }, { "epoch": 0.573409869230863, "grad_norm": 1.9588500261306763, "learning_rate": 2.2452854393842206e-06, "loss": 0.8567, "step": 47050 }, { "epoch": 0.5734708054550108, "grad_norm": 1.7770169973373413, "learning_rate": 2.244964720974984e-06, "loss": 0.8219, "step": 47055 }, { "epoch": 0.5735317416791585, "grad_norm": 1.7072302103042603, "learning_rate": 2.2446440025657475e-06, "loss": 0.7682, "step": 47060 }, { "epoch": 0.5735926779033064, "grad_norm": 2.1063668727874756, "learning_rate": 2.2443232841565105e-06, "loss": 0.8937, "step": 47065 }, { "epoch": 0.5736536141274542, "grad_norm": 2.0892622470855713, "learning_rate": 2.244002565747274e-06, "loss": 0.7956, "step": 47070 }, { "epoch": 0.573714550351602, "grad_norm": 1.9652055501937866, "learning_rate": 2.2436818473380374e-06, "loss": 0.8003, "step": 47075 }, { "epoch": 0.5737754865757498, "grad_norm": 2.18943452835083, "learning_rate": 2.243361128928801e-06, "loss": 0.824, "step": 47080 }, { "epoch": 0.5738364227998977, "grad_norm": 1.8807405233383179, "learning_rate": 2.2430404105195643e-06, "loss": 0.7923, "step": 47085 }, { "epoch": 0.5738973590240455, "grad_norm": 2.115684747695923, "learning_rate": 2.2427196921103273e-06, "loss": 0.8494, "step": 47090 }, { "epoch": 0.5739582952481932, "grad_norm": 1.7671098709106445, "learning_rate": 2.2423989737010908e-06, "loss": 0.8235, "step": 47095 }, { "epoch": 0.574019231472341, "grad_norm": 1.9417331218719482, "learning_rate": 2.2420782552918538e-06, "loss": 0.7615, "step": 47100 }, { "epoch": 0.5740801676964888, "grad_norm": 2.033505916595459, "learning_rate": 2.241757536882617e-06, "loss": 0.818, "step": 47105 }, { "epoch": 0.5741411039206367, "grad_norm": 1.9909141063690186, "learning_rate": 2.2414368184733807e-06, "loss": 0.7758, "step": 47110 }, { "epoch": 0.5742020401447845, "grad_norm": 2.1937642097473145, "learning_rate": 2.2411161000641437e-06, "loss": 0.8081, "step": 47115 }, { "epoch": 0.5742629763689323, "grad_norm": 1.7270281314849854, "learning_rate": 2.240795381654907e-06, "loss": 0.7664, "step": 47120 }, { "epoch": 0.5743239125930801, "grad_norm": 2.7394702434539795, "learning_rate": 2.2404746632456705e-06, "loss": 0.8216, "step": 47125 }, { "epoch": 0.5743848488172278, "grad_norm": 2.199720859527588, "learning_rate": 2.2401539448364336e-06, "loss": 0.913, "step": 47130 }, { "epoch": 0.5744457850413757, "grad_norm": 2.0133473873138428, "learning_rate": 2.239833226427197e-06, "loss": 0.8581, "step": 47135 }, { "epoch": 0.5745067212655235, "grad_norm": 2.23717999458313, "learning_rate": 2.2395125080179604e-06, "loss": 0.8287, "step": 47140 }, { "epoch": 0.5745676574896713, "grad_norm": 1.9070662260055542, "learning_rate": 2.239191789608724e-06, "loss": 0.8773, "step": 47145 }, { "epoch": 0.5746285937138191, "grad_norm": 1.8701460361480713, "learning_rate": 2.238871071199487e-06, "loss": 0.7593, "step": 47150 }, { "epoch": 0.574689529937967, "grad_norm": 1.8350210189819336, "learning_rate": 2.2385503527902503e-06, "loss": 0.8923, "step": 47155 }, { "epoch": 0.5747504661621148, "grad_norm": 2.0802886486053467, "learning_rate": 2.2382296343810138e-06, "loss": 0.8632, "step": 47160 }, { "epoch": 0.5748114023862625, "grad_norm": 1.7901848554611206, "learning_rate": 2.2379089159717772e-06, "loss": 0.7991, "step": 47165 }, { "epoch": 0.5748723386104103, "grad_norm": 1.6663516759872437, "learning_rate": 2.2375881975625402e-06, "loss": 0.8437, "step": 47170 }, { "epoch": 0.5749332748345581, "grad_norm": 2.0603647232055664, "learning_rate": 2.2372674791533037e-06, "loss": 0.865, "step": 47175 }, { "epoch": 0.574994211058706, "grad_norm": 1.8827122449874878, "learning_rate": 2.236946760744067e-06, "loss": 0.743, "step": 47180 }, { "epoch": 0.5750551472828538, "grad_norm": 2.133291006088257, "learning_rate": 2.23662604233483e-06, "loss": 0.8071, "step": 47185 }, { "epoch": 0.5751160835070016, "grad_norm": 2.1254265308380127, "learning_rate": 2.2363053239255936e-06, "loss": 0.8513, "step": 47190 }, { "epoch": 0.5751770197311494, "grad_norm": 2.0179803371429443, "learning_rate": 2.2359846055163566e-06, "loss": 0.836, "step": 47195 }, { "epoch": 0.5752379559552971, "grad_norm": 2.064774990081787, "learning_rate": 2.23566388710712e-06, "loss": 0.8692, "step": 47200 }, { "epoch": 0.575298892179445, "grad_norm": 1.8839198350906372, "learning_rate": 2.2353431686978835e-06, "loss": 0.9149, "step": 47205 }, { "epoch": 0.5753598284035928, "grad_norm": 2.112069606781006, "learning_rate": 2.2350224502886465e-06, "loss": 0.8206, "step": 47210 }, { "epoch": 0.5754207646277406, "grad_norm": 2.01507830619812, "learning_rate": 2.23470173187941e-06, "loss": 0.8361, "step": 47215 }, { "epoch": 0.5754817008518884, "grad_norm": 2.088522434234619, "learning_rate": 2.2343810134701734e-06, "loss": 0.8637, "step": 47220 }, { "epoch": 0.5755426370760363, "grad_norm": 1.8279672861099243, "learning_rate": 2.234060295060937e-06, "loss": 0.8619, "step": 47225 }, { "epoch": 0.5756035733001841, "grad_norm": 1.9101427793502808, "learning_rate": 2.2337395766517003e-06, "loss": 0.7984, "step": 47230 }, { "epoch": 0.5756645095243318, "grad_norm": 2.1435933113098145, "learning_rate": 2.2334188582424633e-06, "loss": 0.8243, "step": 47235 }, { "epoch": 0.5757254457484796, "grad_norm": 1.9760034084320068, "learning_rate": 2.2330981398332267e-06, "loss": 0.8147, "step": 47240 }, { "epoch": 0.5757863819726274, "grad_norm": 1.9260270595550537, "learning_rate": 2.23277742142399e-06, "loss": 0.8258, "step": 47245 }, { "epoch": 0.5758473181967753, "grad_norm": 1.8981492519378662, "learning_rate": 2.232456703014753e-06, "loss": 0.8546, "step": 47250 }, { "epoch": 0.5759082544209231, "grad_norm": 2.0777323246002197, "learning_rate": 2.2321359846055166e-06, "loss": 0.8187, "step": 47255 }, { "epoch": 0.5759691906450709, "grad_norm": 2.0483202934265137, "learning_rate": 2.23181526619628e-06, "loss": 0.8655, "step": 47260 }, { "epoch": 0.5760301268692187, "grad_norm": 1.7913423776626587, "learning_rate": 2.231494547787043e-06, "loss": 0.7788, "step": 47265 }, { "epoch": 0.5760910630933664, "grad_norm": 1.9576265811920166, "learning_rate": 2.2311738293778065e-06, "loss": 0.825, "step": 47270 }, { "epoch": 0.5761519993175143, "grad_norm": 1.8021678924560547, "learning_rate": 2.2308531109685695e-06, "loss": 0.8403, "step": 47275 }, { "epoch": 0.5762129355416621, "grad_norm": 2.3269553184509277, "learning_rate": 2.230532392559333e-06, "loss": 0.8247, "step": 47280 }, { "epoch": 0.5762738717658099, "grad_norm": 2.011883497238159, "learning_rate": 2.2302116741500964e-06, "loss": 0.8697, "step": 47285 }, { "epoch": 0.5763348079899577, "grad_norm": 2.144747734069824, "learning_rate": 2.2298909557408594e-06, "loss": 0.8999, "step": 47290 }, { "epoch": 0.5763957442141056, "grad_norm": 1.9954802989959717, "learning_rate": 2.229570237331623e-06, "loss": 0.8476, "step": 47295 }, { "epoch": 0.5764566804382534, "grad_norm": 2.0618538856506348, "learning_rate": 2.2292495189223863e-06, "loss": 0.8156, "step": 47300 }, { "epoch": 0.5765176166624011, "grad_norm": 1.673414945602417, "learning_rate": 2.2289288005131497e-06, "loss": 0.8631, "step": 47305 }, { "epoch": 0.5765785528865489, "grad_norm": 1.7613978385925293, "learning_rate": 2.228608082103913e-06, "loss": 0.8658, "step": 47310 }, { "epoch": 0.5766394891106967, "grad_norm": 2.1317296028137207, "learning_rate": 2.228287363694676e-06, "loss": 0.7894, "step": 47315 }, { "epoch": 0.5767004253348446, "grad_norm": 2.016538143157959, "learning_rate": 2.2279666452854396e-06, "loss": 0.8279, "step": 47320 }, { "epoch": 0.5767613615589924, "grad_norm": 1.9722260236740112, "learning_rate": 2.227645926876203e-06, "loss": 0.8438, "step": 47325 }, { "epoch": 0.5768222977831402, "grad_norm": 2.0343141555786133, "learning_rate": 2.227325208466966e-06, "loss": 0.8228, "step": 47330 }, { "epoch": 0.576883234007288, "grad_norm": 1.9622544050216675, "learning_rate": 2.2270044900577295e-06, "loss": 0.7701, "step": 47335 }, { "epoch": 0.5769441702314357, "grad_norm": 1.8600789308547974, "learning_rate": 2.226683771648493e-06, "loss": 0.8099, "step": 47340 }, { "epoch": 0.5770051064555836, "grad_norm": 1.8647464513778687, "learning_rate": 2.226363053239256e-06, "loss": 0.7972, "step": 47345 }, { "epoch": 0.5770660426797314, "grad_norm": 2.1326076984405518, "learning_rate": 2.2260423348300194e-06, "loss": 0.8776, "step": 47350 }, { "epoch": 0.5771269789038792, "grad_norm": 2.228563070297241, "learning_rate": 2.2257216164207824e-06, "loss": 0.7807, "step": 47355 }, { "epoch": 0.577187915128027, "grad_norm": 1.8125441074371338, "learning_rate": 2.225400898011546e-06, "loss": 0.858, "step": 47360 }, { "epoch": 0.5772488513521749, "grad_norm": 2.1009562015533447, "learning_rate": 2.2250801796023093e-06, "loss": 0.8378, "step": 47365 }, { "epoch": 0.5773097875763227, "grad_norm": 3.4822099208831787, "learning_rate": 2.2247594611930728e-06, "loss": 0.8524, "step": 47370 }, { "epoch": 0.5773707238004704, "grad_norm": 1.893110752105713, "learning_rate": 2.2244387427838358e-06, "loss": 0.7889, "step": 47375 }, { "epoch": 0.5774316600246182, "grad_norm": 1.8884577751159668, "learning_rate": 2.2241180243745992e-06, "loss": 0.8433, "step": 47380 }, { "epoch": 0.577492596248766, "grad_norm": 2.051547050476074, "learning_rate": 2.2237973059653627e-06, "loss": 0.8543, "step": 47385 }, { "epoch": 0.5775535324729139, "grad_norm": 1.7285373210906982, "learning_rate": 2.223476587556126e-06, "loss": 0.819, "step": 47390 }, { "epoch": 0.5776144686970617, "grad_norm": 1.9479094743728638, "learning_rate": 2.223155869146889e-06, "loss": 0.8392, "step": 47395 }, { "epoch": 0.5776754049212095, "grad_norm": 1.873484492301941, "learning_rate": 2.2228351507376526e-06, "loss": 0.7738, "step": 47400 }, { "epoch": 0.5777363411453573, "grad_norm": 2.143285036087036, "learning_rate": 2.222514432328416e-06, "loss": 0.7544, "step": 47405 }, { "epoch": 0.577797277369505, "grad_norm": 2.0057897567749023, "learning_rate": 2.222193713919179e-06, "loss": 0.9081, "step": 47410 }, { "epoch": 0.5778582135936529, "grad_norm": 1.9087793827056885, "learning_rate": 2.2218729955099425e-06, "loss": 0.7795, "step": 47415 }, { "epoch": 0.5779191498178007, "grad_norm": 1.890323281288147, "learning_rate": 2.221552277100706e-06, "loss": 0.8356, "step": 47420 }, { "epoch": 0.5779800860419485, "grad_norm": 1.8754448890686035, "learning_rate": 2.221231558691469e-06, "loss": 0.8063, "step": 47425 }, { "epoch": 0.5780410222660963, "grad_norm": 1.7851272821426392, "learning_rate": 2.2209108402822324e-06, "loss": 0.8139, "step": 47430 }, { "epoch": 0.5781019584902441, "grad_norm": 1.9322986602783203, "learning_rate": 2.220590121872996e-06, "loss": 0.8101, "step": 47435 }, { "epoch": 0.578162894714392, "grad_norm": 2.0426485538482666, "learning_rate": 2.220269403463759e-06, "loss": 0.7878, "step": 47440 }, { "epoch": 0.5782238309385397, "grad_norm": 2.179583787918091, "learning_rate": 2.2199486850545223e-06, "loss": 0.812, "step": 47445 }, { "epoch": 0.5782847671626875, "grad_norm": 1.8743211030960083, "learning_rate": 2.2196279666452857e-06, "loss": 0.7932, "step": 47450 }, { "epoch": 0.5783457033868353, "grad_norm": 1.9582901000976562, "learning_rate": 2.219307248236049e-06, "loss": 0.841, "step": 47455 }, { "epoch": 0.5784066396109832, "grad_norm": 2.0888094902038574, "learning_rate": 2.218986529826812e-06, "loss": 0.7179, "step": 47460 }, { "epoch": 0.578467575835131, "grad_norm": 1.9396800994873047, "learning_rate": 2.2186658114175756e-06, "loss": 0.8618, "step": 47465 }, { "epoch": 0.5785285120592788, "grad_norm": 2.145982265472412, "learning_rate": 2.218345093008339e-06, "loss": 0.8301, "step": 47470 }, { "epoch": 0.5785894482834265, "grad_norm": 2.0556092262268066, "learning_rate": 2.2180243745991025e-06, "loss": 0.8094, "step": 47475 }, { "epoch": 0.5786503845075743, "grad_norm": 1.7931073904037476, "learning_rate": 2.2177036561898655e-06, "loss": 0.8057, "step": 47480 }, { "epoch": 0.5787113207317222, "grad_norm": 1.6243205070495605, "learning_rate": 2.217382937780629e-06, "loss": 0.8298, "step": 47485 }, { "epoch": 0.57877225695587, "grad_norm": 2.2125306129455566, "learning_rate": 2.217062219371392e-06, "loss": 0.8143, "step": 47490 }, { "epoch": 0.5788331931800178, "grad_norm": 1.858522653579712, "learning_rate": 2.2167415009621554e-06, "loss": 0.8584, "step": 47495 }, { "epoch": 0.5788941294041656, "grad_norm": 1.9474153518676758, "learning_rate": 2.216420782552919e-06, "loss": 0.8116, "step": 47500 }, { "epoch": 0.5789550656283134, "grad_norm": 2.0228803157806396, "learning_rate": 2.216100064143682e-06, "loss": 0.8141, "step": 47505 }, { "epoch": 0.5790160018524612, "grad_norm": 2.116211175918579, "learning_rate": 2.2157793457344453e-06, "loss": 0.9267, "step": 47510 }, { "epoch": 0.579076938076609, "grad_norm": 2.0691416263580322, "learning_rate": 2.2154586273252087e-06, "loss": 0.7754, "step": 47515 }, { "epoch": 0.5791378743007568, "grad_norm": 2.0882060527801514, "learning_rate": 2.2151379089159717e-06, "loss": 0.9403, "step": 47520 }, { "epoch": 0.5791988105249046, "grad_norm": 1.8449324369430542, "learning_rate": 2.214817190506735e-06, "loss": 0.8202, "step": 47525 }, { "epoch": 0.5792597467490525, "grad_norm": 1.8768686056137085, "learning_rate": 2.2144964720974986e-06, "loss": 0.8183, "step": 47530 }, { "epoch": 0.5793206829732003, "grad_norm": 1.8373702764511108, "learning_rate": 2.214175753688262e-06, "loss": 0.9021, "step": 47535 }, { "epoch": 0.5793816191973481, "grad_norm": 2.0043318271636963, "learning_rate": 2.213855035279025e-06, "loss": 0.8032, "step": 47540 }, { "epoch": 0.5794425554214958, "grad_norm": 2.390099048614502, "learning_rate": 2.2135343168697885e-06, "loss": 0.9346, "step": 47545 }, { "epoch": 0.5795034916456436, "grad_norm": 2.1114630699157715, "learning_rate": 2.213213598460552e-06, "loss": 0.7833, "step": 47550 }, { "epoch": 0.5795644278697915, "grad_norm": 1.9375452995300293, "learning_rate": 2.2128928800513154e-06, "loss": 0.8294, "step": 47555 }, { "epoch": 0.5796253640939393, "grad_norm": 4.271149635314941, "learning_rate": 2.2125721616420784e-06, "loss": 0.8749, "step": 47560 }, { "epoch": 0.5796863003180871, "grad_norm": 2.296778440475464, "learning_rate": 2.212251443232842e-06, "loss": 0.7432, "step": 47565 }, { "epoch": 0.5797472365422349, "grad_norm": 2.3811752796173096, "learning_rate": 2.211930724823605e-06, "loss": 0.8368, "step": 47570 }, { "epoch": 0.5798081727663827, "grad_norm": 2.2099828720092773, "learning_rate": 2.2116100064143683e-06, "loss": 0.839, "step": 47575 }, { "epoch": 0.5798691089905305, "grad_norm": 1.8697724342346191, "learning_rate": 2.2112892880051318e-06, "loss": 0.8176, "step": 47580 }, { "epoch": 0.5799300452146783, "grad_norm": 2.036248207092285, "learning_rate": 2.2109685695958948e-06, "loss": 0.825, "step": 47585 }, { "epoch": 0.5799909814388261, "grad_norm": 1.8992762565612793, "learning_rate": 2.2106478511866582e-06, "loss": 0.8816, "step": 47590 }, { "epoch": 0.5800519176629739, "grad_norm": 2.2853753566741943, "learning_rate": 2.2103271327774217e-06, "loss": 0.8565, "step": 47595 }, { "epoch": 0.5801128538871217, "grad_norm": 2.104984998703003, "learning_rate": 2.2100064143681847e-06, "loss": 0.8068, "step": 47600 }, { "epoch": 0.5801737901112696, "grad_norm": 2.168212652206421, "learning_rate": 2.209685695958948e-06, "loss": 0.8872, "step": 47605 }, { "epoch": 0.5802347263354174, "grad_norm": 2.1141304969787598, "learning_rate": 2.2093649775497116e-06, "loss": 0.809, "step": 47610 }, { "epoch": 0.5802956625595651, "grad_norm": 2.306769847869873, "learning_rate": 2.209044259140475e-06, "loss": 0.8691, "step": 47615 }, { "epoch": 0.5803565987837129, "grad_norm": 2.049274444580078, "learning_rate": 2.2087235407312384e-06, "loss": 0.9013, "step": 47620 }, { "epoch": 0.5804175350078608, "grad_norm": 1.9543567895889282, "learning_rate": 2.2084028223220014e-06, "loss": 0.8242, "step": 47625 }, { "epoch": 0.5804784712320086, "grad_norm": 2.2716195583343506, "learning_rate": 2.208082103912765e-06, "loss": 0.8954, "step": 47630 }, { "epoch": 0.5805394074561564, "grad_norm": 2.847050428390503, "learning_rate": 2.2077613855035283e-06, "loss": 0.8764, "step": 47635 }, { "epoch": 0.5806003436803042, "grad_norm": 2.1712799072265625, "learning_rate": 2.2074406670942913e-06, "loss": 0.8207, "step": 47640 }, { "epoch": 0.580661279904452, "grad_norm": 1.75460946559906, "learning_rate": 2.2071199486850548e-06, "loss": 0.7939, "step": 47645 }, { "epoch": 0.5807222161285998, "grad_norm": 1.9846670627593994, "learning_rate": 2.206799230275818e-06, "loss": 0.8129, "step": 47650 }, { "epoch": 0.5807831523527476, "grad_norm": 2.1179068088531494, "learning_rate": 2.2064785118665812e-06, "loss": 0.8211, "step": 47655 }, { "epoch": 0.5808440885768954, "grad_norm": 1.8967907428741455, "learning_rate": 2.2061577934573447e-06, "loss": 0.8466, "step": 47660 }, { "epoch": 0.5809050248010432, "grad_norm": 2.359097480773926, "learning_rate": 2.2058370750481077e-06, "loss": 0.8719, "step": 47665 }, { "epoch": 0.580965961025191, "grad_norm": 1.7889143228530884, "learning_rate": 2.205516356638871e-06, "loss": 0.7576, "step": 47670 }, { "epoch": 0.5810268972493389, "grad_norm": 2.157142400741577, "learning_rate": 2.2051956382296346e-06, "loss": 0.7753, "step": 47675 }, { "epoch": 0.5810878334734867, "grad_norm": 1.8551034927368164, "learning_rate": 2.2048749198203976e-06, "loss": 0.7389, "step": 47680 }, { "epoch": 0.5811487696976344, "grad_norm": 2.0443320274353027, "learning_rate": 2.204554201411161e-06, "loss": 0.8457, "step": 47685 }, { "epoch": 0.5812097059217822, "grad_norm": 1.6073204278945923, "learning_rate": 2.2042334830019245e-06, "loss": 0.8175, "step": 47690 }, { "epoch": 0.58127064214593, "grad_norm": 2.2542929649353027, "learning_rate": 2.203912764592688e-06, "loss": 0.8056, "step": 47695 }, { "epoch": 0.5813315783700779, "grad_norm": 1.9668688774108887, "learning_rate": 2.2035920461834514e-06, "loss": 0.8707, "step": 47700 }, { "epoch": 0.5813925145942257, "grad_norm": 1.962602972984314, "learning_rate": 2.2032713277742144e-06, "loss": 0.7057, "step": 47705 }, { "epoch": 0.5814534508183735, "grad_norm": 2.1088244915008545, "learning_rate": 2.202950609364978e-06, "loss": 0.9069, "step": 47710 }, { "epoch": 0.5815143870425213, "grad_norm": 1.9016884565353394, "learning_rate": 2.2026298909557413e-06, "loss": 0.7416, "step": 47715 }, { "epoch": 0.581575323266669, "grad_norm": 2.122163772583008, "learning_rate": 2.2023091725465043e-06, "loss": 0.9017, "step": 47720 }, { "epoch": 0.5816362594908169, "grad_norm": 1.5381760597229004, "learning_rate": 2.2019884541372677e-06, "loss": 0.7873, "step": 47725 }, { "epoch": 0.5816971957149647, "grad_norm": 2.2088756561279297, "learning_rate": 2.2016677357280307e-06, "loss": 0.8895, "step": 47730 }, { "epoch": 0.5817581319391125, "grad_norm": 1.878679871559143, "learning_rate": 2.201347017318794e-06, "loss": 0.7817, "step": 47735 }, { "epoch": 0.5818190681632603, "grad_norm": 1.9401817321777344, "learning_rate": 2.2010262989095576e-06, "loss": 0.7749, "step": 47740 }, { "epoch": 0.5818800043874082, "grad_norm": 2.0124447345733643, "learning_rate": 2.2007055805003206e-06, "loss": 0.7621, "step": 47745 }, { "epoch": 0.581940940611556, "grad_norm": 2.001065492630005, "learning_rate": 2.200384862091084e-06, "loss": 0.8162, "step": 47750 }, { "epoch": 0.5820018768357037, "grad_norm": 1.9451324939727783, "learning_rate": 2.2000641436818475e-06, "loss": 0.8557, "step": 47755 }, { "epoch": 0.5820628130598515, "grad_norm": 1.7092865705490112, "learning_rate": 2.199743425272611e-06, "loss": 0.7908, "step": 47760 }, { "epoch": 0.5821237492839993, "grad_norm": 1.9854693412780762, "learning_rate": 2.199422706863374e-06, "loss": 0.8129, "step": 47765 }, { "epoch": 0.5821846855081472, "grad_norm": 1.6863347291946411, "learning_rate": 2.1991019884541374e-06, "loss": 0.8033, "step": 47770 }, { "epoch": 0.582245621732295, "grad_norm": 1.857620358467102, "learning_rate": 2.198781270044901e-06, "loss": 0.8517, "step": 47775 }, { "epoch": 0.5823065579564428, "grad_norm": 1.975942611694336, "learning_rate": 2.1984605516356643e-06, "loss": 0.8178, "step": 47780 }, { "epoch": 0.5823674941805906, "grad_norm": 1.8422085046768188, "learning_rate": 2.1981398332264273e-06, "loss": 0.8315, "step": 47785 }, { "epoch": 0.5824284304047384, "grad_norm": 2.0960774421691895, "learning_rate": 2.1978191148171907e-06, "loss": 0.8055, "step": 47790 }, { "epoch": 0.5824893666288862, "grad_norm": 2.1139307022094727, "learning_rate": 2.197498396407954e-06, "loss": 0.7779, "step": 47795 }, { "epoch": 0.582550302853034, "grad_norm": 1.857606291770935, "learning_rate": 2.197177677998717e-06, "loss": 0.8309, "step": 47800 }, { "epoch": 0.5826112390771818, "grad_norm": 2.100093364715576, "learning_rate": 2.1968569595894806e-06, "loss": 0.8436, "step": 47805 }, { "epoch": 0.5826721753013296, "grad_norm": 1.8126189708709717, "learning_rate": 2.196536241180244e-06, "loss": 0.8599, "step": 47810 }, { "epoch": 0.5827331115254775, "grad_norm": 2.183215618133545, "learning_rate": 2.196215522771007e-06, "loss": 0.7829, "step": 47815 }, { "epoch": 0.5827940477496253, "grad_norm": 1.8531006574630737, "learning_rate": 2.1958948043617705e-06, "loss": 0.8601, "step": 47820 }, { "epoch": 0.582854983973773, "grad_norm": 1.9786728620529175, "learning_rate": 2.1955740859525336e-06, "loss": 0.8562, "step": 47825 }, { "epoch": 0.5829159201979208, "grad_norm": 2.0015408992767334, "learning_rate": 2.195253367543297e-06, "loss": 0.8601, "step": 47830 }, { "epoch": 0.5829768564220686, "grad_norm": 1.6585668325424194, "learning_rate": 2.1949326491340604e-06, "loss": 0.8239, "step": 47835 }, { "epoch": 0.5830377926462165, "grad_norm": 1.9910839796066284, "learning_rate": 2.194611930724824e-06, "loss": 0.7843, "step": 47840 }, { "epoch": 0.5830987288703643, "grad_norm": 1.9132028818130493, "learning_rate": 2.1942912123155873e-06, "loss": 0.8145, "step": 47845 }, { "epoch": 0.5831596650945121, "grad_norm": 2.091137647628784, "learning_rate": 2.1939704939063503e-06, "loss": 0.8314, "step": 47850 }, { "epoch": 0.5832206013186599, "grad_norm": 2.2697415351867676, "learning_rate": 2.1936497754971138e-06, "loss": 0.8896, "step": 47855 }, { "epoch": 0.5832815375428076, "grad_norm": 1.8074018955230713, "learning_rate": 2.1933290570878772e-06, "loss": 0.7593, "step": 47860 }, { "epoch": 0.5833424737669555, "grad_norm": 2.0557847023010254, "learning_rate": 2.1930083386786402e-06, "loss": 0.8604, "step": 47865 }, { "epoch": 0.5834034099911033, "grad_norm": 2.259510040283203, "learning_rate": 2.1926876202694037e-06, "loss": 0.7628, "step": 47870 }, { "epoch": 0.5834643462152511, "grad_norm": 1.8496758937835693, "learning_rate": 2.192366901860167e-06, "loss": 0.7452, "step": 47875 }, { "epoch": 0.5835252824393989, "grad_norm": 1.9624696969985962, "learning_rate": 2.19204618345093e-06, "loss": 0.8559, "step": 47880 }, { "epoch": 0.5835862186635468, "grad_norm": 2.0570738315582275, "learning_rate": 2.1917254650416936e-06, "loss": 0.858, "step": 47885 }, { "epoch": 0.5836471548876946, "grad_norm": 1.8078229427337646, "learning_rate": 2.191404746632457e-06, "loss": 0.7594, "step": 47890 }, { "epoch": 0.5837080911118423, "grad_norm": 2.2938430309295654, "learning_rate": 2.19108402822322e-06, "loss": 0.8233, "step": 47895 }, { "epoch": 0.5837690273359901, "grad_norm": 2.175973892211914, "learning_rate": 2.1907633098139835e-06, "loss": 0.8477, "step": 47900 }, { "epoch": 0.5838299635601379, "grad_norm": 1.8006260395050049, "learning_rate": 2.1904425914047465e-06, "loss": 0.8981, "step": 47905 }, { "epoch": 0.5838908997842858, "grad_norm": 2.15179181098938, "learning_rate": 2.19012187299551e-06, "loss": 0.8371, "step": 47910 }, { "epoch": 0.5839518360084336, "grad_norm": 1.6572288274765015, "learning_rate": 2.1898011545862734e-06, "loss": 0.8251, "step": 47915 }, { "epoch": 0.5840127722325814, "grad_norm": 1.9034491777420044, "learning_rate": 2.189480436177037e-06, "loss": 0.861, "step": 47920 }, { "epoch": 0.5840737084567292, "grad_norm": 2.8550703525543213, "learning_rate": 2.1891597177678002e-06, "loss": 0.8254, "step": 47925 }, { "epoch": 0.584134644680877, "grad_norm": 1.900664210319519, "learning_rate": 2.1888389993585637e-06, "loss": 0.7862, "step": 47930 }, { "epoch": 0.5841955809050248, "grad_norm": 1.7522766590118408, "learning_rate": 2.1885182809493267e-06, "loss": 0.7433, "step": 47935 }, { "epoch": 0.5842565171291726, "grad_norm": 1.96455717086792, "learning_rate": 2.18819756254009e-06, "loss": 0.7457, "step": 47940 }, { "epoch": 0.5843174533533204, "grad_norm": 1.813214898109436, "learning_rate": 2.187876844130853e-06, "loss": 0.7557, "step": 47945 }, { "epoch": 0.5843783895774682, "grad_norm": 1.8683444261550903, "learning_rate": 2.1875561257216166e-06, "loss": 0.8286, "step": 47950 }, { "epoch": 0.5844393258016161, "grad_norm": 1.8102309703826904, "learning_rate": 2.18723540731238e-06, "loss": 0.7951, "step": 47955 }, { "epoch": 0.5845002620257639, "grad_norm": 1.8282853364944458, "learning_rate": 2.186914688903143e-06, "loss": 0.836, "step": 47960 }, { "epoch": 0.5845611982499116, "grad_norm": 1.8478060960769653, "learning_rate": 2.1865939704939065e-06, "loss": 0.7381, "step": 47965 }, { "epoch": 0.5846221344740594, "grad_norm": 2.2894070148468018, "learning_rate": 2.18627325208467e-06, "loss": 0.9214, "step": 47970 }, { "epoch": 0.5846830706982072, "grad_norm": 2.0135247707366943, "learning_rate": 2.185952533675433e-06, "loss": 0.7552, "step": 47975 }, { "epoch": 0.5847440069223551, "grad_norm": 1.8966832160949707, "learning_rate": 2.1856318152661964e-06, "loss": 0.7894, "step": 47980 }, { "epoch": 0.5848049431465029, "grad_norm": 2.1841013431549072, "learning_rate": 2.18531109685696e-06, "loss": 0.8619, "step": 47985 }, { "epoch": 0.5848658793706507, "grad_norm": 2.2522389888763428, "learning_rate": 2.184990378447723e-06, "loss": 0.8243, "step": 47990 }, { "epoch": 0.5849268155947985, "grad_norm": 1.7820740938186646, "learning_rate": 2.1846696600384863e-06, "loss": 0.7473, "step": 47995 }, { "epoch": 0.5849877518189462, "grad_norm": 2.3770627975463867, "learning_rate": 2.1843489416292497e-06, "loss": 0.8295, "step": 48000 }, { "epoch": 0.5850486880430941, "grad_norm": 1.8907824754714966, "learning_rate": 2.184028223220013e-06, "loss": 0.8141, "step": 48005 }, { "epoch": 0.5851096242672419, "grad_norm": 2.1839182376861572, "learning_rate": 2.1837075048107766e-06, "loss": 0.8192, "step": 48010 }, { "epoch": 0.5851705604913897, "grad_norm": 2.036034107208252, "learning_rate": 2.1833867864015396e-06, "loss": 0.8552, "step": 48015 }, { "epoch": 0.5852314967155375, "grad_norm": 1.8169946670532227, "learning_rate": 2.183066067992303e-06, "loss": 0.8339, "step": 48020 }, { "epoch": 0.5852924329396854, "grad_norm": 2.1167051792144775, "learning_rate": 2.182745349583066e-06, "loss": 0.848, "step": 48025 }, { "epoch": 0.5853533691638332, "grad_norm": 2.6721251010894775, "learning_rate": 2.1824246311738295e-06, "loss": 0.8722, "step": 48030 }, { "epoch": 0.5854143053879809, "grad_norm": 1.9313210248947144, "learning_rate": 2.182103912764593e-06, "loss": 0.8074, "step": 48035 }, { "epoch": 0.5854752416121287, "grad_norm": 2.13560152053833, "learning_rate": 2.181783194355356e-06, "loss": 0.8555, "step": 48040 }, { "epoch": 0.5855361778362765, "grad_norm": 2.1491212844848633, "learning_rate": 2.1814624759461194e-06, "loss": 0.8937, "step": 48045 }, { "epoch": 0.5855971140604244, "grad_norm": 2.147469997406006, "learning_rate": 2.181141757536883e-06, "loss": 0.83, "step": 48050 }, { "epoch": 0.5856580502845722, "grad_norm": 1.9707834720611572, "learning_rate": 2.180821039127646e-06, "loss": 0.8117, "step": 48055 }, { "epoch": 0.58571898650872, "grad_norm": 2.071274995803833, "learning_rate": 2.1805003207184093e-06, "loss": 0.8294, "step": 48060 }, { "epoch": 0.5857799227328678, "grad_norm": 3.543811321258545, "learning_rate": 2.1801796023091728e-06, "loss": 0.8275, "step": 48065 }, { "epoch": 0.5858408589570155, "grad_norm": 1.892647624015808, "learning_rate": 2.179858883899936e-06, "loss": 0.8089, "step": 48070 }, { "epoch": 0.5859017951811634, "grad_norm": 2.0881099700927734, "learning_rate": 2.1795381654906992e-06, "loss": 0.7991, "step": 48075 }, { "epoch": 0.5859627314053112, "grad_norm": 2.0847012996673584, "learning_rate": 2.1792174470814627e-06, "loss": 0.8547, "step": 48080 }, { "epoch": 0.586023667629459, "grad_norm": 2.064736843109131, "learning_rate": 2.178896728672226e-06, "loss": 0.8031, "step": 48085 }, { "epoch": 0.5860846038536068, "grad_norm": 1.9022680521011353, "learning_rate": 2.1785760102629895e-06, "loss": 0.853, "step": 48090 }, { "epoch": 0.5861455400777547, "grad_norm": 1.8197290897369385, "learning_rate": 2.1782552918537526e-06, "loss": 0.8151, "step": 48095 }, { "epoch": 0.5862064763019025, "grad_norm": 2.1575441360473633, "learning_rate": 2.177934573444516e-06, "loss": 0.7686, "step": 48100 }, { "epoch": 0.5862674125260502, "grad_norm": 1.9314149618148804, "learning_rate": 2.1776138550352794e-06, "loss": 0.8863, "step": 48105 }, { "epoch": 0.586328348750198, "grad_norm": 1.847859263420105, "learning_rate": 2.1772931366260425e-06, "loss": 0.8227, "step": 48110 }, { "epoch": 0.5863892849743458, "grad_norm": 1.649359941482544, "learning_rate": 2.176972418216806e-06, "loss": 0.7946, "step": 48115 }, { "epoch": 0.5864502211984937, "grad_norm": 1.697322964668274, "learning_rate": 2.176651699807569e-06, "loss": 0.8246, "step": 48120 }, { "epoch": 0.5865111574226415, "grad_norm": 2.288137197494507, "learning_rate": 2.1763309813983323e-06, "loss": 0.7965, "step": 48125 }, { "epoch": 0.5865720936467893, "grad_norm": 2.0770530700683594, "learning_rate": 2.176010262989096e-06, "loss": 0.8684, "step": 48130 }, { "epoch": 0.5866330298709371, "grad_norm": 2.0753531455993652, "learning_rate": 2.175689544579859e-06, "loss": 0.8465, "step": 48135 }, { "epoch": 0.5866939660950848, "grad_norm": 1.8385131359100342, "learning_rate": 2.1753688261706222e-06, "loss": 0.8649, "step": 48140 }, { "epoch": 0.5867549023192327, "grad_norm": 2.031278133392334, "learning_rate": 2.1750481077613857e-06, "loss": 0.8905, "step": 48145 }, { "epoch": 0.5868158385433805, "grad_norm": 2.063509464263916, "learning_rate": 2.174727389352149e-06, "loss": 0.8444, "step": 48150 }, { "epoch": 0.5868767747675283, "grad_norm": 2.09444260597229, "learning_rate": 2.1744066709429126e-06, "loss": 0.7729, "step": 48155 }, { "epoch": 0.5869377109916761, "grad_norm": 2.0619564056396484, "learning_rate": 2.1740859525336756e-06, "loss": 0.8388, "step": 48160 }, { "epoch": 0.586998647215824, "grad_norm": 1.9209239482879639, "learning_rate": 2.173765234124439e-06, "loss": 0.8341, "step": 48165 }, { "epoch": 0.5870595834399718, "grad_norm": 1.7749927043914795, "learning_rate": 2.1734445157152025e-06, "loss": 0.8053, "step": 48170 }, { "epoch": 0.5871205196641195, "grad_norm": 1.707335114479065, "learning_rate": 2.1731237973059655e-06, "loss": 0.9043, "step": 48175 }, { "epoch": 0.5871814558882673, "grad_norm": 1.9405440092086792, "learning_rate": 2.172803078896729e-06, "loss": 0.8523, "step": 48180 }, { "epoch": 0.5872423921124151, "grad_norm": 1.9738818407058716, "learning_rate": 2.1724823604874924e-06, "loss": 0.8004, "step": 48185 }, { "epoch": 0.587303328336563, "grad_norm": 2.22114634513855, "learning_rate": 2.1721616420782554e-06, "loss": 0.8359, "step": 48190 }, { "epoch": 0.5873642645607108, "grad_norm": 2.1557393074035645, "learning_rate": 2.171840923669019e-06, "loss": 0.8063, "step": 48195 }, { "epoch": 0.5874252007848586, "grad_norm": 2.8506927490234375, "learning_rate": 2.171520205259782e-06, "loss": 0.7905, "step": 48200 }, { "epoch": 0.5874861370090064, "grad_norm": 1.8294845819473267, "learning_rate": 2.1711994868505453e-06, "loss": 0.8534, "step": 48205 }, { "epoch": 0.5875470732331541, "grad_norm": 1.9566421508789062, "learning_rate": 2.1708787684413087e-06, "loss": 0.8103, "step": 48210 }, { "epoch": 0.587608009457302, "grad_norm": 2.0169179439544678, "learning_rate": 2.1705580500320717e-06, "loss": 0.8317, "step": 48215 }, { "epoch": 0.5876689456814498, "grad_norm": 1.5835367441177368, "learning_rate": 2.170237331622835e-06, "loss": 0.859, "step": 48220 }, { "epoch": 0.5877298819055976, "grad_norm": 1.9977344274520874, "learning_rate": 2.1699166132135986e-06, "loss": 0.8488, "step": 48225 }, { "epoch": 0.5877908181297454, "grad_norm": 1.6719003915786743, "learning_rate": 2.169595894804362e-06, "loss": 0.7997, "step": 48230 }, { "epoch": 0.5878517543538933, "grad_norm": 1.9227079153060913, "learning_rate": 2.1692751763951255e-06, "loss": 0.8131, "step": 48235 }, { "epoch": 0.5879126905780411, "grad_norm": 2.0577218532562256, "learning_rate": 2.1689544579858885e-06, "loss": 0.8192, "step": 48240 }, { "epoch": 0.5879736268021888, "grad_norm": 1.803305745124817, "learning_rate": 2.168633739576652e-06, "loss": 0.7888, "step": 48245 }, { "epoch": 0.5880345630263366, "grad_norm": 2.054767370223999, "learning_rate": 2.1683130211674154e-06, "loss": 0.7991, "step": 48250 }, { "epoch": 0.5880954992504844, "grad_norm": 1.9951468706130981, "learning_rate": 2.1679923027581784e-06, "loss": 0.7365, "step": 48255 }, { "epoch": 0.5881564354746323, "grad_norm": 1.8410651683807373, "learning_rate": 2.167671584348942e-06, "loss": 0.8231, "step": 48260 }, { "epoch": 0.5882173716987801, "grad_norm": 2.1291351318359375, "learning_rate": 2.1673508659397053e-06, "loss": 0.8167, "step": 48265 }, { "epoch": 0.5882783079229279, "grad_norm": 1.7605403661727905, "learning_rate": 2.1670301475304683e-06, "loss": 0.8143, "step": 48270 }, { "epoch": 0.5883392441470757, "grad_norm": 1.5897107124328613, "learning_rate": 2.1667094291212317e-06, "loss": 0.8366, "step": 48275 }, { "epoch": 0.5884001803712234, "grad_norm": 2.018038034439087, "learning_rate": 2.1663887107119948e-06, "loss": 0.8702, "step": 48280 }, { "epoch": 0.5884611165953713, "grad_norm": 2.9266772270202637, "learning_rate": 2.166067992302758e-06, "loss": 0.8858, "step": 48285 }, { "epoch": 0.5885220528195191, "grad_norm": 2.2368557453155518, "learning_rate": 2.1657472738935216e-06, "loss": 0.7845, "step": 48290 }, { "epoch": 0.5885829890436669, "grad_norm": 2.110527992248535, "learning_rate": 2.165426555484285e-06, "loss": 0.8996, "step": 48295 }, { "epoch": 0.5886439252678147, "grad_norm": 1.708096981048584, "learning_rate": 2.165105837075048e-06, "loss": 0.8608, "step": 48300 }, { "epoch": 0.5887048614919625, "grad_norm": 2.153379201889038, "learning_rate": 2.1647851186658115e-06, "loss": 0.7548, "step": 48305 }, { "epoch": 0.5887657977161104, "grad_norm": 1.9564918279647827, "learning_rate": 2.164464400256575e-06, "loss": 0.8518, "step": 48310 }, { "epoch": 0.5888267339402581, "grad_norm": 2.13645339012146, "learning_rate": 2.1641436818473384e-06, "loss": 0.8259, "step": 48315 }, { "epoch": 0.5888876701644059, "grad_norm": 1.8715996742248535, "learning_rate": 2.1638229634381014e-06, "loss": 0.8063, "step": 48320 }, { "epoch": 0.5889486063885537, "grad_norm": 2.002337694168091, "learning_rate": 2.163502245028865e-06, "loss": 0.8055, "step": 48325 }, { "epoch": 0.5890095426127016, "grad_norm": 2.0306341648101807, "learning_rate": 2.1631815266196283e-06, "loss": 0.768, "step": 48330 }, { "epoch": 0.5890704788368494, "grad_norm": 1.655934453010559, "learning_rate": 2.1628608082103913e-06, "loss": 0.7945, "step": 48335 }, { "epoch": 0.5891314150609972, "grad_norm": 1.8391714096069336, "learning_rate": 2.1625400898011548e-06, "loss": 0.8287, "step": 48340 }, { "epoch": 0.589192351285145, "grad_norm": 1.8393429517745972, "learning_rate": 2.1622193713919182e-06, "loss": 0.8113, "step": 48345 }, { "epoch": 0.5892532875092927, "grad_norm": 2.3887574672698975, "learning_rate": 2.1618986529826812e-06, "loss": 0.8425, "step": 48350 }, { "epoch": 0.5893142237334406, "grad_norm": 1.9178320169448853, "learning_rate": 2.1615779345734447e-06, "loss": 0.8787, "step": 48355 }, { "epoch": 0.5893751599575884, "grad_norm": 2.232060432434082, "learning_rate": 2.161257216164208e-06, "loss": 0.872, "step": 48360 }, { "epoch": 0.5894360961817362, "grad_norm": 1.7568974494934082, "learning_rate": 2.160936497754971e-06, "loss": 0.8318, "step": 48365 }, { "epoch": 0.589497032405884, "grad_norm": 1.9007236957550049, "learning_rate": 2.1606157793457346e-06, "loss": 0.8616, "step": 48370 }, { "epoch": 0.5895579686300318, "grad_norm": 1.8786036968231201, "learning_rate": 2.160295060936498e-06, "loss": 0.8675, "step": 48375 }, { "epoch": 0.5896189048541797, "grad_norm": 1.881718397140503, "learning_rate": 2.159974342527261e-06, "loss": 0.8149, "step": 48380 }, { "epoch": 0.5896798410783274, "grad_norm": 2.0670065879821777, "learning_rate": 2.1596536241180245e-06, "loss": 0.7802, "step": 48385 }, { "epoch": 0.5897407773024752, "grad_norm": 1.9251421689987183, "learning_rate": 2.159332905708788e-06, "loss": 0.7753, "step": 48390 }, { "epoch": 0.589801713526623, "grad_norm": 1.7885854244232178, "learning_rate": 2.1590121872995513e-06, "loss": 0.8564, "step": 48395 }, { "epoch": 0.5898626497507709, "grad_norm": 1.7967860698699951, "learning_rate": 2.158691468890315e-06, "loss": 0.7819, "step": 48400 }, { "epoch": 0.5899235859749187, "grad_norm": 2.128037929534912, "learning_rate": 2.158370750481078e-06, "loss": 0.8305, "step": 48405 }, { "epoch": 0.5899845221990665, "grad_norm": 2.2989330291748047, "learning_rate": 2.1580500320718412e-06, "loss": 0.8368, "step": 48410 }, { "epoch": 0.5900454584232142, "grad_norm": 1.809643030166626, "learning_rate": 2.1577293136626043e-06, "loss": 0.8529, "step": 48415 }, { "epoch": 0.590106394647362, "grad_norm": 1.8533612489700317, "learning_rate": 2.1574085952533677e-06, "loss": 0.7608, "step": 48420 }, { "epoch": 0.5901673308715099, "grad_norm": 2.1731204986572266, "learning_rate": 2.157087876844131e-06, "loss": 0.8027, "step": 48425 }, { "epoch": 0.5902282670956577, "grad_norm": 2.749114990234375, "learning_rate": 2.156767158434894e-06, "loss": 0.8331, "step": 48430 }, { "epoch": 0.5902892033198055, "grad_norm": 2.273057460784912, "learning_rate": 2.1564464400256576e-06, "loss": 0.7454, "step": 48435 }, { "epoch": 0.5903501395439533, "grad_norm": 1.8493032455444336, "learning_rate": 2.156125721616421e-06, "loss": 0.8246, "step": 48440 }, { "epoch": 0.5904110757681011, "grad_norm": 1.892495036125183, "learning_rate": 2.155805003207184e-06, "loss": 0.8421, "step": 48445 }, { "epoch": 0.5904720119922489, "grad_norm": 1.98349130153656, "learning_rate": 2.1554842847979475e-06, "loss": 0.846, "step": 48450 }, { "epoch": 0.5905329482163967, "grad_norm": 2.1547460556030273, "learning_rate": 2.155163566388711e-06, "loss": 0.8735, "step": 48455 }, { "epoch": 0.5905938844405445, "grad_norm": 2.1720423698425293, "learning_rate": 2.1548428479794744e-06, "loss": 0.9099, "step": 48460 }, { "epoch": 0.5906548206646923, "grad_norm": 1.9045106172561646, "learning_rate": 2.1545221295702374e-06, "loss": 0.8093, "step": 48465 }, { "epoch": 0.5907157568888401, "grad_norm": 1.8520852327346802, "learning_rate": 2.154201411161001e-06, "loss": 0.8285, "step": 48470 }, { "epoch": 0.590776693112988, "grad_norm": 2.1730661392211914, "learning_rate": 2.1538806927517643e-06, "loss": 0.8121, "step": 48475 }, { "epoch": 0.5908376293371358, "grad_norm": 1.9095499515533447, "learning_rate": 2.1535599743425277e-06, "loss": 0.7738, "step": 48480 }, { "epoch": 0.5908985655612835, "grad_norm": 1.9373823404312134, "learning_rate": 2.1532392559332907e-06, "loss": 0.8384, "step": 48485 }, { "epoch": 0.5909595017854313, "grad_norm": 2.0974459648132324, "learning_rate": 2.152918537524054e-06, "loss": 0.8298, "step": 48490 }, { "epoch": 0.5910204380095792, "grad_norm": 1.9624783992767334, "learning_rate": 2.152597819114817e-06, "loss": 0.842, "step": 48495 }, { "epoch": 0.591081374233727, "grad_norm": 2.0573935508728027, "learning_rate": 2.1522771007055806e-06, "loss": 0.7924, "step": 48500 }, { "epoch": 0.5911423104578748, "grad_norm": 1.8263487815856934, "learning_rate": 2.151956382296344e-06, "loss": 0.8222, "step": 48505 }, { "epoch": 0.5912032466820226, "grad_norm": 1.8802006244659424, "learning_rate": 2.151635663887107e-06, "loss": 0.8249, "step": 48510 }, { "epoch": 0.5912641829061704, "grad_norm": 2.071767807006836, "learning_rate": 2.1513149454778705e-06, "loss": 0.826, "step": 48515 }, { "epoch": 0.5913251191303182, "grad_norm": 2.0667545795440674, "learning_rate": 2.150994227068634e-06, "loss": 0.818, "step": 48520 }, { "epoch": 0.591386055354466, "grad_norm": 2.0983080863952637, "learning_rate": 2.150673508659397e-06, "loss": 0.872, "step": 48525 }, { "epoch": 0.5914469915786138, "grad_norm": 2.4251022338867188, "learning_rate": 2.1503527902501604e-06, "loss": 0.8743, "step": 48530 }, { "epoch": 0.5915079278027616, "grad_norm": 2.141648054122925, "learning_rate": 2.150032071840924e-06, "loss": 0.8349, "step": 48535 }, { "epoch": 0.5915688640269094, "grad_norm": 2.0549421310424805, "learning_rate": 2.1497113534316873e-06, "loss": 0.8568, "step": 48540 }, { "epoch": 0.5916298002510573, "grad_norm": 2.4553050994873047, "learning_rate": 2.1493906350224507e-06, "loss": 0.8204, "step": 48545 }, { "epoch": 0.5916907364752051, "grad_norm": 1.881384015083313, "learning_rate": 2.1490699166132138e-06, "loss": 0.8352, "step": 48550 }, { "epoch": 0.5917516726993528, "grad_norm": 1.8328795433044434, "learning_rate": 2.148749198203977e-06, "loss": 0.8834, "step": 48555 }, { "epoch": 0.5918126089235006, "grad_norm": 2.33981990814209, "learning_rate": 2.1484284797947406e-06, "loss": 0.846, "step": 48560 }, { "epoch": 0.5918735451476484, "grad_norm": 1.8261539936065674, "learning_rate": 2.1481077613855037e-06, "loss": 0.8236, "step": 48565 }, { "epoch": 0.5919344813717963, "grad_norm": 1.7501128911972046, "learning_rate": 2.147787042976267e-06, "loss": 0.8243, "step": 48570 }, { "epoch": 0.5919954175959441, "grad_norm": 1.8364269733428955, "learning_rate": 2.14746632456703e-06, "loss": 0.8569, "step": 48575 }, { "epoch": 0.5920563538200919, "grad_norm": 1.9697821140289307, "learning_rate": 2.1471456061577936e-06, "loss": 0.7709, "step": 48580 }, { "epoch": 0.5921172900442397, "grad_norm": 2.3413076400756836, "learning_rate": 2.146824887748557e-06, "loss": 0.8864, "step": 48585 }, { "epoch": 0.5921782262683875, "grad_norm": 1.9464906454086304, "learning_rate": 2.14650416933932e-06, "loss": 0.9312, "step": 48590 }, { "epoch": 0.5922391624925353, "grad_norm": 1.8528242111206055, "learning_rate": 2.1461834509300835e-06, "loss": 0.7615, "step": 48595 }, { "epoch": 0.5923000987166831, "grad_norm": 1.7297080755233765, "learning_rate": 2.145862732520847e-06, "loss": 0.8211, "step": 48600 }, { "epoch": 0.5923610349408309, "grad_norm": 1.8637323379516602, "learning_rate": 2.14554201411161e-06, "loss": 0.8519, "step": 48605 }, { "epoch": 0.5924219711649787, "grad_norm": 1.8857825994491577, "learning_rate": 2.1452212957023734e-06, "loss": 0.8249, "step": 48610 }, { "epoch": 0.5924829073891266, "grad_norm": 2.1824419498443604, "learning_rate": 2.144900577293137e-06, "loss": 0.8676, "step": 48615 }, { "epoch": 0.5925438436132744, "grad_norm": 1.9507031440734863, "learning_rate": 2.1445798588839002e-06, "loss": 0.9005, "step": 48620 }, { "epoch": 0.5926047798374221, "grad_norm": 1.8602430820465088, "learning_rate": 2.1442591404746637e-06, "loss": 0.7816, "step": 48625 }, { "epoch": 0.5926657160615699, "grad_norm": 1.5591087341308594, "learning_rate": 2.1439384220654267e-06, "loss": 0.7725, "step": 48630 }, { "epoch": 0.5927266522857177, "grad_norm": 2.00034761428833, "learning_rate": 2.14361770365619e-06, "loss": 0.9136, "step": 48635 }, { "epoch": 0.5927875885098656, "grad_norm": 2.0950591564178467, "learning_rate": 2.1432969852469536e-06, "loss": 0.9229, "step": 48640 }, { "epoch": 0.5928485247340134, "grad_norm": 1.9161685705184937, "learning_rate": 2.1429762668377166e-06, "loss": 0.8494, "step": 48645 }, { "epoch": 0.5929094609581612, "grad_norm": 1.8106681108474731, "learning_rate": 2.14265554842848e-06, "loss": 0.8094, "step": 48650 }, { "epoch": 0.592970397182309, "grad_norm": 2.0619513988494873, "learning_rate": 2.1423348300192435e-06, "loss": 0.8114, "step": 48655 }, { "epoch": 0.5930313334064568, "grad_norm": 1.9872640371322632, "learning_rate": 2.1420141116100065e-06, "loss": 0.8087, "step": 48660 }, { "epoch": 0.5930922696306046, "grad_norm": 2.0180270671844482, "learning_rate": 2.14169339320077e-06, "loss": 0.7633, "step": 48665 }, { "epoch": 0.5931532058547524, "grad_norm": 2.0728023052215576, "learning_rate": 2.141372674791533e-06, "loss": 0.8006, "step": 48670 }, { "epoch": 0.5932141420789002, "grad_norm": 1.7961503267288208, "learning_rate": 2.1410519563822964e-06, "loss": 0.8356, "step": 48675 }, { "epoch": 0.593275078303048, "grad_norm": 1.9045555591583252, "learning_rate": 2.14073123797306e-06, "loss": 0.7842, "step": 48680 }, { "epoch": 0.5933360145271959, "grad_norm": 1.7009625434875488, "learning_rate": 2.1404105195638233e-06, "loss": 0.8139, "step": 48685 }, { "epoch": 0.5933969507513437, "grad_norm": 1.9202287197113037, "learning_rate": 2.1400898011545863e-06, "loss": 0.851, "step": 48690 }, { "epoch": 0.5934578869754914, "grad_norm": 2.1896674633026123, "learning_rate": 2.1397690827453497e-06, "loss": 0.8172, "step": 48695 }, { "epoch": 0.5935188231996392, "grad_norm": 2.4592692852020264, "learning_rate": 2.139448364336113e-06, "loss": 0.861, "step": 48700 }, { "epoch": 0.593579759423787, "grad_norm": 2.259347915649414, "learning_rate": 2.1391276459268766e-06, "loss": 0.783, "step": 48705 }, { "epoch": 0.5936406956479349, "grad_norm": 2.381716012954712, "learning_rate": 2.1388069275176396e-06, "loss": 0.8505, "step": 48710 }, { "epoch": 0.5937016318720827, "grad_norm": 1.905687928199768, "learning_rate": 2.138486209108403e-06, "loss": 0.859, "step": 48715 }, { "epoch": 0.5937625680962305, "grad_norm": 2.0658557415008545, "learning_rate": 2.1381654906991665e-06, "loss": 0.8387, "step": 48720 }, { "epoch": 0.5938235043203783, "grad_norm": 2.4019811153411865, "learning_rate": 2.1378447722899295e-06, "loss": 0.8679, "step": 48725 }, { "epoch": 0.593884440544526, "grad_norm": 1.7676640748977661, "learning_rate": 2.137524053880693e-06, "loss": 0.8153, "step": 48730 }, { "epoch": 0.5939453767686739, "grad_norm": 1.9369977712631226, "learning_rate": 2.1372033354714564e-06, "loss": 0.8254, "step": 48735 }, { "epoch": 0.5940063129928217, "grad_norm": 2.1231014728546143, "learning_rate": 2.1368826170622194e-06, "loss": 0.8142, "step": 48740 }, { "epoch": 0.5940672492169695, "grad_norm": 1.752331256866455, "learning_rate": 2.136561898652983e-06, "loss": 0.9044, "step": 48745 }, { "epoch": 0.5941281854411173, "grad_norm": 1.7465280294418335, "learning_rate": 2.136241180243746e-06, "loss": 0.8569, "step": 48750 }, { "epoch": 0.5941891216652652, "grad_norm": 2.0950188636779785, "learning_rate": 2.1359204618345093e-06, "loss": 0.8183, "step": 48755 }, { "epoch": 0.594250057889413, "grad_norm": 2.0672831535339355, "learning_rate": 2.1355997434252727e-06, "loss": 0.9527, "step": 48760 }, { "epoch": 0.5943109941135607, "grad_norm": 2.591002941131592, "learning_rate": 2.135279025016036e-06, "loss": 0.8414, "step": 48765 }, { "epoch": 0.5943719303377085, "grad_norm": 2.0620906352996826, "learning_rate": 2.1349583066067996e-06, "loss": 0.8021, "step": 48770 }, { "epoch": 0.5944328665618563, "grad_norm": 2.1115403175354004, "learning_rate": 2.1346375881975626e-06, "loss": 0.8425, "step": 48775 }, { "epoch": 0.5944938027860042, "grad_norm": 2.2276268005371094, "learning_rate": 2.134316869788326e-06, "loss": 0.8449, "step": 48780 }, { "epoch": 0.594554739010152, "grad_norm": 1.8785498142242432, "learning_rate": 2.1339961513790895e-06, "loss": 0.8155, "step": 48785 }, { "epoch": 0.5946156752342998, "grad_norm": 2.0205914974212646, "learning_rate": 2.1336754329698525e-06, "loss": 0.7924, "step": 48790 }, { "epoch": 0.5946766114584476, "grad_norm": 2.4427554607391357, "learning_rate": 2.133354714560616e-06, "loss": 0.7486, "step": 48795 }, { "epoch": 0.5947375476825953, "grad_norm": 2.2247724533081055, "learning_rate": 2.1330339961513794e-06, "loss": 0.7987, "step": 48800 }, { "epoch": 0.5947984839067432, "grad_norm": 1.8739973306655884, "learning_rate": 2.1327132777421424e-06, "loss": 0.8316, "step": 48805 }, { "epoch": 0.594859420130891, "grad_norm": 2.0904486179351807, "learning_rate": 2.132392559332906e-06, "loss": 0.8767, "step": 48810 }, { "epoch": 0.5949203563550388, "grad_norm": 1.9853227138519287, "learning_rate": 2.1320718409236693e-06, "loss": 0.7667, "step": 48815 }, { "epoch": 0.5949812925791866, "grad_norm": 1.8816611766815186, "learning_rate": 2.1317511225144323e-06, "loss": 0.8327, "step": 48820 }, { "epoch": 0.5950422288033345, "grad_norm": 2.0316309928894043, "learning_rate": 2.1314304041051958e-06, "loss": 0.763, "step": 48825 }, { "epoch": 0.5951031650274823, "grad_norm": 1.7751381397247314, "learning_rate": 2.131109685695959e-06, "loss": 0.8108, "step": 48830 }, { "epoch": 0.59516410125163, "grad_norm": 1.86109459400177, "learning_rate": 2.1307889672867222e-06, "loss": 0.8488, "step": 48835 }, { "epoch": 0.5952250374757778, "grad_norm": 1.9141842126846313, "learning_rate": 2.1304682488774857e-06, "loss": 0.8368, "step": 48840 }, { "epoch": 0.5952859736999256, "grad_norm": 2.015976905822754, "learning_rate": 2.130147530468249e-06, "loss": 0.8116, "step": 48845 }, { "epoch": 0.5953469099240735, "grad_norm": 2.219163417816162, "learning_rate": 2.1298268120590126e-06, "loss": 0.8747, "step": 48850 }, { "epoch": 0.5954078461482213, "grad_norm": 1.7106517553329468, "learning_rate": 2.1295060936497756e-06, "loss": 0.7929, "step": 48855 }, { "epoch": 0.5954687823723691, "grad_norm": 2.0096418857574463, "learning_rate": 2.129185375240539e-06, "loss": 0.83, "step": 48860 }, { "epoch": 0.5955297185965169, "grad_norm": 2.703672170639038, "learning_rate": 2.1288646568313025e-06, "loss": 0.882, "step": 48865 }, { "epoch": 0.5955906548206646, "grad_norm": 1.8436064720153809, "learning_rate": 2.1285439384220655e-06, "loss": 0.8832, "step": 48870 }, { "epoch": 0.5956515910448125, "grad_norm": 1.4624909162521362, "learning_rate": 2.128223220012829e-06, "loss": 0.7978, "step": 48875 }, { "epoch": 0.5957125272689603, "grad_norm": 1.720432996749878, "learning_rate": 2.1279025016035924e-06, "loss": 0.8591, "step": 48880 }, { "epoch": 0.5957734634931081, "grad_norm": 2.2496132850646973, "learning_rate": 2.1275817831943554e-06, "loss": 0.8574, "step": 48885 }, { "epoch": 0.5958343997172559, "grad_norm": 1.953922986984253, "learning_rate": 2.127261064785119e-06, "loss": 0.7821, "step": 48890 }, { "epoch": 0.5958953359414038, "grad_norm": 1.699394702911377, "learning_rate": 2.1269403463758822e-06, "loss": 0.781, "step": 48895 }, { "epoch": 0.5959562721655516, "grad_norm": 1.6706417798995972, "learning_rate": 2.1266196279666453e-06, "loss": 0.8417, "step": 48900 }, { "epoch": 0.5960172083896993, "grad_norm": 2.140899896621704, "learning_rate": 2.1262989095574087e-06, "loss": 0.8217, "step": 48905 }, { "epoch": 0.5960781446138471, "grad_norm": 2.3302557468414307, "learning_rate": 2.125978191148172e-06, "loss": 0.7817, "step": 48910 }, { "epoch": 0.5961390808379949, "grad_norm": 1.826947569847107, "learning_rate": 2.125657472738935e-06, "loss": 0.7663, "step": 48915 }, { "epoch": 0.5962000170621428, "grad_norm": 1.6968388557434082, "learning_rate": 2.1253367543296986e-06, "loss": 0.7921, "step": 48920 }, { "epoch": 0.5962609532862906, "grad_norm": 1.7733427286148071, "learning_rate": 2.125016035920462e-06, "loss": 0.8195, "step": 48925 }, { "epoch": 0.5963218895104384, "grad_norm": 2.438822031021118, "learning_rate": 2.1246953175112255e-06, "loss": 0.7645, "step": 48930 }, { "epoch": 0.5963828257345862, "grad_norm": 2.2355916500091553, "learning_rate": 2.124374599101989e-06, "loss": 0.8242, "step": 48935 }, { "epoch": 0.5964437619587339, "grad_norm": 2.112478017807007, "learning_rate": 2.124053880692752e-06, "loss": 0.8109, "step": 48940 }, { "epoch": 0.5965046981828818, "grad_norm": 2.081295967102051, "learning_rate": 2.1237331622835154e-06, "loss": 0.7943, "step": 48945 }, { "epoch": 0.5965656344070296, "grad_norm": 1.7986518144607544, "learning_rate": 2.123412443874279e-06, "loss": 0.7554, "step": 48950 }, { "epoch": 0.5966265706311774, "grad_norm": 1.7414591312408447, "learning_rate": 2.123091725465042e-06, "loss": 0.8799, "step": 48955 }, { "epoch": 0.5966875068553252, "grad_norm": 3.448103666305542, "learning_rate": 2.1227710070558053e-06, "loss": 0.8271, "step": 48960 }, { "epoch": 0.5967484430794731, "grad_norm": 1.7356091737747192, "learning_rate": 2.1224502886465683e-06, "loss": 0.8141, "step": 48965 }, { "epoch": 0.5968093793036209, "grad_norm": 1.986674189567566, "learning_rate": 2.1221295702373317e-06, "loss": 0.8193, "step": 48970 }, { "epoch": 0.5968703155277686, "grad_norm": 1.9455963373184204, "learning_rate": 2.121808851828095e-06, "loss": 0.8885, "step": 48975 }, { "epoch": 0.5969312517519164, "grad_norm": 1.888426423072815, "learning_rate": 2.121488133418858e-06, "loss": 0.811, "step": 48980 }, { "epoch": 0.5969921879760642, "grad_norm": 1.9122949838638306, "learning_rate": 2.1211674150096216e-06, "loss": 0.8317, "step": 48985 }, { "epoch": 0.5970531242002121, "grad_norm": 1.9094207286834717, "learning_rate": 2.120846696600385e-06, "loss": 0.869, "step": 48990 }, { "epoch": 0.5971140604243599, "grad_norm": 2.197511911392212, "learning_rate": 2.1205259781911485e-06, "loss": 0.8859, "step": 48995 }, { "epoch": 0.5971749966485077, "grad_norm": 1.7398295402526855, "learning_rate": 2.1202052597819115e-06, "loss": 0.8517, "step": 49000 }, { "epoch": 0.5972359328726555, "grad_norm": 2.0039148330688477, "learning_rate": 2.119884541372675e-06, "loss": 0.8152, "step": 49005 }, { "epoch": 0.5972968690968032, "grad_norm": 1.830749750137329, "learning_rate": 2.1195638229634384e-06, "loss": 0.8852, "step": 49010 }, { "epoch": 0.5973578053209511, "grad_norm": 2.3528215885162354, "learning_rate": 2.119243104554202e-06, "loss": 0.7692, "step": 49015 }, { "epoch": 0.5974187415450989, "grad_norm": 2.18585205078125, "learning_rate": 2.118922386144965e-06, "loss": 0.8121, "step": 49020 }, { "epoch": 0.5974796777692467, "grad_norm": 1.8006081581115723, "learning_rate": 2.1186016677357283e-06, "loss": 0.8239, "step": 49025 }, { "epoch": 0.5975406139933945, "grad_norm": 1.922019362449646, "learning_rate": 2.1182809493264917e-06, "loss": 0.7515, "step": 49030 }, { "epoch": 0.5976015502175424, "grad_norm": 1.9981824159622192, "learning_rate": 2.1179602309172548e-06, "loss": 0.7674, "step": 49035 }, { "epoch": 0.5976624864416902, "grad_norm": 1.9001318216323853, "learning_rate": 2.117639512508018e-06, "loss": 0.8409, "step": 49040 }, { "epoch": 0.5977234226658379, "grad_norm": 2.219550132751465, "learning_rate": 2.1173187940987812e-06, "loss": 0.824, "step": 49045 }, { "epoch": 0.5977843588899857, "grad_norm": 1.9446755647659302, "learning_rate": 2.1169980756895447e-06, "loss": 0.7949, "step": 49050 }, { "epoch": 0.5978452951141335, "grad_norm": 2.2158994674682617, "learning_rate": 2.116677357280308e-06, "loss": 0.8253, "step": 49055 }, { "epoch": 0.5979062313382814, "grad_norm": 1.5951790809631348, "learning_rate": 2.116356638871071e-06, "loss": 0.7912, "step": 49060 }, { "epoch": 0.5979671675624292, "grad_norm": 2.0418853759765625, "learning_rate": 2.1160359204618346e-06, "loss": 0.7741, "step": 49065 }, { "epoch": 0.598028103786577, "grad_norm": 1.9280654191970825, "learning_rate": 2.115715202052598e-06, "loss": 0.8168, "step": 49070 }, { "epoch": 0.5980890400107248, "grad_norm": 2.127314567565918, "learning_rate": 2.1153944836433614e-06, "loss": 0.8338, "step": 49075 }, { "epoch": 0.5981499762348725, "grad_norm": 1.9129472970962524, "learning_rate": 2.1150737652341245e-06, "loss": 0.8292, "step": 49080 }, { "epoch": 0.5982109124590204, "grad_norm": 1.751173496246338, "learning_rate": 2.114753046824888e-06, "loss": 0.8442, "step": 49085 }, { "epoch": 0.5982718486831682, "grad_norm": 1.821195363998413, "learning_rate": 2.1144323284156513e-06, "loss": 0.755, "step": 49090 }, { "epoch": 0.598332784907316, "grad_norm": 1.6653577089309692, "learning_rate": 2.1141116100064148e-06, "loss": 0.7365, "step": 49095 }, { "epoch": 0.5983937211314638, "grad_norm": 2.1342196464538574, "learning_rate": 2.113790891597178e-06, "loss": 0.8206, "step": 49100 }, { "epoch": 0.5984546573556117, "grad_norm": 2.0635616779327393, "learning_rate": 2.1134701731879412e-06, "loss": 0.8061, "step": 49105 }, { "epoch": 0.5985155935797595, "grad_norm": 2.22991681098938, "learning_rate": 2.1131494547787047e-06, "loss": 0.8029, "step": 49110 }, { "epoch": 0.5985765298039072, "grad_norm": 1.6061898469924927, "learning_rate": 2.1128287363694677e-06, "loss": 0.8313, "step": 49115 }, { "epoch": 0.598637466028055, "grad_norm": 1.8261470794677734, "learning_rate": 2.112508017960231e-06, "loss": 0.7789, "step": 49120 }, { "epoch": 0.5986984022522028, "grad_norm": 1.692800521850586, "learning_rate": 2.112187299550994e-06, "loss": 0.7742, "step": 49125 }, { "epoch": 0.5987593384763507, "grad_norm": 2.0655746459960938, "learning_rate": 2.1118665811417576e-06, "loss": 0.7789, "step": 49130 }, { "epoch": 0.5988202747004985, "grad_norm": 2.346778392791748, "learning_rate": 2.111545862732521e-06, "loss": 0.7886, "step": 49135 }, { "epoch": 0.5988812109246463, "grad_norm": 1.6846799850463867, "learning_rate": 2.111225144323284e-06, "loss": 0.7424, "step": 49140 }, { "epoch": 0.5989421471487941, "grad_norm": 2.136481285095215, "learning_rate": 2.1109044259140475e-06, "loss": 0.8279, "step": 49145 }, { "epoch": 0.5990030833729418, "grad_norm": 1.6096749305725098, "learning_rate": 2.110583707504811e-06, "loss": 0.812, "step": 49150 }, { "epoch": 0.5990640195970897, "grad_norm": 2.633451461791992, "learning_rate": 2.1102629890955744e-06, "loss": 0.8337, "step": 49155 }, { "epoch": 0.5991249558212375, "grad_norm": 2.525968074798584, "learning_rate": 2.109942270686338e-06, "loss": 0.8645, "step": 49160 }, { "epoch": 0.5991858920453853, "grad_norm": 1.7816762924194336, "learning_rate": 2.109621552277101e-06, "loss": 0.8247, "step": 49165 }, { "epoch": 0.5992468282695331, "grad_norm": 1.7289776802062988, "learning_rate": 2.1093008338678643e-06, "loss": 0.8545, "step": 49170 }, { "epoch": 0.599307764493681, "grad_norm": 3.0011043548583984, "learning_rate": 2.1089801154586277e-06, "loss": 0.8875, "step": 49175 }, { "epoch": 0.5993687007178288, "grad_norm": 3.291104316711426, "learning_rate": 2.1086593970493907e-06, "loss": 0.7969, "step": 49180 }, { "epoch": 0.5994296369419765, "grad_norm": 1.9359664916992188, "learning_rate": 2.108338678640154e-06, "loss": 0.8039, "step": 49185 }, { "epoch": 0.5994905731661243, "grad_norm": 2.1136515140533447, "learning_rate": 2.1080179602309176e-06, "loss": 0.8034, "step": 49190 }, { "epoch": 0.5995515093902721, "grad_norm": 1.5346969366073608, "learning_rate": 2.1076972418216806e-06, "loss": 0.7543, "step": 49195 }, { "epoch": 0.59961244561442, "grad_norm": 1.626920461654663, "learning_rate": 2.107376523412444e-06, "loss": 0.81, "step": 49200 }, { "epoch": 0.5996733818385678, "grad_norm": 1.725084900856018, "learning_rate": 2.107055805003207e-06, "loss": 0.7594, "step": 49205 }, { "epoch": 0.5997343180627156, "grad_norm": 1.873247504234314, "learning_rate": 2.1067350865939705e-06, "loss": 0.8338, "step": 49210 }, { "epoch": 0.5997952542868634, "grad_norm": 1.9645198583602905, "learning_rate": 2.106414368184734e-06, "loss": 0.8291, "step": 49215 }, { "epoch": 0.5998561905110111, "grad_norm": 1.9718602895736694, "learning_rate": 2.106093649775497e-06, "loss": 0.845, "step": 49220 }, { "epoch": 0.599917126735159, "grad_norm": 1.792769193649292, "learning_rate": 2.1057729313662604e-06, "loss": 0.8117, "step": 49225 }, { "epoch": 0.5999780629593068, "grad_norm": 1.9767273664474487, "learning_rate": 2.105452212957024e-06, "loss": 0.8506, "step": 49230 }, { "epoch": 0.6000389991834546, "grad_norm": 1.9305081367492676, "learning_rate": 2.1051314945477873e-06, "loss": 0.8647, "step": 49235 }, { "epoch": 0.6000999354076024, "grad_norm": 1.7827297449111938, "learning_rate": 2.1048107761385507e-06, "loss": 0.8381, "step": 49240 }, { "epoch": 0.6001608716317502, "grad_norm": 2.24214506149292, "learning_rate": 2.104490057729314e-06, "loss": 0.7937, "step": 49245 }, { "epoch": 0.6002218078558981, "grad_norm": 1.9906165599822998, "learning_rate": 2.104169339320077e-06, "loss": 0.8738, "step": 49250 }, { "epoch": 0.6002827440800458, "grad_norm": 1.7739707231521606, "learning_rate": 2.1038486209108406e-06, "loss": 0.87, "step": 49255 }, { "epoch": 0.6003436803041936, "grad_norm": 2.2643585205078125, "learning_rate": 2.1035279025016036e-06, "loss": 0.8706, "step": 49260 }, { "epoch": 0.6004046165283414, "grad_norm": 1.7038180828094482, "learning_rate": 2.103207184092367e-06, "loss": 0.8198, "step": 49265 }, { "epoch": 0.6004655527524893, "grad_norm": 1.6880285739898682, "learning_rate": 2.1028864656831305e-06, "loss": 0.7811, "step": 49270 }, { "epoch": 0.6005264889766371, "grad_norm": 2.1967432498931885, "learning_rate": 2.1025657472738935e-06, "loss": 0.8308, "step": 49275 }, { "epoch": 0.6005874252007849, "grad_norm": 1.9854626655578613, "learning_rate": 2.102245028864657e-06, "loss": 0.8097, "step": 49280 }, { "epoch": 0.6006483614249327, "grad_norm": 1.9534342288970947, "learning_rate": 2.1019243104554204e-06, "loss": 0.8696, "step": 49285 }, { "epoch": 0.6007092976490804, "grad_norm": 1.8311551809310913, "learning_rate": 2.1016035920461834e-06, "loss": 0.8699, "step": 49290 }, { "epoch": 0.6007702338732283, "grad_norm": 2.297250509262085, "learning_rate": 2.101282873636947e-06, "loss": 0.8048, "step": 49295 }, { "epoch": 0.6008311700973761, "grad_norm": 2.091254711151123, "learning_rate": 2.1009621552277103e-06, "loss": 0.7549, "step": 49300 }, { "epoch": 0.6008921063215239, "grad_norm": 1.8865424394607544, "learning_rate": 2.1006414368184733e-06, "loss": 0.8872, "step": 49305 }, { "epoch": 0.6009530425456717, "grad_norm": 2.2362453937530518, "learning_rate": 2.1003207184092368e-06, "loss": 0.8293, "step": 49310 }, { "epoch": 0.6010139787698195, "grad_norm": 1.7126637697219849, "learning_rate": 2.1000000000000002e-06, "loss": 0.8119, "step": 49315 }, { "epoch": 0.6010749149939674, "grad_norm": 2.0041275024414062, "learning_rate": 2.0996792815907637e-06, "loss": 0.8065, "step": 49320 }, { "epoch": 0.6011358512181151, "grad_norm": 2.027982234954834, "learning_rate": 2.099358563181527e-06, "loss": 0.8031, "step": 49325 }, { "epoch": 0.6011967874422629, "grad_norm": 1.8739463090896606, "learning_rate": 2.09903784477229e-06, "loss": 0.8852, "step": 49330 }, { "epoch": 0.6012577236664107, "grad_norm": 1.9098901748657227, "learning_rate": 2.0987171263630536e-06, "loss": 0.8025, "step": 49335 }, { "epoch": 0.6013186598905585, "grad_norm": 2.1969144344329834, "learning_rate": 2.0983964079538166e-06, "loss": 0.8727, "step": 49340 }, { "epoch": 0.6013795961147064, "grad_norm": 1.808927059173584, "learning_rate": 2.09807568954458e-06, "loss": 0.8547, "step": 49345 }, { "epoch": 0.6014405323388542, "grad_norm": 1.9505473375320435, "learning_rate": 2.0977549711353435e-06, "loss": 0.8178, "step": 49350 }, { "epoch": 0.601501468563002, "grad_norm": 2.023569107055664, "learning_rate": 2.0974342527261065e-06, "loss": 0.7903, "step": 49355 }, { "epoch": 0.6015624047871497, "grad_norm": 2.3263158798217773, "learning_rate": 2.09711353431687e-06, "loss": 0.8909, "step": 49360 }, { "epoch": 0.6016233410112976, "grad_norm": 1.9300583600997925, "learning_rate": 2.0967928159076334e-06, "loss": 0.7991, "step": 49365 }, { "epoch": 0.6016842772354454, "grad_norm": 1.864586353302002, "learning_rate": 2.0964720974983964e-06, "loss": 0.8015, "step": 49370 }, { "epoch": 0.6017452134595932, "grad_norm": 1.781911849975586, "learning_rate": 2.09615137908916e-06, "loss": 0.886, "step": 49375 }, { "epoch": 0.601806149683741, "grad_norm": 2.055396795272827, "learning_rate": 2.0958306606799233e-06, "loss": 0.7443, "step": 49380 }, { "epoch": 0.6018670859078888, "grad_norm": 1.9306423664093018, "learning_rate": 2.0955099422706867e-06, "loss": 0.8451, "step": 49385 }, { "epoch": 0.6019280221320366, "grad_norm": 1.8144389390945435, "learning_rate": 2.0951892238614497e-06, "loss": 0.8147, "step": 49390 }, { "epoch": 0.6019889583561844, "grad_norm": 1.9087483882904053, "learning_rate": 2.094868505452213e-06, "loss": 0.8604, "step": 49395 }, { "epoch": 0.6020498945803322, "grad_norm": 1.8815431594848633, "learning_rate": 2.0945477870429766e-06, "loss": 0.8013, "step": 49400 }, { "epoch": 0.60211083080448, "grad_norm": 1.8810734748840332, "learning_rate": 2.09422706863374e-06, "loss": 0.8381, "step": 49405 }, { "epoch": 0.6021717670286278, "grad_norm": 2.438324213027954, "learning_rate": 2.093906350224503e-06, "loss": 0.785, "step": 49410 }, { "epoch": 0.6022327032527757, "grad_norm": 2.0063042640686035, "learning_rate": 2.0935856318152665e-06, "loss": 0.8354, "step": 49415 }, { "epoch": 0.6022936394769235, "grad_norm": 1.8139822483062744, "learning_rate": 2.0932649134060295e-06, "loss": 0.9028, "step": 49420 }, { "epoch": 0.6023545757010712, "grad_norm": 2.3131167888641357, "learning_rate": 2.092944194996793e-06, "loss": 0.7803, "step": 49425 }, { "epoch": 0.602415511925219, "grad_norm": 1.9184776544570923, "learning_rate": 2.0926234765875564e-06, "loss": 0.8472, "step": 49430 }, { "epoch": 0.6024764481493668, "grad_norm": 2.0335845947265625, "learning_rate": 2.0923027581783194e-06, "loss": 0.8415, "step": 49435 }, { "epoch": 0.6025373843735147, "grad_norm": 1.7083297967910767, "learning_rate": 2.091982039769083e-06, "loss": 0.7939, "step": 49440 }, { "epoch": 0.6025983205976625, "grad_norm": 1.7480617761611938, "learning_rate": 2.0916613213598463e-06, "loss": 0.8014, "step": 49445 }, { "epoch": 0.6026592568218103, "grad_norm": 3.1159257888793945, "learning_rate": 2.0913406029506093e-06, "loss": 0.7914, "step": 49450 }, { "epoch": 0.6027201930459581, "grad_norm": 1.963673710823059, "learning_rate": 2.0910198845413727e-06, "loss": 0.8382, "step": 49455 }, { "epoch": 0.6027811292701059, "grad_norm": 1.8818929195404053, "learning_rate": 2.090699166132136e-06, "loss": 0.8298, "step": 49460 }, { "epoch": 0.6028420654942537, "grad_norm": 2.4219043254852295, "learning_rate": 2.0903784477228996e-06, "loss": 0.8473, "step": 49465 }, { "epoch": 0.6029030017184015, "grad_norm": 2.097672462463379, "learning_rate": 2.090057729313663e-06, "loss": 0.8532, "step": 49470 }, { "epoch": 0.6029639379425493, "grad_norm": 1.8124781847000122, "learning_rate": 2.089737010904426e-06, "loss": 0.8277, "step": 49475 }, { "epoch": 0.6030248741666971, "grad_norm": 1.9470587968826294, "learning_rate": 2.0894162924951895e-06, "loss": 0.8779, "step": 49480 }, { "epoch": 0.603085810390845, "grad_norm": 2.3473522663116455, "learning_rate": 2.089095574085953e-06, "loss": 0.7793, "step": 49485 }, { "epoch": 0.6031467466149928, "grad_norm": 1.8564718961715698, "learning_rate": 2.088774855676716e-06, "loss": 0.7956, "step": 49490 }, { "epoch": 0.6032076828391405, "grad_norm": 1.9449716806411743, "learning_rate": 2.0884541372674794e-06, "loss": 0.8302, "step": 49495 }, { "epoch": 0.6032686190632883, "grad_norm": 1.9198957681655884, "learning_rate": 2.0881334188582424e-06, "loss": 0.8361, "step": 49500 }, { "epoch": 0.6033295552874361, "grad_norm": 1.9745289087295532, "learning_rate": 2.087812700449006e-06, "loss": 0.8046, "step": 49505 }, { "epoch": 0.603390491511584, "grad_norm": 1.654723048210144, "learning_rate": 2.0874919820397693e-06, "loss": 0.7581, "step": 49510 }, { "epoch": 0.6034514277357318, "grad_norm": 2.3049378395080566, "learning_rate": 2.0871712636305323e-06, "loss": 0.766, "step": 49515 }, { "epoch": 0.6035123639598796, "grad_norm": 1.891478419303894, "learning_rate": 2.0868505452212958e-06, "loss": 0.9013, "step": 49520 }, { "epoch": 0.6035733001840274, "grad_norm": 1.8526066541671753, "learning_rate": 2.086529826812059e-06, "loss": 0.7757, "step": 49525 }, { "epoch": 0.6036342364081752, "grad_norm": 1.637410283088684, "learning_rate": 2.0862091084028222e-06, "loss": 0.7711, "step": 49530 }, { "epoch": 0.603695172632323, "grad_norm": 2.1506850719451904, "learning_rate": 2.0858883899935857e-06, "loss": 0.8639, "step": 49535 }, { "epoch": 0.6037561088564708, "grad_norm": 1.951555848121643, "learning_rate": 2.085567671584349e-06, "loss": 0.834, "step": 49540 }, { "epoch": 0.6038170450806186, "grad_norm": 2.7177512645721436, "learning_rate": 2.0852469531751125e-06, "loss": 0.7998, "step": 49545 }, { "epoch": 0.6038779813047664, "grad_norm": 2.019148111343384, "learning_rate": 2.084926234765876e-06, "loss": 0.7938, "step": 49550 }, { "epoch": 0.6039389175289143, "grad_norm": 1.851454257965088, "learning_rate": 2.084605516356639e-06, "loss": 0.7554, "step": 49555 }, { "epoch": 0.6039998537530621, "grad_norm": 2.0449657440185547, "learning_rate": 2.0842847979474024e-06, "loss": 0.861, "step": 49560 }, { "epoch": 0.6040607899772098, "grad_norm": 1.8868781328201294, "learning_rate": 2.083964079538166e-06, "loss": 0.8285, "step": 49565 }, { "epoch": 0.6041217262013576, "grad_norm": 1.9487645626068115, "learning_rate": 2.083643361128929e-06, "loss": 0.9205, "step": 49570 }, { "epoch": 0.6041826624255054, "grad_norm": 2.069375991821289, "learning_rate": 2.0833226427196923e-06, "loss": 0.7884, "step": 49575 }, { "epoch": 0.6042435986496533, "grad_norm": 1.9849520921707153, "learning_rate": 2.0830019243104558e-06, "loss": 0.883, "step": 49580 }, { "epoch": 0.6043045348738011, "grad_norm": 2.2386748790740967, "learning_rate": 2.082681205901219e-06, "loss": 0.8935, "step": 49585 }, { "epoch": 0.6043654710979489, "grad_norm": 1.8549121618270874, "learning_rate": 2.0823604874919822e-06, "loss": 0.8958, "step": 49590 }, { "epoch": 0.6044264073220967, "grad_norm": 2.128582715988159, "learning_rate": 2.0820397690827453e-06, "loss": 0.8745, "step": 49595 }, { "epoch": 0.6044873435462444, "grad_norm": 2.147700548171997, "learning_rate": 2.0817190506735087e-06, "loss": 0.8213, "step": 49600 }, { "epoch": 0.6045482797703923, "grad_norm": 1.9740837812423706, "learning_rate": 2.081398332264272e-06, "loss": 0.7913, "step": 49605 }, { "epoch": 0.6046092159945401, "grad_norm": 1.754855990409851, "learning_rate": 2.0810776138550356e-06, "loss": 0.8399, "step": 49610 }, { "epoch": 0.6046701522186879, "grad_norm": 2.067490577697754, "learning_rate": 2.0807568954457986e-06, "loss": 0.8475, "step": 49615 }, { "epoch": 0.6047310884428357, "grad_norm": 1.9707142114639282, "learning_rate": 2.080436177036562e-06, "loss": 0.8335, "step": 49620 }, { "epoch": 0.6047920246669836, "grad_norm": 2.2166192531585693, "learning_rate": 2.0801154586273255e-06, "loss": 0.8698, "step": 49625 }, { "epoch": 0.6048529608911314, "grad_norm": 1.9717557430267334, "learning_rate": 2.079794740218089e-06, "loss": 0.8654, "step": 49630 }, { "epoch": 0.6049138971152791, "grad_norm": 1.7523267269134521, "learning_rate": 2.079474021808852e-06, "loss": 0.8466, "step": 49635 }, { "epoch": 0.6049748333394269, "grad_norm": 1.869038462638855, "learning_rate": 2.0791533033996154e-06, "loss": 0.8576, "step": 49640 }, { "epoch": 0.6050357695635747, "grad_norm": 2.496290445327759, "learning_rate": 2.078832584990379e-06, "loss": 0.8265, "step": 49645 }, { "epoch": 0.6050967057877226, "grad_norm": 2.234421968460083, "learning_rate": 2.078511866581142e-06, "loss": 0.8148, "step": 49650 }, { "epoch": 0.6051576420118704, "grad_norm": 2.129779577255249, "learning_rate": 2.0781911481719053e-06, "loss": 0.7855, "step": 49655 }, { "epoch": 0.6052185782360182, "grad_norm": 1.7027812004089355, "learning_rate": 2.0778704297626687e-06, "loss": 0.7928, "step": 49660 }, { "epoch": 0.605279514460166, "grad_norm": 2.0053212642669678, "learning_rate": 2.0775497113534317e-06, "loss": 0.8048, "step": 49665 }, { "epoch": 0.6053404506843137, "grad_norm": 2.4284260272979736, "learning_rate": 2.077228992944195e-06, "loss": 0.8138, "step": 49670 }, { "epoch": 0.6054013869084616, "grad_norm": 1.8547453880310059, "learning_rate": 2.076908274534958e-06, "loss": 0.8571, "step": 49675 }, { "epoch": 0.6054623231326094, "grad_norm": 2.146634101867676, "learning_rate": 2.0765875561257216e-06, "loss": 0.8474, "step": 49680 }, { "epoch": 0.6055232593567572, "grad_norm": 2.402803421020508, "learning_rate": 2.076266837716485e-06, "loss": 0.9131, "step": 49685 }, { "epoch": 0.605584195580905, "grad_norm": 1.9233372211456299, "learning_rate": 2.0759461193072485e-06, "loss": 0.781, "step": 49690 }, { "epoch": 0.6056451318050529, "grad_norm": 2.450702667236328, "learning_rate": 2.075625400898012e-06, "loss": 0.8423, "step": 49695 }, { "epoch": 0.6057060680292007, "grad_norm": 2.2859597206115723, "learning_rate": 2.075304682488775e-06, "loss": 0.8719, "step": 49700 }, { "epoch": 0.6057670042533484, "grad_norm": 2.330803394317627, "learning_rate": 2.0749839640795384e-06, "loss": 0.8048, "step": 49705 }, { "epoch": 0.6058279404774962, "grad_norm": 2.0675313472747803, "learning_rate": 2.074663245670302e-06, "loss": 0.8832, "step": 49710 }, { "epoch": 0.605888876701644, "grad_norm": 1.9252828359603882, "learning_rate": 2.074342527261065e-06, "loss": 0.8953, "step": 49715 }, { "epoch": 0.6059498129257919, "grad_norm": 1.658610224723816, "learning_rate": 2.0740218088518283e-06, "loss": 0.844, "step": 49720 }, { "epoch": 0.6060107491499397, "grad_norm": 1.6106261014938354, "learning_rate": 2.0737010904425917e-06, "loss": 0.8128, "step": 49725 }, { "epoch": 0.6060716853740875, "grad_norm": 1.9508843421936035, "learning_rate": 2.0733803720333548e-06, "loss": 0.828, "step": 49730 }, { "epoch": 0.6061326215982353, "grad_norm": 2.116849660873413, "learning_rate": 2.073059653624118e-06, "loss": 0.8468, "step": 49735 }, { "epoch": 0.606193557822383, "grad_norm": 2.1060433387756348, "learning_rate": 2.0727389352148816e-06, "loss": 0.8423, "step": 49740 }, { "epoch": 0.6062544940465309, "grad_norm": 2.159895420074463, "learning_rate": 2.0724182168056447e-06, "loss": 0.8106, "step": 49745 }, { "epoch": 0.6063154302706787, "grad_norm": 2.2011005878448486, "learning_rate": 2.072097498396408e-06, "loss": 0.764, "step": 49750 }, { "epoch": 0.6063763664948265, "grad_norm": 2.2902157306671143, "learning_rate": 2.071776779987171e-06, "loss": 0.7786, "step": 49755 }, { "epoch": 0.6064373027189743, "grad_norm": 1.9124418497085571, "learning_rate": 2.0714560615779346e-06, "loss": 0.8002, "step": 49760 }, { "epoch": 0.6064982389431222, "grad_norm": 1.9940578937530518, "learning_rate": 2.071135343168698e-06, "loss": 0.8053, "step": 49765 }, { "epoch": 0.60655917516727, "grad_norm": 1.8477294445037842, "learning_rate": 2.0708146247594614e-06, "loss": 0.7833, "step": 49770 }, { "epoch": 0.6066201113914177, "grad_norm": 1.92381751537323, "learning_rate": 2.070493906350225e-06, "loss": 0.775, "step": 49775 }, { "epoch": 0.6066810476155655, "grad_norm": 2.1661298274993896, "learning_rate": 2.070173187940988e-06, "loss": 0.81, "step": 49780 }, { "epoch": 0.6067419838397133, "grad_norm": 2.2368671894073486, "learning_rate": 2.0698524695317513e-06, "loss": 0.8383, "step": 49785 }, { "epoch": 0.6068029200638612, "grad_norm": 2.0055806636810303, "learning_rate": 2.0695317511225148e-06, "loss": 0.86, "step": 49790 }, { "epoch": 0.606863856288009, "grad_norm": 2.2142531871795654, "learning_rate": 2.0692110327132778e-06, "loss": 0.8724, "step": 49795 }, { "epoch": 0.6069247925121568, "grad_norm": 2.143453598022461, "learning_rate": 2.0688903143040412e-06, "loss": 0.8512, "step": 49800 }, { "epoch": 0.6069857287363046, "grad_norm": 2.2237088680267334, "learning_rate": 2.0685695958948047e-06, "loss": 0.8661, "step": 49805 }, { "epoch": 0.6070466649604523, "grad_norm": 2.3110198974609375, "learning_rate": 2.0682488774855677e-06, "loss": 0.7425, "step": 49810 }, { "epoch": 0.6071076011846002, "grad_norm": 1.94426691532135, "learning_rate": 2.067928159076331e-06, "loss": 0.7984, "step": 49815 }, { "epoch": 0.607168537408748, "grad_norm": 2.099472999572754, "learning_rate": 2.0676074406670946e-06, "loss": 0.8284, "step": 49820 }, { "epoch": 0.6072294736328958, "grad_norm": 1.8273571729660034, "learning_rate": 2.0672867222578576e-06, "loss": 0.8328, "step": 49825 }, { "epoch": 0.6072904098570436, "grad_norm": 1.877771019935608, "learning_rate": 2.066966003848621e-06, "loss": 0.9001, "step": 49830 }, { "epoch": 0.6073513460811915, "grad_norm": 1.8290989398956299, "learning_rate": 2.0666452854393845e-06, "loss": 0.8705, "step": 49835 }, { "epoch": 0.6074122823053393, "grad_norm": 2.0655157566070557, "learning_rate": 2.0663245670301475e-06, "loss": 0.8489, "step": 49840 }, { "epoch": 0.607473218529487, "grad_norm": 1.9137603044509888, "learning_rate": 2.066003848620911e-06, "loss": 0.8289, "step": 49845 }, { "epoch": 0.6075341547536348, "grad_norm": 1.8805984258651733, "learning_rate": 2.0656831302116744e-06, "loss": 0.6756, "step": 49850 }, { "epoch": 0.6075950909777826, "grad_norm": 2.082045793533325, "learning_rate": 2.065362411802438e-06, "loss": 0.8062, "step": 49855 }, { "epoch": 0.6076560272019305, "grad_norm": 1.8526591062545776, "learning_rate": 2.0650416933932012e-06, "loss": 0.8031, "step": 49860 }, { "epoch": 0.6077169634260783, "grad_norm": 1.9649364948272705, "learning_rate": 2.0647209749839643e-06, "loss": 0.8622, "step": 49865 }, { "epoch": 0.6077778996502261, "grad_norm": 1.8035962581634521, "learning_rate": 2.0644002565747277e-06, "loss": 0.8292, "step": 49870 }, { "epoch": 0.6078388358743739, "grad_norm": 1.838713526725769, "learning_rate": 2.064079538165491e-06, "loss": 0.8063, "step": 49875 }, { "epoch": 0.6078997720985216, "grad_norm": 1.851245403289795, "learning_rate": 2.063758819756254e-06, "loss": 0.8934, "step": 49880 }, { "epoch": 0.6079607083226695, "grad_norm": 1.890377402305603, "learning_rate": 2.0634381013470176e-06, "loss": 0.8379, "step": 49885 }, { "epoch": 0.6080216445468173, "grad_norm": 2.3456192016601562, "learning_rate": 2.0631173829377806e-06, "loss": 0.8899, "step": 49890 }, { "epoch": 0.6080825807709651, "grad_norm": 2.165065288543701, "learning_rate": 2.062796664528544e-06, "loss": 0.8638, "step": 49895 }, { "epoch": 0.6081435169951129, "grad_norm": 2.112710952758789, "learning_rate": 2.0624759461193075e-06, "loss": 0.8663, "step": 49900 }, { "epoch": 0.6082044532192608, "grad_norm": 2.141153573989868, "learning_rate": 2.0621552277100705e-06, "loss": 0.8239, "step": 49905 }, { "epoch": 0.6082653894434086, "grad_norm": 2.0356335639953613, "learning_rate": 2.061834509300834e-06, "loss": 0.8568, "step": 49910 }, { "epoch": 0.6083263256675563, "grad_norm": 1.9083061218261719, "learning_rate": 2.0615137908915974e-06, "loss": 0.7936, "step": 49915 }, { "epoch": 0.6083872618917041, "grad_norm": 1.6908131837844849, "learning_rate": 2.0611930724823604e-06, "loss": 0.8294, "step": 49920 }, { "epoch": 0.6084481981158519, "grad_norm": 1.632354736328125, "learning_rate": 2.060872354073124e-06, "loss": 0.7744, "step": 49925 }, { "epoch": 0.6085091343399998, "grad_norm": 1.939091444015503, "learning_rate": 2.0605516356638873e-06, "loss": 0.8449, "step": 49930 }, { "epoch": 0.6085700705641476, "grad_norm": 2.0883731842041016, "learning_rate": 2.0602309172546507e-06, "loss": 0.7885, "step": 49935 }, { "epoch": 0.6086310067882954, "grad_norm": 2.269498348236084, "learning_rate": 2.059910198845414e-06, "loss": 0.8444, "step": 49940 }, { "epoch": 0.6086919430124432, "grad_norm": 2.0745091438293457, "learning_rate": 2.059589480436177e-06, "loss": 0.7055, "step": 49945 }, { "epoch": 0.6087528792365909, "grad_norm": 1.581833839416504, "learning_rate": 2.0592687620269406e-06, "loss": 0.833, "step": 49950 }, { "epoch": 0.6088138154607388, "grad_norm": 1.9144195318222046, "learning_rate": 2.058948043617704e-06, "loss": 0.8197, "step": 49955 }, { "epoch": 0.6088747516848866, "grad_norm": 2.2879068851470947, "learning_rate": 2.058627325208467e-06, "loss": 0.8423, "step": 49960 }, { "epoch": 0.6089356879090344, "grad_norm": 2.284183979034424, "learning_rate": 2.0583066067992305e-06, "loss": 0.8673, "step": 49965 }, { "epoch": 0.6089966241331822, "grad_norm": 2.0144810676574707, "learning_rate": 2.0579858883899935e-06, "loss": 0.8334, "step": 49970 }, { "epoch": 0.60905756035733, "grad_norm": 2.177973508834839, "learning_rate": 2.057665169980757e-06, "loss": 0.8605, "step": 49975 }, { "epoch": 0.6091184965814779, "grad_norm": 2.038691282272339, "learning_rate": 2.0573444515715204e-06, "loss": 0.7834, "step": 49980 }, { "epoch": 0.6091794328056256, "grad_norm": 2.3313257694244385, "learning_rate": 2.0570237331622834e-06, "loss": 0.8195, "step": 49985 }, { "epoch": 0.6092403690297734, "grad_norm": 1.6698050498962402, "learning_rate": 2.056703014753047e-06, "loss": 0.7749, "step": 49990 }, { "epoch": 0.6093013052539212, "grad_norm": 1.7730069160461426, "learning_rate": 2.0563822963438103e-06, "loss": 0.818, "step": 49995 }, { "epoch": 0.609362241478069, "grad_norm": 1.4701182842254639, "learning_rate": 2.0560615779345738e-06, "loss": 0.8283, "step": 50000 }, { "epoch": 0.6094231777022169, "grad_norm": 1.9339442253112793, "learning_rate": 2.0557408595253368e-06, "loss": 0.9241, "step": 50005 }, { "epoch": 0.6094841139263647, "grad_norm": 1.791664481163025, "learning_rate": 2.0554201411161002e-06, "loss": 0.826, "step": 50010 }, { "epoch": 0.6095450501505125, "grad_norm": 2.1047346591949463, "learning_rate": 2.0550994227068637e-06, "loss": 0.7948, "step": 50015 }, { "epoch": 0.6096059863746602, "grad_norm": 2.2570197582244873, "learning_rate": 2.054778704297627e-06, "loss": 0.7746, "step": 50020 }, { "epoch": 0.6096669225988081, "grad_norm": 2.0247642993927, "learning_rate": 2.05445798588839e-06, "loss": 0.8286, "step": 50025 }, { "epoch": 0.6097278588229559, "grad_norm": 1.9682645797729492, "learning_rate": 2.0541372674791535e-06, "loss": 0.7563, "step": 50030 }, { "epoch": 0.6097887950471037, "grad_norm": 1.8860628604888916, "learning_rate": 2.053816549069917e-06, "loss": 0.8816, "step": 50035 }, { "epoch": 0.6098497312712515, "grad_norm": 1.960633635520935, "learning_rate": 2.05349583066068e-06, "loss": 0.7858, "step": 50040 }, { "epoch": 0.6099106674953994, "grad_norm": 2.161278009414673, "learning_rate": 2.0531751122514434e-06, "loss": 0.8584, "step": 50045 }, { "epoch": 0.6099716037195472, "grad_norm": 1.8645704984664917, "learning_rate": 2.0528543938422065e-06, "loss": 0.7872, "step": 50050 }, { "epoch": 0.6100325399436949, "grad_norm": 1.8294405937194824, "learning_rate": 2.05253367543297e-06, "loss": 0.823, "step": 50055 }, { "epoch": 0.6100934761678427, "grad_norm": 1.851380705833435, "learning_rate": 2.0522129570237333e-06, "loss": 0.8225, "step": 50060 }, { "epoch": 0.6101544123919905, "grad_norm": 2.363842010498047, "learning_rate": 2.0518922386144964e-06, "loss": 0.8156, "step": 50065 }, { "epoch": 0.6102153486161384, "grad_norm": 1.9243550300598145, "learning_rate": 2.05157152020526e-06, "loss": 0.8271, "step": 50070 }, { "epoch": 0.6102762848402862, "grad_norm": 2.029658555984497, "learning_rate": 2.0512508017960232e-06, "loss": 0.82, "step": 50075 }, { "epoch": 0.610337221064434, "grad_norm": 1.8329516649246216, "learning_rate": 2.0509300833867867e-06, "loss": 0.8856, "step": 50080 }, { "epoch": 0.6103981572885818, "grad_norm": 1.8035181760787964, "learning_rate": 2.05060936497755e-06, "loss": 0.8294, "step": 50085 }, { "epoch": 0.6104590935127295, "grad_norm": 1.96126127243042, "learning_rate": 2.050288646568313e-06, "loss": 0.7968, "step": 50090 }, { "epoch": 0.6105200297368774, "grad_norm": 1.7973147630691528, "learning_rate": 2.0499679281590766e-06, "loss": 0.8826, "step": 50095 }, { "epoch": 0.6105809659610252, "grad_norm": 2.314918279647827, "learning_rate": 2.04964720974984e-06, "loss": 0.8951, "step": 50100 }, { "epoch": 0.610641902185173, "grad_norm": 1.965331792831421, "learning_rate": 2.049326491340603e-06, "loss": 0.8702, "step": 50105 }, { "epoch": 0.6107028384093208, "grad_norm": 2.059056282043457, "learning_rate": 2.0490057729313665e-06, "loss": 0.7758, "step": 50110 }, { "epoch": 0.6107637746334686, "grad_norm": 1.8117878437042236, "learning_rate": 2.04868505452213e-06, "loss": 0.8381, "step": 50115 }, { "epoch": 0.6108247108576165, "grad_norm": 2.223917245864868, "learning_rate": 2.048364336112893e-06, "loss": 0.817, "step": 50120 }, { "epoch": 0.6108856470817642, "grad_norm": 2.518984317779541, "learning_rate": 2.0480436177036564e-06, "loss": 0.8519, "step": 50125 }, { "epoch": 0.610946583305912, "grad_norm": 2.0350418090820312, "learning_rate": 2.0477228992944194e-06, "loss": 0.7894, "step": 50130 }, { "epoch": 0.6110075195300598, "grad_norm": 1.6920552253723145, "learning_rate": 2.047402180885183e-06, "loss": 0.7622, "step": 50135 }, { "epoch": 0.6110684557542077, "grad_norm": 2.06209397315979, "learning_rate": 2.0470814624759463e-06, "loss": 0.8269, "step": 50140 }, { "epoch": 0.6111293919783555, "grad_norm": 1.9420795440673828, "learning_rate": 2.0467607440667093e-06, "loss": 0.8701, "step": 50145 }, { "epoch": 0.6111903282025033, "grad_norm": 2.125786781311035, "learning_rate": 2.0464400256574727e-06, "loss": 0.864, "step": 50150 }, { "epoch": 0.6112512644266511, "grad_norm": 1.9854005575180054, "learning_rate": 2.046119307248236e-06, "loss": 0.8082, "step": 50155 }, { "epoch": 0.6113122006507988, "grad_norm": 1.995469570159912, "learning_rate": 2.0457985888389996e-06, "loss": 0.8294, "step": 50160 }, { "epoch": 0.6113731368749467, "grad_norm": 1.9010854959487915, "learning_rate": 2.045477870429763e-06, "loss": 0.8237, "step": 50165 }, { "epoch": 0.6114340730990945, "grad_norm": 1.7708550691604614, "learning_rate": 2.0451571520205265e-06, "loss": 0.8092, "step": 50170 }, { "epoch": 0.6114950093232423, "grad_norm": 2.242636203765869, "learning_rate": 2.0448364336112895e-06, "loss": 0.7889, "step": 50175 }, { "epoch": 0.6115559455473901, "grad_norm": 1.99323308467865, "learning_rate": 2.044515715202053e-06, "loss": 0.8741, "step": 50180 }, { "epoch": 0.611616881771538, "grad_norm": 1.6785441637039185, "learning_rate": 2.044194996792816e-06, "loss": 0.7228, "step": 50185 }, { "epoch": 0.6116778179956858, "grad_norm": 2.1836700439453125, "learning_rate": 2.0438742783835794e-06, "loss": 0.8762, "step": 50190 }, { "epoch": 0.6117387542198335, "grad_norm": 2.2035036087036133, "learning_rate": 2.043553559974343e-06, "loss": 0.8432, "step": 50195 }, { "epoch": 0.6117996904439813, "grad_norm": 1.9494127035140991, "learning_rate": 2.043232841565106e-06, "loss": 0.8614, "step": 50200 }, { "epoch": 0.6118606266681291, "grad_norm": 2.0642266273498535, "learning_rate": 2.0429121231558693e-06, "loss": 0.8769, "step": 50205 }, { "epoch": 0.611921562892277, "grad_norm": 1.8668464422225952, "learning_rate": 2.0425914047466327e-06, "loss": 0.8128, "step": 50210 }, { "epoch": 0.6119824991164248, "grad_norm": 2.066760778427124, "learning_rate": 2.0422706863373958e-06, "loss": 0.8035, "step": 50215 }, { "epoch": 0.6120434353405726, "grad_norm": 1.9887834787368774, "learning_rate": 2.041949967928159e-06, "loss": 0.7658, "step": 50220 }, { "epoch": 0.6121043715647204, "grad_norm": 1.8570857048034668, "learning_rate": 2.0416292495189226e-06, "loss": 0.8564, "step": 50225 }, { "epoch": 0.6121653077888681, "grad_norm": 1.8960200548171997, "learning_rate": 2.0413085311096857e-06, "loss": 0.8139, "step": 50230 }, { "epoch": 0.612226244013016, "grad_norm": 1.9165067672729492, "learning_rate": 2.040987812700449e-06, "loss": 0.7704, "step": 50235 }, { "epoch": 0.6122871802371638, "grad_norm": 2.390083074569702, "learning_rate": 2.0406670942912125e-06, "loss": 0.8139, "step": 50240 }, { "epoch": 0.6123481164613116, "grad_norm": 2.1867032051086426, "learning_rate": 2.040346375881976e-06, "loss": 0.8241, "step": 50245 }, { "epoch": 0.6124090526854594, "grad_norm": 1.7231067419052124, "learning_rate": 2.0400256574727394e-06, "loss": 0.8209, "step": 50250 }, { "epoch": 0.6124699889096072, "grad_norm": 1.8490638732910156, "learning_rate": 2.0397049390635024e-06, "loss": 0.8751, "step": 50255 }, { "epoch": 0.6125309251337551, "grad_norm": 1.897413730621338, "learning_rate": 2.039384220654266e-06, "loss": 0.8193, "step": 50260 }, { "epoch": 0.6125918613579028, "grad_norm": 2.3339853286743164, "learning_rate": 2.039063502245029e-06, "loss": 0.8391, "step": 50265 }, { "epoch": 0.6126527975820506, "grad_norm": 1.671625018119812, "learning_rate": 2.0387427838357923e-06, "loss": 0.8462, "step": 50270 }, { "epoch": 0.6127137338061984, "grad_norm": 2.3323020935058594, "learning_rate": 2.0384220654265558e-06, "loss": 0.8145, "step": 50275 }, { "epoch": 0.6127746700303462, "grad_norm": 2.258613109588623, "learning_rate": 2.0381013470173188e-06, "loss": 0.8596, "step": 50280 }, { "epoch": 0.6128356062544941, "grad_norm": 1.8823186159133911, "learning_rate": 2.0377806286080822e-06, "loss": 0.7573, "step": 50285 }, { "epoch": 0.6128965424786419, "grad_norm": 1.888852596282959, "learning_rate": 2.0374599101988457e-06, "loss": 0.8235, "step": 50290 }, { "epoch": 0.6129574787027897, "grad_norm": 1.9576181173324585, "learning_rate": 2.0371391917896087e-06, "loss": 0.8385, "step": 50295 }, { "epoch": 0.6130184149269374, "grad_norm": 1.9130545854568481, "learning_rate": 2.036818473380372e-06, "loss": 0.8343, "step": 50300 }, { "epoch": 0.6130793511510853, "grad_norm": 1.9427490234375, "learning_rate": 2.0364977549711356e-06, "loss": 0.8735, "step": 50305 }, { "epoch": 0.6131402873752331, "grad_norm": 1.7885384559631348, "learning_rate": 2.036177036561899e-06, "loss": 0.8575, "step": 50310 }, { "epoch": 0.6132012235993809, "grad_norm": 2.436807632446289, "learning_rate": 2.035856318152662e-06, "loss": 0.8035, "step": 50315 }, { "epoch": 0.6132621598235287, "grad_norm": 1.9069759845733643, "learning_rate": 2.0355355997434255e-06, "loss": 0.8007, "step": 50320 }, { "epoch": 0.6133230960476765, "grad_norm": 1.785160779953003, "learning_rate": 2.035214881334189e-06, "loss": 0.7965, "step": 50325 }, { "epoch": 0.6133840322718243, "grad_norm": 2.010856866836548, "learning_rate": 2.0348941629249523e-06, "loss": 0.8391, "step": 50330 }, { "epoch": 0.6134449684959721, "grad_norm": 2.0154852867126465, "learning_rate": 2.0345734445157154e-06, "loss": 0.8542, "step": 50335 }, { "epoch": 0.6135059047201199, "grad_norm": 1.9863697290420532, "learning_rate": 2.034252726106479e-06, "loss": 0.7778, "step": 50340 }, { "epoch": 0.6135668409442677, "grad_norm": 3.0227155685424805, "learning_rate": 2.033932007697242e-06, "loss": 0.8224, "step": 50345 }, { "epoch": 0.6136277771684155, "grad_norm": 1.971988558769226, "learning_rate": 2.0336112892880053e-06, "loss": 0.8501, "step": 50350 }, { "epoch": 0.6136887133925634, "grad_norm": 1.8337435722351074, "learning_rate": 2.0332905708787687e-06, "loss": 0.7872, "step": 50355 }, { "epoch": 0.6137496496167112, "grad_norm": 1.8969649076461792, "learning_rate": 2.0329698524695317e-06, "loss": 0.8127, "step": 50360 }, { "epoch": 0.6138105858408589, "grad_norm": 1.912326455116272, "learning_rate": 2.032649134060295e-06, "loss": 0.8511, "step": 50365 }, { "epoch": 0.6138715220650067, "grad_norm": 1.9225925207138062, "learning_rate": 2.0323284156510586e-06, "loss": 0.7613, "step": 50370 }, { "epoch": 0.6139324582891545, "grad_norm": 2.169119358062744, "learning_rate": 2.0320076972418216e-06, "loss": 0.948, "step": 50375 }, { "epoch": 0.6139933945133024, "grad_norm": 2.0436387062072754, "learning_rate": 2.031686978832585e-06, "loss": 0.8072, "step": 50380 }, { "epoch": 0.6140543307374502, "grad_norm": 1.8639873266220093, "learning_rate": 2.0313662604233485e-06, "loss": 0.8905, "step": 50385 }, { "epoch": 0.614115266961598, "grad_norm": 1.8058600425720215, "learning_rate": 2.031045542014112e-06, "loss": 0.8049, "step": 50390 }, { "epoch": 0.6141762031857458, "grad_norm": 1.7567108869552612, "learning_rate": 2.030724823604875e-06, "loss": 0.7536, "step": 50395 }, { "epoch": 0.6142371394098936, "grad_norm": 2.3038690090179443, "learning_rate": 2.0304041051956384e-06, "loss": 0.7991, "step": 50400 }, { "epoch": 0.6142980756340414, "grad_norm": 1.9050204753875732, "learning_rate": 2.030083386786402e-06, "loss": 0.7871, "step": 50405 }, { "epoch": 0.6143590118581892, "grad_norm": 2.3780858516693115, "learning_rate": 2.0297626683771653e-06, "loss": 0.8781, "step": 50410 }, { "epoch": 0.614419948082337, "grad_norm": 1.8987559080123901, "learning_rate": 2.0294419499679283e-06, "loss": 0.8076, "step": 50415 }, { "epoch": 0.6144808843064848, "grad_norm": 2.1297943592071533, "learning_rate": 2.0291212315586917e-06, "loss": 0.7623, "step": 50420 }, { "epoch": 0.6145418205306327, "grad_norm": 2.548509359359741, "learning_rate": 2.0288005131494547e-06, "loss": 0.832, "step": 50425 }, { "epoch": 0.6146027567547805, "grad_norm": 4.51983118057251, "learning_rate": 2.028479794740218e-06, "loss": 0.8068, "step": 50430 }, { "epoch": 0.6146636929789282, "grad_norm": 2.1375622749328613, "learning_rate": 2.0281590763309816e-06, "loss": 0.8434, "step": 50435 }, { "epoch": 0.614724629203076, "grad_norm": 2.5346741676330566, "learning_rate": 2.0278383579217446e-06, "loss": 0.8636, "step": 50440 }, { "epoch": 0.6147855654272238, "grad_norm": 1.9932039976119995, "learning_rate": 2.027517639512508e-06, "loss": 0.8419, "step": 50445 }, { "epoch": 0.6148465016513717, "grad_norm": 1.9113637208938599, "learning_rate": 2.0271969211032715e-06, "loss": 0.8657, "step": 50450 }, { "epoch": 0.6149074378755195, "grad_norm": 1.9974229335784912, "learning_rate": 2.0268762026940345e-06, "loss": 0.8124, "step": 50455 }, { "epoch": 0.6149683740996673, "grad_norm": 1.8298145532608032, "learning_rate": 2.026555484284798e-06, "loss": 0.8319, "step": 50460 }, { "epoch": 0.6150293103238151, "grad_norm": 1.9566035270690918, "learning_rate": 2.0262347658755614e-06, "loss": 0.8364, "step": 50465 }, { "epoch": 0.6150902465479628, "grad_norm": 1.9575825929641724, "learning_rate": 2.025914047466325e-06, "loss": 0.8457, "step": 50470 }, { "epoch": 0.6151511827721107, "grad_norm": 1.8841149806976318, "learning_rate": 2.0255933290570883e-06, "loss": 0.8776, "step": 50475 }, { "epoch": 0.6152121189962585, "grad_norm": 1.7504316568374634, "learning_rate": 2.0252726106478513e-06, "loss": 0.8585, "step": 50480 }, { "epoch": 0.6152730552204063, "grad_norm": 2.1425559520721436, "learning_rate": 2.0249518922386148e-06, "loss": 0.7613, "step": 50485 }, { "epoch": 0.6153339914445541, "grad_norm": 1.803637146949768, "learning_rate": 2.024631173829378e-06, "loss": 0.8139, "step": 50490 }, { "epoch": 0.615394927668702, "grad_norm": 2.1132726669311523, "learning_rate": 2.0243104554201412e-06, "loss": 0.8014, "step": 50495 }, { "epoch": 0.6154558638928498, "grad_norm": 1.7972919940948486, "learning_rate": 2.0239897370109047e-06, "loss": 0.7832, "step": 50500 }, { "epoch": 0.6155168001169975, "grad_norm": 2.06671142578125, "learning_rate": 2.023669018601668e-06, "loss": 0.7766, "step": 50505 }, { "epoch": 0.6155777363411453, "grad_norm": 2.0484468936920166, "learning_rate": 2.023348300192431e-06, "loss": 0.8184, "step": 50510 }, { "epoch": 0.6156386725652931, "grad_norm": 1.846760869026184, "learning_rate": 2.0230275817831946e-06, "loss": 0.8006, "step": 50515 }, { "epoch": 0.615699608789441, "grad_norm": 1.9858949184417725, "learning_rate": 2.0227068633739576e-06, "loss": 0.8452, "step": 50520 }, { "epoch": 0.6157605450135888, "grad_norm": 2.0405802726745605, "learning_rate": 2.022386144964721e-06, "loss": 0.8493, "step": 50525 }, { "epoch": 0.6158214812377366, "grad_norm": 1.9293776750564575, "learning_rate": 2.0220654265554845e-06, "loss": 0.8225, "step": 50530 }, { "epoch": 0.6158824174618844, "grad_norm": 2.306429862976074, "learning_rate": 2.021744708146248e-06, "loss": 0.7684, "step": 50535 }, { "epoch": 0.6159433536860321, "grad_norm": 1.9110474586486816, "learning_rate": 2.021423989737011e-06, "loss": 0.8382, "step": 50540 }, { "epoch": 0.61600428991018, "grad_norm": 2.0048365592956543, "learning_rate": 2.0211032713277743e-06, "loss": 0.8127, "step": 50545 }, { "epoch": 0.6160652261343278, "grad_norm": 2.0508055686950684, "learning_rate": 2.0207825529185378e-06, "loss": 0.7826, "step": 50550 }, { "epoch": 0.6161261623584756, "grad_norm": 1.9668649435043335, "learning_rate": 2.0204618345093012e-06, "loss": 0.8113, "step": 50555 }, { "epoch": 0.6161870985826234, "grad_norm": 1.8409901857376099, "learning_rate": 2.0201411161000642e-06, "loss": 0.853, "step": 50560 }, { "epoch": 0.6162480348067713, "grad_norm": 2.1843831539154053, "learning_rate": 2.0198203976908277e-06, "loss": 0.7667, "step": 50565 }, { "epoch": 0.6163089710309191, "grad_norm": 2.286484956741333, "learning_rate": 2.019499679281591e-06, "loss": 0.844, "step": 50570 }, { "epoch": 0.6163699072550668, "grad_norm": 1.6744673252105713, "learning_rate": 2.019178960872354e-06, "loss": 0.8253, "step": 50575 }, { "epoch": 0.6164308434792146, "grad_norm": 2.001143455505371, "learning_rate": 2.0188582424631176e-06, "loss": 0.834, "step": 50580 }, { "epoch": 0.6164917797033624, "grad_norm": 1.8020395040512085, "learning_rate": 2.018537524053881e-06, "loss": 0.7999, "step": 50585 }, { "epoch": 0.6165527159275103, "grad_norm": 1.7987505197525024, "learning_rate": 2.018216805644644e-06, "loss": 0.9043, "step": 50590 }, { "epoch": 0.6166136521516581, "grad_norm": 1.6394578218460083, "learning_rate": 2.0178960872354075e-06, "loss": 0.8544, "step": 50595 }, { "epoch": 0.6166745883758059, "grad_norm": 1.916480302810669, "learning_rate": 2.0175753688261705e-06, "loss": 0.8368, "step": 50600 }, { "epoch": 0.6167355245999537, "grad_norm": 2.449068546295166, "learning_rate": 2.017254650416934e-06, "loss": 0.8443, "step": 50605 }, { "epoch": 0.6167964608241014, "grad_norm": 2.67026948928833, "learning_rate": 2.0169339320076974e-06, "loss": 0.8853, "step": 50610 }, { "epoch": 0.6168573970482493, "grad_norm": 1.5960745811462402, "learning_rate": 2.016613213598461e-06, "loss": 0.7579, "step": 50615 }, { "epoch": 0.6169183332723971, "grad_norm": 2.175990343093872, "learning_rate": 2.016292495189224e-06, "loss": 0.797, "step": 50620 }, { "epoch": 0.6169792694965449, "grad_norm": 1.9548834562301636, "learning_rate": 2.0159717767799873e-06, "loss": 0.8059, "step": 50625 }, { "epoch": 0.6170402057206927, "grad_norm": 2.125781774520874, "learning_rate": 2.0156510583707507e-06, "loss": 0.831, "step": 50630 }, { "epoch": 0.6171011419448406, "grad_norm": 2.1227893829345703, "learning_rate": 2.015330339961514e-06, "loss": 0.8066, "step": 50635 }, { "epoch": 0.6171620781689884, "grad_norm": 1.9301947355270386, "learning_rate": 2.015009621552277e-06, "loss": 0.8185, "step": 50640 }, { "epoch": 0.6172230143931361, "grad_norm": 1.8902990818023682, "learning_rate": 2.0146889031430406e-06, "loss": 0.8364, "step": 50645 }, { "epoch": 0.6172839506172839, "grad_norm": 2.1119534969329834, "learning_rate": 2.014368184733804e-06, "loss": 0.7565, "step": 50650 }, { "epoch": 0.6173448868414317, "grad_norm": 2.372076988220215, "learning_rate": 2.014047466324567e-06, "loss": 0.7676, "step": 50655 }, { "epoch": 0.6174058230655796, "grad_norm": 1.6767524480819702, "learning_rate": 2.0137267479153305e-06, "loss": 0.8679, "step": 50660 }, { "epoch": 0.6174667592897274, "grad_norm": 1.9658944606781006, "learning_rate": 2.013406029506094e-06, "loss": 0.8061, "step": 50665 }, { "epoch": 0.6175276955138752, "grad_norm": 2.327780246734619, "learning_rate": 2.013085311096857e-06, "loss": 0.8009, "step": 50670 }, { "epoch": 0.617588631738023, "grad_norm": 1.7367150783538818, "learning_rate": 2.0127645926876204e-06, "loss": 0.7917, "step": 50675 }, { "epoch": 0.6176495679621707, "grad_norm": 1.887738823890686, "learning_rate": 2.0124438742783834e-06, "loss": 0.7741, "step": 50680 }, { "epoch": 0.6177105041863186, "grad_norm": 1.9179366827011108, "learning_rate": 2.012123155869147e-06, "loss": 0.7996, "step": 50685 }, { "epoch": 0.6177714404104664, "grad_norm": 2.211639165878296, "learning_rate": 2.0118024374599103e-06, "loss": 0.8147, "step": 50690 }, { "epoch": 0.6178323766346142, "grad_norm": 2.2686495780944824, "learning_rate": 2.0114817190506737e-06, "loss": 0.7552, "step": 50695 }, { "epoch": 0.617893312858762, "grad_norm": 1.8610773086547852, "learning_rate": 2.011161000641437e-06, "loss": 0.8049, "step": 50700 }, { "epoch": 0.6179542490829099, "grad_norm": 1.9211275577545166, "learning_rate": 2.0108402822322e-06, "loss": 0.8651, "step": 50705 }, { "epoch": 0.6180151853070577, "grad_norm": 1.8588578701019287, "learning_rate": 2.0105195638229636e-06, "loss": 0.8425, "step": 50710 }, { "epoch": 0.6180761215312054, "grad_norm": 1.8396159410476685, "learning_rate": 2.010198845413727e-06, "loss": 0.8186, "step": 50715 }, { "epoch": 0.6181370577553532, "grad_norm": 1.8198310136795044, "learning_rate": 2.00987812700449e-06, "loss": 0.8185, "step": 50720 }, { "epoch": 0.618197993979501, "grad_norm": 1.8363655805587769, "learning_rate": 2.0095574085952535e-06, "loss": 0.8376, "step": 50725 }, { "epoch": 0.6182589302036489, "grad_norm": 2.6571450233459473, "learning_rate": 2.009236690186017e-06, "loss": 0.7886, "step": 50730 }, { "epoch": 0.6183198664277967, "grad_norm": 2.0752439498901367, "learning_rate": 2.00891597177678e-06, "loss": 0.9012, "step": 50735 }, { "epoch": 0.6183808026519445, "grad_norm": 2.0614771842956543, "learning_rate": 2.0085952533675434e-06, "loss": 0.8364, "step": 50740 }, { "epoch": 0.6184417388760923, "grad_norm": 1.8590844869613647, "learning_rate": 2.008274534958307e-06, "loss": 0.8741, "step": 50745 }, { "epoch": 0.61850267510024, "grad_norm": 1.9501372575759888, "learning_rate": 2.00795381654907e-06, "loss": 0.7816, "step": 50750 }, { "epoch": 0.6185636113243879, "grad_norm": 1.7020529508590698, "learning_rate": 2.0076330981398333e-06, "loss": 0.8383, "step": 50755 }, { "epoch": 0.6186245475485357, "grad_norm": 2.1980884075164795, "learning_rate": 2.0073123797305968e-06, "loss": 0.8155, "step": 50760 }, { "epoch": 0.6186854837726835, "grad_norm": 1.80702543258667, "learning_rate": 2.00699166132136e-06, "loss": 0.8602, "step": 50765 }, { "epoch": 0.6187464199968313, "grad_norm": 2.223680019378662, "learning_rate": 2.0066709429121232e-06, "loss": 0.8309, "step": 50770 }, { "epoch": 0.6188073562209792, "grad_norm": 1.8495495319366455, "learning_rate": 2.0063502245028867e-06, "loss": 0.841, "step": 50775 }, { "epoch": 0.618868292445127, "grad_norm": 1.9531306028366089, "learning_rate": 2.00602950609365e-06, "loss": 0.8075, "step": 50780 }, { "epoch": 0.6189292286692747, "grad_norm": 2.048114776611328, "learning_rate": 2.0057087876844136e-06, "loss": 0.8034, "step": 50785 }, { "epoch": 0.6189901648934225, "grad_norm": 2.1707684993743896, "learning_rate": 2.0053880692751766e-06, "loss": 0.8259, "step": 50790 }, { "epoch": 0.6190511011175703, "grad_norm": 1.7787785530090332, "learning_rate": 2.00506735086594e-06, "loss": 0.8185, "step": 50795 }, { "epoch": 0.6191120373417182, "grad_norm": 1.8226959705352783, "learning_rate": 2.0047466324567034e-06, "loss": 0.7839, "step": 50800 }, { "epoch": 0.619172973565866, "grad_norm": 1.8552110195159912, "learning_rate": 2.0044259140474665e-06, "loss": 0.7392, "step": 50805 }, { "epoch": 0.6192339097900138, "grad_norm": 1.8648868799209595, "learning_rate": 2.00410519563823e-06, "loss": 0.8386, "step": 50810 }, { "epoch": 0.6192948460141616, "grad_norm": 2.026935338973999, "learning_rate": 2.003784477228993e-06, "loss": 0.8087, "step": 50815 }, { "epoch": 0.6193557822383093, "grad_norm": 2.0401828289031982, "learning_rate": 2.0034637588197564e-06, "loss": 0.8174, "step": 50820 }, { "epoch": 0.6194167184624572, "grad_norm": 1.9322367906570435, "learning_rate": 2.00314304041052e-06, "loss": 0.8625, "step": 50825 }, { "epoch": 0.619477654686605, "grad_norm": 2.0066826343536377, "learning_rate": 2.002822322001283e-06, "loss": 0.8302, "step": 50830 }, { "epoch": 0.6195385909107528, "grad_norm": 1.685353398323059, "learning_rate": 2.0025016035920463e-06, "loss": 0.8469, "step": 50835 }, { "epoch": 0.6195995271349006, "grad_norm": 1.9899942874908447, "learning_rate": 2.0021808851828097e-06, "loss": 0.849, "step": 50840 }, { "epoch": 0.6196604633590485, "grad_norm": 1.991561770439148, "learning_rate": 2.0018601667735727e-06, "loss": 0.8052, "step": 50845 }, { "epoch": 0.6197213995831963, "grad_norm": 1.950513482093811, "learning_rate": 2.001539448364336e-06, "loss": 0.847, "step": 50850 }, { "epoch": 0.619782335807344, "grad_norm": 1.7061364650726318, "learning_rate": 2.0012187299550996e-06, "loss": 0.7897, "step": 50855 }, { "epoch": 0.6198432720314918, "grad_norm": 1.9478453397750854, "learning_rate": 2.000898011545863e-06, "loss": 0.8361, "step": 50860 }, { "epoch": 0.6199042082556396, "grad_norm": 1.8949377536773682, "learning_rate": 2.0005772931366265e-06, "loss": 0.7878, "step": 50865 }, { "epoch": 0.6199651444797875, "grad_norm": 2.0537221431732178, "learning_rate": 2.0002565747273895e-06, "loss": 0.8559, "step": 50870 }, { "epoch": 0.6200260807039353, "grad_norm": 1.859194040298462, "learning_rate": 1.999935856318153e-06, "loss": 0.8344, "step": 50875 }, { "epoch": 0.6200870169280831, "grad_norm": 1.7615916728973389, "learning_rate": 1.9996151379089164e-06, "loss": 0.8016, "step": 50880 }, { "epoch": 0.6201479531522309, "grad_norm": 2.014357328414917, "learning_rate": 1.9992944194996794e-06, "loss": 0.7825, "step": 50885 }, { "epoch": 0.6202088893763786, "grad_norm": 1.8257697820663452, "learning_rate": 1.998973701090443e-06, "loss": 0.8365, "step": 50890 }, { "epoch": 0.6202698256005265, "grad_norm": 2.051344871520996, "learning_rate": 1.998652982681206e-06, "loss": 0.7942, "step": 50895 }, { "epoch": 0.6203307618246743, "grad_norm": 2.1174476146698, "learning_rate": 1.9983322642719693e-06, "loss": 0.8134, "step": 50900 }, { "epoch": 0.6203916980488221, "grad_norm": 2.369209051132202, "learning_rate": 1.9980115458627327e-06, "loss": 0.9251, "step": 50905 }, { "epoch": 0.6204526342729699, "grad_norm": 1.7416191101074219, "learning_rate": 1.9976908274534957e-06, "loss": 0.7595, "step": 50910 }, { "epoch": 0.6205135704971178, "grad_norm": 2.2678370475769043, "learning_rate": 1.997370109044259e-06, "loss": 0.8016, "step": 50915 }, { "epoch": 0.6205745067212656, "grad_norm": 1.9102548360824585, "learning_rate": 1.9970493906350226e-06, "loss": 0.8238, "step": 50920 }, { "epoch": 0.6206354429454133, "grad_norm": 1.8862310647964478, "learning_rate": 1.996728672225786e-06, "loss": 0.9108, "step": 50925 }, { "epoch": 0.6206963791695611, "grad_norm": 1.8391894102096558, "learning_rate": 1.996407953816549e-06, "loss": 0.9016, "step": 50930 }, { "epoch": 0.6207573153937089, "grad_norm": 1.9932018518447876, "learning_rate": 1.9960872354073125e-06, "loss": 0.8125, "step": 50935 }, { "epoch": 0.6208182516178568, "grad_norm": 2.0149924755096436, "learning_rate": 1.995766516998076e-06, "loss": 0.8135, "step": 50940 }, { "epoch": 0.6208791878420046, "grad_norm": 2.2211172580718994, "learning_rate": 1.9954457985888394e-06, "loss": 0.8373, "step": 50945 }, { "epoch": 0.6209401240661524, "grad_norm": 2.017282724380493, "learning_rate": 1.9951250801796024e-06, "loss": 0.8458, "step": 50950 }, { "epoch": 0.6210010602903002, "grad_norm": 1.8825839757919312, "learning_rate": 1.994804361770366e-06, "loss": 0.8426, "step": 50955 }, { "epoch": 0.6210619965144479, "grad_norm": 2.0800013542175293, "learning_rate": 1.9944836433611293e-06, "loss": 0.8346, "step": 50960 }, { "epoch": 0.6211229327385958, "grad_norm": 1.6968878507614136, "learning_rate": 1.9941629249518923e-06, "loss": 0.9415, "step": 50965 }, { "epoch": 0.6211838689627436, "grad_norm": 2.1913421154022217, "learning_rate": 1.9938422065426558e-06, "loss": 0.7678, "step": 50970 }, { "epoch": 0.6212448051868914, "grad_norm": 1.669425129890442, "learning_rate": 1.9935214881334188e-06, "loss": 0.7836, "step": 50975 }, { "epoch": 0.6213057414110392, "grad_norm": 2.562297821044922, "learning_rate": 1.9932007697241822e-06, "loss": 0.8827, "step": 50980 }, { "epoch": 0.621366677635187, "grad_norm": 2.0563485622406006, "learning_rate": 1.9928800513149457e-06, "loss": 0.86, "step": 50985 }, { "epoch": 0.6214276138593349, "grad_norm": 1.5815387964248657, "learning_rate": 1.9925593329057087e-06, "loss": 0.7931, "step": 50990 }, { "epoch": 0.6214885500834826, "grad_norm": 1.7631642818450928, "learning_rate": 1.992238614496472e-06, "loss": 0.8236, "step": 50995 }, { "epoch": 0.6215494863076304, "grad_norm": 2.2883803844451904, "learning_rate": 1.9919178960872356e-06, "loss": 0.7986, "step": 51000 }, { "epoch": 0.6216104225317782, "grad_norm": 2.3114960193634033, "learning_rate": 1.991597177677999e-06, "loss": 0.8705, "step": 51005 }, { "epoch": 0.621671358755926, "grad_norm": 1.8365323543548584, "learning_rate": 1.9912764592687624e-06, "loss": 0.8497, "step": 51010 }, { "epoch": 0.6217322949800739, "grad_norm": 1.7611286640167236, "learning_rate": 1.9909557408595255e-06, "loss": 0.8696, "step": 51015 }, { "epoch": 0.6217932312042217, "grad_norm": 2.1897542476654053, "learning_rate": 1.990635022450289e-06, "loss": 0.8575, "step": 51020 }, { "epoch": 0.6218541674283695, "grad_norm": 1.9385759830474854, "learning_rate": 1.9903143040410523e-06, "loss": 0.8086, "step": 51025 }, { "epoch": 0.6219151036525172, "grad_norm": 2.130082845687866, "learning_rate": 1.9899935856318154e-06, "loss": 0.8712, "step": 51030 }, { "epoch": 0.621976039876665, "grad_norm": 1.8544700145721436, "learning_rate": 1.989672867222579e-06, "loss": 0.8362, "step": 51035 }, { "epoch": 0.6220369761008129, "grad_norm": 1.968264102935791, "learning_rate": 1.9893521488133422e-06, "loss": 0.8214, "step": 51040 }, { "epoch": 0.6220979123249607, "grad_norm": 2.83088755607605, "learning_rate": 1.9890314304041052e-06, "loss": 0.8464, "step": 51045 }, { "epoch": 0.6221588485491085, "grad_norm": 1.605012059211731, "learning_rate": 1.9887107119948687e-06, "loss": 0.8075, "step": 51050 }, { "epoch": 0.6222197847732563, "grad_norm": 2.101588249206543, "learning_rate": 1.9883899935856317e-06, "loss": 0.8739, "step": 51055 }, { "epoch": 0.6222807209974042, "grad_norm": 2.7490806579589844, "learning_rate": 1.988069275176395e-06, "loss": 0.7669, "step": 51060 }, { "epoch": 0.6223416572215519, "grad_norm": 2.1139492988586426, "learning_rate": 1.9877485567671586e-06, "loss": 0.847, "step": 51065 }, { "epoch": 0.6224025934456997, "grad_norm": 2.053272247314453, "learning_rate": 1.9874278383579216e-06, "loss": 0.7899, "step": 51070 }, { "epoch": 0.6224635296698475, "grad_norm": 2.2988975048065186, "learning_rate": 1.987107119948685e-06, "loss": 0.7856, "step": 51075 }, { "epoch": 0.6225244658939953, "grad_norm": 2.344186782836914, "learning_rate": 1.9867864015394485e-06, "loss": 0.8362, "step": 51080 }, { "epoch": 0.6225854021181432, "grad_norm": 2.1375393867492676, "learning_rate": 1.986465683130212e-06, "loss": 0.8302, "step": 51085 }, { "epoch": 0.622646338342291, "grad_norm": 2.176928758621216, "learning_rate": 1.9861449647209754e-06, "loss": 0.7997, "step": 51090 }, { "epoch": 0.6227072745664388, "grad_norm": 1.8903518915176392, "learning_rate": 1.9858242463117384e-06, "loss": 0.8368, "step": 51095 }, { "epoch": 0.6227682107905865, "grad_norm": 1.8678810596466064, "learning_rate": 1.985503527902502e-06, "loss": 0.8301, "step": 51100 }, { "epoch": 0.6228291470147344, "grad_norm": 1.624058485031128, "learning_rate": 1.9851828094932653e-06, "loss": 0.8065, "step": 51105 }, { "epoch": 0.6228900832388822, "grad_norm": 1.810117483139038, "learning_rate": 1.9848620910840283e-06, "loss": 0.8254, "step": 51110 }, { "epoch": 0.62295101946303, "grad_norm": 1.9760087728500366, "learning_rate": 1.9845413726747917e-06, "loss": 0.7679, "step": 51115 }, { "epoch": 0.6230119556871778, "grad_norm": 2.2909140586853027, "learning_rate": 1.984220654265555e-06, "loss": 0.837, "step": 51120 }, { "epoch": 0.6230728919113256, "grad_norm": 2.2241265773773193, "learning_rate": 1.983899935856318e-06, "loss": 0.7948, "step": 51125 }, { "epoch": 0.6231338281354735, "grad_norm": 1.9937258958816528, "learning_rate": 1.9835792174470816e-06, "loss": 0.8518, "step": 51130 }, { "epoch": 0.6231947643596212, "grad_norm": 2.1927995681762695, "learning_rate": 1.983258499037845e-06, "loss": 0.9045, "step": 51135 }, { "epoch": 0.623255700583769, "grad_norm": 1.9708378314971924, "learning_rate": 1.982937780628608e-06, "loss": 0.8362, "step": 51140 }, { "epoch": 0.6233166368079168, "grad_norm": 2.0568621158599854, "learning_rate": 1.9826170622193715e-06, "loss": 0.753, "step": 51145 }, { "epoch": 0.6233775730320646, "grad_norm": 2.1714718341827393, "learning_rate": 1.982296343810135e-06, "loss": 0.7197, "step": 51150 }, { "epoch": 0.6234385092562125, "grad_norm": 2.1428325176239014, "learning_rate": 1.981975625400898e-06, "loss": 0.8446, "step": 51155 }, { "epoch": 0.6234994454803603, "grad_norm": 2.1772513389587402, "learning_rate": 1.9816549069916614e-06, "loss": 0.9173, "step": 51160 }, { "epoch": 0.6235603817045081, "grad_norm": 2.0887932777404785, "learning_rate": 1.981334188582425e-06, "loss": 0.8162, "step": 51165 }, { "epoch": 0.6236213179286558, "grad_norm": 1.8676502704620361, "learning_rate": 1.9810134701731883e-06, "loss": 0.802, "step": 51170 }, { "epoch": 0.6236822541528037, "grad_norm": 2.151200294494629, "learning_rate": 1.9806927517639517e-06, "loss": 0.7674, "step": 51175 }, { "epoch": 0.6237431903769515, "grad_norm": 1.9683977365493774, "learning_rate": 1.9803720333547147e-06, "loss": 0.8252, "step": 51180 }, { "epoch": 0.6238041266010993, "grad_norm": 1.854284405708313, "learning_rate": 1.980051314945478e-06, "loss": 0.808, "step": 51185 }, { "epoch": 0.6238650628252471, "grad_norm": 2.2230231761932373, "learning_rate": 1.979730596536241e-06, "loss": 0.8082, "step": 51190 }, { "epoch": 0.6239259990493949, "grad_norm": 1.8995108604431152, "learning_rate": 1.9794098781270046e-06, "loss": 0.8179, "step": 51195 }, { "epoch": 0.6239869352735428, "grad_norm": 2.127838611602783, "learning_rate": 1.979089159717768e-06, "loss": 0.7865, "step": 51200 }, { "epoch": 0.6240478714976905, "grad_norm": 2.2730519771575928, "learning_rate": 1.978768441308531e-06, "loss": 0.8438, "step": 51205 }, { "epoch": 0.6241088077218383, "grad_norm": 2.0640203952789307, "learning_rate": 1.9784477228992945e-06, "loss": 0.8423, "step": 51210 }, { "epoch": 0.6241697439459861, "grad_norm": 1.9138339757919312, "learning_rate": 1.978127004490058e-06, "loss": 0.8107, "step": 51215 }, { "epoch": 0.624230680170134, "grad_norm": 1.7102887630462646, "learning_rate": 1.977806286080821e-06, "loss": 0.7877, "step": 51220 }, { "epoch": 0.6242916163942818, "grad_norm": 2.0318546295166016, "learning_rate": 1.9774855676715844e-06, "loss": 0.9133, "step": 51225 }, { "epoch": 0.6243525526184296, "grad_norm": 1.8840919733047485, "learning_rate": 1.977164849262348e-06, "loss": 0.8233, "step": 51230 }, { "epoch": 0.6244134888425774, "grad_norm": 1.8977196216583252, "learning_rate": 1.9768441308531113e-06, "loss": 0.769, "step": 51235 }, { "epoch": 0.6244744250667251, "grad_norm": 2.4580955505371094, "learning_rate": 1.9765234124438743e-06, "loss": 0.8483, "step": 51240 }, { "epoch": 0.624535361290873, "grad_norm": 1.9151806831359863, "learning_rate": 1.9762026940346378e-06, "loss": 0.8273, "step": 51245 }, { "epoch": 0.6245962975150208, "grad_norm": 2.1151583194732666, "learning_rate": 1.9758819756254012e-06, "loss": 0.7918, "step": 51250 }, { "epoch": 0.6246572337391686, "grad_norm": 1.8492534160614014, "learning_rate": 1.9755612572161647e-06, "loss": 0.7622, "step": 51255 }, { "epoch": 0.6247181699633164, "grad_norm": 2.0351953506469727, "learning_rate": 1.9752405388069277e-06, "loss": 0.857, "step": 51260 }, { "epoch": 0.6247791061874642, "grad_norm": 1.7674264907836914, "learning_rate": 1.974919820397691e-06, "loss": 0.8974, "step": 51265 }, { "epoch": 0.6248400424116121, "grad_norm": 2.1340553760528564, "learning_rate": 1.974599101988454e-06, "loss": 0.836, "step": 51270 }, { "epoch": 0.6249009786357598, "grad_norm": 1.9979047775268555, "learning_rate": 1.9742783835792176e-06, "loss": 0.7666, "step": 51275 }, { "epoch": 0.6249619148599076, "grad_norm": 1.863659381866455, "learning_rate": 1.973957665169981e-06, "loss": 0.8873, "step": 51280 }, { "epoch": 0.6250228510840554, "grad_norm": 1.677149772644043, "learning_rate": 1.973636946760744e-06, "loss": 0.8366, "step": 51285 }, { "epoch": 0.6250837873082032, "grad_norm": 2.0759129524230957, "learning_rate": 1.9733162283515075e-06, "loss": 0.8098, "step": 51290 }, { "epoch": 0.6251447235323511, "grad_norm": 1.6655113697052002, "learning_rate": 1.972995509942271e-06, "loss": 0.8484, "step": 51295 }, { "epoch": 0.6252056597564989, "grad_norm": 2.2530179023742676, "learning_rate": 1.972674791533034e-06, "loss": 0.8572, "step": 51300 }, { "epoch": 0.6252665959806466, "grad_norm": 2.155271291732788, "learning_rate": 1.9723540731237974e-06, "loss": 0.8574, "step": 51305 }, { "epoch": 0.6253275322047944, "grad_norm": 1.9273508787155151, "learning_rate": 1.972033354714561e-06, "loss": 0.8508, "step": 51310 }, { "epoch": 0.6253884684289422, "grad_norm": 1.9214611053466797, "learning_rate": 1.9717126363053242e-06, "loss": 0.8029, "step": 51315 }, { "epoch": 0.6254494046530901, "grad_norm": 1.991538405418396, "learning_rate": 1.9713919178960873e-06, "loss": 0.8514, "step": 51320 }, { "epoch": 0.6255103408772379, "grad_norm": 1.9721864461898804, "learning_rate": 1.9710711994868507e-06, "loss": 0.8291, "step": 51325 }, { "epoch": 0.6255712771013857, "grad_norm": 2.213709831237793, "learning_rate": 1.970750481077614e-06, "loss": 0.7951, "step": 51330 }, { "epoch": 0.6256322133255335, "grad_norm": 2.1246895790100098, "learning_rate": 1.9704297626683776e-06, "loss": 0.8169, "step": 51335 }, { "epoch": 0.6256931495496812, "grad_norm": 2.04768443107605, "learning_rate": 1.9701090442591406e-06, "loss": 0.8775, "step": 51340 }, { "epoch": 0.6257540857738291, "grad_norm": 1.724560260772705, "learning_rate": 1.969788325849904e-06, "loss": 0.7984, "step": 51345 }, { "epoch": 0.6258150219979769, "grad_norm": 2.1819145679473877, "learning_rate": 1.969467607440667e-06, "loss": 0.8949, "step": 51350 }, { "epoch": 0.6258759582221247, "grad_norm": 2.304147958755493, "learning_rate": 1.9691468890314305e-06, "loss": 0.7945, "step": 51355 }, { "epoch": 0.6259368944462725, "grad_norm": 1.9257662296295166, "learning_rate": 1.968826170622194e-06, "loss": 0.8311, "step": 51360 }, { "epoch": 0.6259978306704204, "grad_norm": 1.8341131210327148, "learning_rate": 1.968505452212957e-06, "loss": 0.7494, "step": 51365 }, { "epoch": 0.6260587668945682, "grad_norm": 1.8348323106765747, "learning_rate": 1.9681847338037204e-06, "loss": 0.7622, "step": 51370 }, { "epoch": 0.6261197031187159, "grad_norm": 1.8575774431228638, "learning_rate": 1.967864015394484e-06, "loss": 0.7806, "step": 51375 }, { "epoch": 0.6261806393428637, "grad_norm": 1.8134071826934814, "learning_rate": 1.967543296985247e-06, "loss": 0.86, "step": 51380 }, { "epoch": 0.6262415755670115, "grad_norm": 1.8038530349731445, "learning_rate": 1.9672225785760103e-06, "loss": 0.8774, "step": 51385 }, { "epoch": 0.6263025117911594, "grad_norm": 2.4073710441589355, "learning_rate": 1.9669018601667737e-06, "loss": 0.8408, "step": 51390 }, { "epoch": 0.6263634480153072, "grad_norm": 2.070056438446045, "learning_rate": 1.966581141757537e-06, "loss": 0.7993, "step": 51395 }, { "epoch": 0.626424384239455, "grad_norm": 1.9028234481811523, "learning_rate": 1.9662604233483006e-06, "loss": 0.9259, "step": 51400 }, { "epoch": 0.6264853204636028, "grad_norm": 1.8343291282653809, "learning_rate": 1.9659397049390636e-06, "loss": 0.8349, "step": 51405 }, { "epoch": 0.6265462566877505, "grad_norm": 2.195694923400879, "learning_rate": 1.965618986529827e-06, "loss": 0.8269, "step": 51410 }, { "epoch": 0.6266071929118984, "grad_norm": 1.7672836780548096, "learning_rate": 1.9652982681205905e-06, "loss": 0.8229, "step": 51415 }, { "epoch": 0.6266681291360462, "grad_norm": 2.695072889328003, "learning_rate": 1.9649775497113535e-06, "loss": 0.8126, "step": 51420 }, { "epoch": 0.626729065360194, "grad_norm": 2.1656715869903564, "learning_rate": 1.964656831302117e-06, "loss": 0.8259, "step": 51425 }, { "epoch": 0.6267900015843418, "grad_norm": 1.6600472927093506, "learning_rate": 1.9643361128928804e-06, "loss": 0.7932, "step": 51430 }, { "epoch": 0.6268509378084897, "grad_norm": 1.8596538305282593, "learning_rate": 1.9640153944836434e-06, "loss": 0.8657, "step": 51435 }, { "epoch": 0.6269118740326375, "grad_norm": 1.9293874502182007, "learning_rate": 1.963694676074407e-06, "loss": 0.7891, "step": 51440 }, { "epoch": 0.6269728102567852, "grad_norm": 2.012578010559082, "learning_rate": 1.96337395766517e-06, "loss": 0.7482, "step": 51445 }, { "epoch": 0.627033746480933, "grad_norm": 2.0574090480804443, "learning_rate": 1.9630532392559333e-06, "loss": 0.8054, "step": 51450 }, { "epoch": 0.6270946827050808, "grad_norm": 2.0917043685913086, "learning_rate": 1.9627325208466968e-06, "loss": 0.771, "step": 51455 }, { "epoch": 0.6271556189292287, "grad_norm": 2.2679381370544434, "learning_rate": 1.9624118024374598e-06, "loss": 0.8029, "step": 51460 }, { "epoch": 0.6272165551533765, "grad_norm": 2.158914566040039, "learning_rate": 1.9620910840282232e-06, "loss": 0.7409, "step": 51465 }, { "epoch": 0.6272774913775243, "grad_norm": 2.1139748096466064, "learning_rate": 1.9617703656189867e-06, "loss": 0.8369, "step": 51470 }, { "epoch": 0.6273384276016721, "grad_norm": 1.9415960311889648, "learning_rate": 1.96144964720975e-06, "loss": 0.8244, "step": 51475 }, { "epoch": 0.6273993638258198, "grad_norm": 2.435481548309326, "learning_rate": 1.9611289288005135e-06, "loss": 0.9282, "step": 51480 }, { "epoch": 0.6274603000499677, "grad_norm": 1.9708727598190308, "learning_rate": 1.9608082103912766e-06, "loss": 0.8477, "step": 51485 }, { "epoch": 0.6275212362741155, "grad_norm": 1.9766219854354858, "learning_rate": 1.96048749198204e-06, "loss": 0.9027, "step": 51490 }, { "epoch": 0.6275821724982633, "grad_norm": 1.9232641458511353, "learning_rate": 1.9601667735728034e-06, "loss": 0.8264, "step": 51495 }, { "epoch": 0.6276431087224111, "grad_norm": 1.8514041900634766, "learning_rate": 1.9598460551635665e-06, "loss": 0.8041, "step": 51500 }, { "epoch": 0.627704044946559, "grad_norm": 2.2785089015960693, "learning_rate": 1.95952533675433e-06, "loss": 0.8064, "step": 51505 }, { "epoch": 0.6277649811707068, "grad_norm": 2.3846490383148193, "learning_rate": 1.9592046183450933e-06, "loss": 0.8559, "step": 51510 }, { "epoch": 0.6278259173948545, "grad_norm": 1.621246576309204, "learning_rate": 1.9588838999358564e-06, "loss": 0.8134, "step": 51515 }, { "epoch": 0.6278868536190023, "grad_norm": 1.9482357501983643, "learning_rate": 1.95856318152662e-06, "loss": 0.8053, "step": 51520 }, { "epoch": 0.6279477898431501, "grad_norm": 1.878906488418579, "learning_rate": 1.958242463117383e-06, "loss": 0.7458, "step": 51525 }, { "epoch": 0.628008726067298, "grad_norm": 1.704663634300232, "learning_rate": 1.9579217447081463e-06, "loss": 0.7881, "step": 51530 }, { "epoch": 0.6280696622914458, "grad_norm": 2.242957353591919, "learning_rate": 1.9576010262989097e-06, "loss": 0.8917, "step": 51535 }, { "epoch": 0.6281305985155936, "grad_norm": 1.761959433555603, "learning_rate": 1.957280307889673e-06, "loss": 0.8165, "step": 51540 }, { "epoch": 0.6281915347397414, "grad_norm": 2.3593194484710693, "learning_rate": 1.956959589480436e-06, "loss": 0.8402, "step": 51545 }, { "epoch": 0.6282524709638891, "grad_norm": 1.9999136924743652, "learning_rate": 1.9566388710711996e-06, "loss": 0.8665, "step": 51550 }, { "epoch": 0.628313407188037, "grad_norm": 2.2279303073883057, "learning_rate": 1.956318152661963e-06, "loss": 0.7711, "step": 51555 }, { "epoch": 0.6283743434121848, "grad_norm": 2.2025516033172607, "learning_rate": 1.9559974342527265e-06, "loss": 0.8292, "step": 51560 }, { "epoch": 0.6284352796363326, "grad_norm": 2.4674720764160156, "learning_rate": 1.9556767158434895e-06, "loss": 0.8786, "step": 51565 }, { "epoch": 0.6284962158604804, "grad_norm": 1.7018777132034302, "learning_rate": 1.955355997434253e-06, "loss": 0.8342, "step": 51570 }, { "epoch": 0.6285571520846283, "grad_norm": 2.2398242950439453, "learning_rate": 1.9550352790250164e-06, "loss": 0.7932, "step": 51575 }, { "epoch": 0.6286180883087761, "grad_norm": 2.0761678218841553, "learning_rate": 1.9547145606157794e-06, "loss": 0.7916, "step": 51580 }, { "epoch": 0.6286790245329238, "grad_norm": 1.8999377489089966, "learning_rate": 1.954393842206543e-06, "loss": 0.8493, "step": 51585 }, { "epoch": 0.6287399607570716, "grad_norm": 1.897762417793274, "learning_rate": 1.9540731237973063e-06, "loss": 0.813, "step": 51590 }, { "epoch": 0.6288008969812194, "grad_norm": 2.0163733959198, "learning_rate": 1.9537524053880693e-06, "loss": 0.859, "step": 51595 }, { "epoch": 0.6288618332053673, "grad_norm": 1.8932347297668457, "learning_rate": 1.9534316869788327e-06, "loss": 0.7678, "step": 51600 }, { "epoch": 0.6289227694295151, "grad_norm": 2.1474833488464355, "learning_rate": 1.9531109685695957e-06, "loss": 0.8633, "step": 51605 }, { "epoch": 0.6289837056536629, "grad_norm": 2.7609689235687256, "learning_rate": 1.952790250160359e-06, "loss": 0.8438, "step": 51610 }, { "epoch": 0.6290446418778107, "grad_norm": 1.8930007219314575, "learning_rate": 1.9524695317511226e-06, "loss": 0.7897, "step": 51615 }, { "epoch": 0.6291055781019584, "grad_norm": 2.1211349964141846, "learning_rate": 1.952148813341886e-06, "loss": 0.8253, "step": 51620 }, { "epoch": 0.6291665143261063, "grad_norm": 1.9878290891647339, "learning_rate": 1.9518280949326495e-06, "loss": 0.8304, "step": 51625 }, { "epoch": 0.6292274505502541, "grad_norm": 2.57511305809021, "learning_rate": 1.9515073765234125e-06, "loss": 0.8014, "step": 51630 }, { "epoch": 0.6292883867744019, "grad_norm": 2.0632846355438232, "learning_rate": 1.951186658114176e-06, "loss": 0.7609, "step": 51635 }, { "epoch": 0.6293493229985497, "grad_norm": 2.7582743167877197, "learning_rate": 1.9508659397049394e-06, "loss": 0.8743, "step": 51640 }, { "epoch": 0.6294102592226976, "grad_norm": 2.6390724182128906, "learning_rate": 1.9505452212957024e-06, "loss": 0.8032, "step": 51645 }, { "epoch": 0.6294711954468454, "grad_norm": 2.1288578510284424, "learning_rate": 1.950224502886466e-06, "loss": 0.8881, "step": 51650 }, { "epoch": 0.6295321316709931, "grad_norm": 1.9931461811065674, "learning_rate": 1.9499037844772293e-06, "loss": 0.8912, "step": 51655 }, { "epoch": 0.6295930678951409, "grad_norm": 1.9626859426498413, "learning_rate": 1.9495830660679923e-06, "loss": 0.8142, "step": 51660 }, { "epoch": 0.6296540041192887, "grad_norm": 1.969521403312683, "learning_rate": 1.9492623476587558e-06, "loss": 0.8758, "step": 51665 }, { "epoch": 0.6297149403434366, "grad_norm": 1.8083685636520386, "learning_rate": 1.948941629249519e-06, "loss": 0.8415, "step": 51670 }, { "epoch": 0.6297758765675844, "grad_norm": 1.787729263305664, "learning_rate": 1.948620910840282e-06, "loss": 0.862, "step": 51675 }, { "epoch": 0.6298368127917322, "grad_norm": 1.86578369140625, "learning_rate": 1.9483001924310456e-06, "loss": 0.8109, "step": 51680 }, { "epoch": 0.62989774901588, "grad_norm": 1.974187970161438, "learning_rate": 1.947979474021809e-06, "loss": 0.8357, "step": 51685 }, { "epoch": 0.6299586852400277, "grad_norm": 1.8390454053878784, "learning_rate": 1.947658755612572e-06, "loss": 0.7837, "step": 51690 }, { "epoch": 0.6300196214641756, "grad_norm": 1.7854344844818115, "learning_rate": 1.9473380372033355e-06, "loss": 0.8549, "step": 51695 }, { "epoch": 0.6300805576883234, "grad_norm": 2.0622756481170654, "learning_rate": 1.947017318794099e-06, "loss": 0.8196, "step": 51700 }, { "epoch": 0.6301414939124712, "grad_norm": 1.744274377822876, "learning_rate": 1.9466966003848624e-06, "loss": 0.7914, "step": 51705 }, { "epoch": 0.630202430136619, "grad_norm": 1.9161269664764404, "learning_rate": 1.946375881975626e-06, "loss": 0.8412, "step": 51710 }, { "epoch": 0.6302633663607669, "grad_norm": 2.039233446121216, "learning_rate": 1.946055163566389e-06, "loss": 0.819, "step": 51715 }, { "epoch": 0.6303243025849147, "grad_norm": 1.793718934059143, "learning_rate": 1.9457344451571523e-06, "loss": 0.8029, "step": 51720 }, { "epoch": 0.6303852388090624, "grad_norm": 1.9907206296920776, "learning_rate": 1.9454137267479158e-06, "loss": 0.842, "step": 51725 }, { "epoch": 0.6304461750332102, "grad_norm": 1.9747718572616577, "learning_rate": 1.9450930083386788e-06, "loss": 0.8415, "step": 51730 }, { "epoch": 0.630507111257358, "grad_norm": 1.9199845790863037, "learning_rate": 1.9447722899294422e-06, "loss": 0.8157, "step": 51735 }, { "epoch": 0.6305680474815059, "grad_norm": 2.045891523361206, "learning_rate": 1.9444515715202052e-06, "loss": 0.7903, "step": 51740 }, { "epoch": 0.6306289837056537, "grad_norm": 2.1665849685668945, "learning_rate": 1.9441308531109687e-06, "loss": 0.7463, "step": 51745 }, { "epoch": 0.6306899199298015, "grad_norm": 1.9234226942062378, "learning_rate": 1.943810134701732e-06, "loss": 0.8905, "step": 51750 }, { "epoch": 0.6307508561539493, "grad_norm": 1.959944725036621, "learning_rate": 1.943489416292495e-06, "loss": 0.8676, "step": 51755 }, { "epoch": 0.630811792378097, "grad_norm": 1.8143519163131714, "learning_rate": 1.9431686978832586e-06, "loss": 0.8415, "step": 51760 }, { "epoch": 0.6308727286022449, "grad_norm": 1.9646930694580078, "learning_rate": 1.942847979474022e-06, "loss": 0.7501, "step": 51765 }, { "epoch": 0.6309336648263927, "grad_norm": 1.9660537242889404, "learning_rate": 1.942527261064785e-06, "loss": 0.8764, "step": 51770 }, { "epoch": 0.6309946010505405, "grad_norm": 2.121180772781372, "learning_rate": 1.9422065426555485e-06, "loss": 0.8085, "step": 51775 }, { "epoch": 0.6310555372746883, "grad_norm": 2.2466959953308105, "learning_rate": 1.941885824246312e-06, "loss": 0.8455, "step": 51780 }, { "epoch": 0.6311164734988362, "grad_norm": 2.957984685897827, "learning_rate": 1.9415651058370754e-06, "loss": 0.8354, "step": 51785 }, { "epoch": 0.631177409722984, "grad_norm": 2.1901214122772217, "learning_rate": 1.941244387427839e-06, "loss": 0.7764, "step": 51790 }, { "epoch": 0.6312383459471317, "grad_norm": 1.8700275421142578, "learning_rate": 1.940923669018602e-06, "loss": 0.7883, "step": 51795 }, { "epoch": 0.6312992821712795, "grad_norm": 3.149592876434326, "learning_rate": 1.9406029506093653e-06, "loss": 0.8687, "step": 51800 }, { "epoch": 0.6313602183954273, "grad_norm": 1.69227135181427, "learning_rate": 1.9402822322001287e-06, "loss": 0.8421, "step": 51805 }, { "epoch": 0.6314211546195752, "grad_norm": 2.002455711364746, "learning_rate": 1.9399615137908917e-06, "loss": 0.7795, "step": 51810 }, { "epoch": 0.631482090843723, "grad_norm": 1.7490322589874268, "learning_rate": 1.939640795381655e-06, "loss": 0.8703, "step": 51815 }, { "epoch": 0.6315430270678708, "grad_norm": 2.6779165267944336, "learning_rate": 1.939320076972418e-06, "loss": 0.8114, "step": 51820 }, { "epoch": 0.6316039632920186, "grad_norm": 1.9404160976409912, "learning_rate": 1.9389993585631816e-06, "loss": 0.803, "step": 51825 }, { "epoch": 0.6316648995161663, "grad_norm": 2.269080877304077, "learning_rate": 1.938678640153945e-06, "loss": 0.8907, "step": 51830 }, { "epoch": 0.6317258357403142, "grad_norm": 1.938149094581604, "learning_rate": 1.938357921744708e-06, "loss": 0.7642, "step": 51835 }, { "epoch": 0.631786771964462, "grad_norm": 1.8231130838394165, "learning_rate": 1.9380372033354715e-06, "loss": 0.8155, "step": 51840 }, { "epoch": 0.6318477081886098, "grad_norm": 2.0730738639831543, "learning_rate": 1.937716484926235e-06, "loss": 0.7992, "step": 51845 }, { "epoch": 0.6319086444127576, "grad_norm": 1.945493459701538, "learning_rate": 1.9373957665169984e-06, "loss": 0.8132, "step": 51850 }, { "epoch": 0.6319695806369054, "grad_norm": 1.9316710233688354, "learning_rate": 1.9370750481077614e-06, "loss": 0.8082, "step": 51855 }, { "epoch": 0.6320305168610533, "grad_norm": 2.000185966491699, "learning_rate": 1.936754329698525e-06, "loss": 0.8622, "step": 51860 }, { "epoch": 0.632091453085201, "grad_norm": 1.7548956871032715, "learning_rate": 1.9364336112892883e-06, "loss": 0.819, "step": 51865 }, { "epoch": 0.6321523893093488, "grad_norm": 2.023700714111328, "learning_rate": 1.9361128928800517e-06, "loss": 0.8123, "step": 51870 }, { "epoch": 0.6322133255334966, "grad_norm": 1.9551118612289429, "learning_rate": 1.9357921744708147e-06, "loss": 0.7636, "step": 51875 }, { "epoch": 0.6322742617576445, "grad_norm": 1.8222166299819946, "learning_rate": 1.935471456061578e-06, "loss": 0.7605, "step": 51880 }, { "epoch": 0.6323351979817923, "grad_norm": 1.9855161905288696, "learning_rate": 1.9351507376523416e-06, "loss": 0.784, "step": 51885 }, { "epoch": 0.6323961342059401, "grad_norm": 1.6175519227981567, "learning_rate": 1.9348300192431046e-06, "loss": 0.8092, "step": 51890 }, { "epoch": 0.6324570704300879, "grad_norm": 1.7553949356079102, "learning_rate": 1.934509300833868e-06, "loss": 0.7957, "step": 51895 }, { "epoch": 0.6325180066542356, "grad_norm": 1.8646661043167114, "learning_rate": 1.934188582424631e-06, "loss": 0.8293, "step": 51900 }, { "epoch": 0.6325789428783835, "grad_norm": 1.905704140663147, "learning_rate": 1.9338678640153945e-06, "loss": 0.7546, "step": 51905 }, { "epoch": 0.6326398791025313, "grad_norm": 2.1109044551849365, "learning_rate": 1.933547145606158e-06, "loss": 0.8361, "step": 51910 }, { "epoch": 0.6327008153266791, "grad_norm": 1.8659451007843018, "learning_rate": 1.933226427196921e-06, "loss": 0.8527, "step": 51915 }, { "epoch": 0.6327617515508269, "grad_norm": 2.03737211227417, "learning_rate": 1.9329057087876844e-06, "loss": 0.8191, "step": 51920 }, { "epoch": 0.6328226877749747, "grad_norm": 1.8270683288574219, "learning_rate": 1.932584990378448e-06, "loss": 0.8212, "step": 51925 }, { "epoch": 0.6328836239991226, "grad_norm": 2.1088309288024902, "learning_rate": 1.9322642719692113e-06, "loss": 0.7981, "step": 51930 }, { "epoch": 0.6329445602232703, "grad_norm": 2.1708970069885254, "learning_rate": 1.9319435535599747e-06, "loss": 0.8351, "step": 51935 }, { "epoch": 0.6330054964474181, "grad_norm": 1.8859953880310059, "learning_rate": 1.9316228351507378e-06, "loss": 0.7697, "step": 51940 }, { "epoch": 0.6330664326715659, "grad_norm": 2.1580541133880615, "learning_rate": 1.931302116741501e-06, "loss": 0.8058, "step": 51945 }, { "epoch": 0.6331273688957137, "grad_norm": 1.8375440835952759, "learning_rate": 1.9309813983322646e-06, "loss": 0.8011, "step": 51950 }, { "epoch": 0.6331883051198616, "grad_norm": 2.1735658645629883, "learning_rate": 1.9306606799230277e-06, "loss": 0.8314, "step": 51955 }, { "epoch": 0.6332492413440094, "grad_norm": 1.9016671180725098, "learning_rate": 1.930339961513791e-06, "loss": 0.8742, "step": 51960 }, { "epoch": 0.6333101775681572, "grad_norm": 1.8750450611114502, "learning_rate": 1.9300192431045545e-06, "loss": 0.8752, "step": 51965 }, { "epoch": 0.6333711137923049, "grad_norm": 1.7523521184921265, "learning_rate": 1.9296985246953176e-06, "loss": 0.8194, "step": 51970 }, { "epoch": 0.6334320500164528, "grad_norm": 1.842818260192871, "learning_rate": 1.929377806286081e-06, "loss": 0.8389, "step": 51975 }, { "epoch": 0.6334929862406006, "grad_norm": 2.132047414779663, "learning_rate": 1.9290570878768444e-06, "loss": 0.862, "step": 51980 }, { "epoch": 0.6335539224647484, "grad_norm": 2.4461605548858643, "learning_rate": 1.9287363694676075e-06, "loss": 0.8571, "step": 51985 }, { "epoch": 0.6336148586888962, "grad_norm": 1.8608654737472534, "learning_rate": 1.928415651058371e-06, "loss": 0.8148, "step": 51990 }, { "epoch": 0.633675794913044, "grad_norm": 2.2921626567840576, "learning_rate": 1.928094932649134e-06, "loss": 0.741, "step": 51995 }, { "epoch": 0.6337367311371919, "grad_norm": 2.216693162918091, "learning_rate": 1.9277742142398974e-06, "loss": 0.9056, "step": 52000 }, { "epoch": 0.6337976673613396, "grad_norm": 2.0075316429138184, "learning_rate": 1.927453495830661e-06, "loss": 0.8343, "step": 52005 }, { "epoch": 0.6338586035854874, "grad_norm": 2.1625847816467285, "learning_rate": 1.9271327774214242e-06, "loss": 0.8766, "step": 52010 }, { "epoch": 0.6339195398096352, "grad_norm": 2.314236879348755, "learning_rate": 1.9268120590121877e-06, "loss": 0.7917, "step": 52015 }, { "epoch": 0.633980476033783, "grad_norm": 1.7603734731674194, "learning_rate": 1.9264913406029507e-06, "loss": 0.8238, "step": 52020 }, { "epoch": 0.6340414122579309, "grad_norm": 1.8310490846633911, "learning_rate": 1.926170622193714e-06, "loss": 0.8171, "step": 52025 }, { "epoch": 0.6341023484820787, "grad_norm": 2.0270330905914307, "learning_rate": 1.9258499037844776e-06, "loss": 0.8441, "step": 52030 }, { "epoch": 0.6341632847062265, "grad_norm": 2.106200695037842, "learning_rate": 1.9255291853752406e-06, "loss": 0.8121, "step": 52035 }, { "epoch": 0.6342242209303742, "grad_norm": 1.8258360624313354, "learning_rate": 1.925208466966004e-06, "loss": 0.8213, "step": 52040 }, { "epoch": 0.634285157154522, "grad_norm": 2.2152373790740967, "learning_rate": 1.9248877485567675e-06, "loss": 0.7712, "step": 52045 }, { "epoch": 0.6343460933786699, "grad_norm": 1.9161955118179321, "learning_rate": 1.9245670301475305e-06, "loss": 0.7813, "step": 52050 }, { "epoch": 0.6344070296028177, "grad_norm": 2.0448033809661865, "learning_rate": 1.924246311738294e-06, "loss": 0.8046, "step": 52055 }, { "epoch": 0.6344679658269655, "grad_norm": 1.808012843132019, "learning_rate": 1.9239255933290574e-06, "loss": 0.8432, "step": 52060 }, { "epoch": 0.6345289020511133, "grad_norm": 1.8954046964645386, "learning_rate": 1.9236048749198204e-06, "loss": 0.9235, "step": 52065 }, { "epoch": 0.6345898382752612, "grad_norm": 1.9428597688674927, "learning_rate": 1.923284156510584e-06, "loss": 0.7987, "step": 52070 }, { "epoch": 0.6346507744994089, "grad_norm": 1.7880008220672607, "learning_rate": 1.9229634381013473e-06, "loss": 0.8676, "step": 52075 }, { "epoch": 0.6347117107235567, "grad_norm": 2.409144401550293, "learning_rate": 1.9226427196921103e-06, "loss": 0.7811, "step": 52080 }, { "epoch": 0.6347726469477045, "grad_norm": 2.2806270122528076, "learning_rate": 1.9223220012828737e-06, "loss": 0.7945, "step": 52085 }, { "epoch": 0.6348335831718523, "grad_norm": 1.8010051250457764, "learning_rate": 1.922001282873637e-06, "loss": 0.8568, "step": 52090 }, { "epoch": 0.6348945193960002, "grad_norm": 1.7725013494491577, "learning_rate": 1.9216805644644006e-06, "loss": 0.8419, "step": 52095 }, { "epoch": 0.634955455620148, "grad_norm": 1.8477692604064941, "learning_rate": 1.921359846055164e-06, "loss": 0.8054, "step": 52100 }, { "epoch": 0.6350163918442958, "grad_norm": 2.0174639225006104, "learning_rate": 1.921039127645927e-06, "loss": 0.8093, "step": 52105 }, { "epoch": 0.6350773280684435, "grad_norm": 1.943155288696289, "learning_rate": 1.9207184092366905e-06, "loss": 0.818, "step": 52110 }, { "epoch": 0.6351382642925913, "grad_norm": 2.1230711936950684, "learning_rate": 1.9203976908274535e-06, "loss": 0.7979, "step": 52115 }, { "epoch": 0.6351992005167392, "grad_norm": 1.841668725013733, "learning_rate": 1.920076972418217e-06, "loss": 0.8137, "step": 52120 }, { "epoch": 0.635260136740887, "grad_norm": 1.9769715070724487, "learning_rate": 1.9197562540089804e-06, "loss": 0.8725, "step": 52125 }, { "epoch": 0.6353210729650348, "grad_norm": 1.8444894552230835, "learning_rate": 1.9194355355997434e-06, "loss": 0.8162, "step": 52130 }, { "epoch": 0.6353820091891826, "grad_norm": 1.7754045724868774, "learning_rate": 1.919114817190507e-06, "loss": 0.8107, "step": 52135 }, { "epoch": 0.6354429454133305, "grad_norm": 2.6578376293182373, "learning_rate": 1.9187940987812703e-06, "loss": 0.8687, "step": 52140 }, { "epoch": 0.6355038816374782, "grad_norm": 2.0880086421966553, "learning_rate": 1.9184733803720333e-06, "loss": 0.8751, "step": 52145 }, { "epoch": 0.635564817861626, "grad_norm": 1.9968724250793457, "learning_rate": 1.9181526619627968e-06, "loss": 0.8921, "step": 52150 }, { "epoch": 0.6356257540857738, "grad_norm": 2.198209047317505, "learning_rate": 1.91783194355356e-06, "loss": 0.8812, "step": 52155 }, { "epoch": 0.6356866903099216, "grad_norm": 2.150251626968384, "learning_rate": 1.917511225144323e-06, "loss": 0.8168, "step": 52160 }, { "epoch": 0.6357476265340695, "grad_norm": 2.120176315307617, "learning_rate": 1.9171905067350867e-06, "loss": 0.8011, "step": 52165 }, { "epoch": 0.6358085627582173, "grad_norm": 2.0522758960723877, "learning_rate": 1.91686978832585e-06, "loss": 0.8497, "step": 52170 }, { "epoch": 0.6358694989823651, "grad_norm": 2.219174385070801, "learning_rate": 1.9165490699166135e-06, "loss": 0.7796, "step": 52175 }, { "epoch": 0.6359304352065128, "grad_norm": 2.3441882133483887, "learning_rate": 1.916228351507377e-06, "loss": 0.8295, "step": 52180 }, { "epoch": 0.6359913714306606, "grad_norm": 1.925672173500061, "learning_rate": 1.91590763309814e-06, "loss": 0.8073, "step": 52185 }, { "epoch": 0.6360523076548085, "grad_norm": 2.140519857406616, "learning_rate": 1.9155869146889034e-06, "loss": 0.852, "step": 52190 }, { "epoch": 0.6361132438789563, "grad_norm": 1.8825095891952515, "learning_rate": 1.9152661962796664e-06, "loss": 0.7528, "step": 52195 }, { "epoch": 0.6361741801031041, "grad_norm": 2.33764910697937, "learning_rate": 1.91494547787043e-06, "loss": 0.7548, "step": 52200 }, { "epoch": 0.6362351163272519, "grad_norm": 1.8315982818603516, "learning_rate": 1.9146247594611933e-06, "loss": 0.7937, "step": 52205 }, { "epoch": 0.6362960525513998, "grad_norm": 2.165132761001587, "learning_rate": 1.9143040410519563e-06, "loss": 0.772, "step": 52210 }, { "epoch": 0.6363569887755475, "grad_norm": 1.6744946241378784, "learning_rate": 1.9139833226427198e-06, "loss": 0.7389, "step": 52215 }, { "epoch": 0.6364179249996953, "grad_norm": 2.7064297199249268, "learning_rate": 1.9136626042334832e-06, "loss": 0.8419, "step": 52220 }, { "epoch": 0.6364788612238431, "grad_norm": 1.589089274406433, "learning_rate": 1.9133418858242462e-06, "loss": 0.796, "step": 52225 }, { "epoch": 0.6365397974479909, "grad_norm": 1.9256426095962524, "learning_rate": 1.9130211674150097e-06, "loss": 0.7626, "step": 52230 }, { "epoch": 0.6366007336721388, "grad_norm": 1.8712159395217896, "learning_rate": 1.912700449005773e-06, "loss": 0.8363, "step": 52235 }, { "epoch": 0.6366616698962866, "grad_norm": 1.589648962020874, "learning_rate": 1.9123797305965366e-06, "loss": 0.8858, "step": 52240 }, { "epoch": 0.6367226061204343, "grad_norm": 2.2324886322021484, "learning_rate": 1.9120590121872996e-06, "loss": 0.7971, "step": 52245 }, { "epoch": 0.6367835423445821, "grad_norm": 2.134554862976074, "learning_rate": 1.911738293778063e-06, "loss": 0.8095, "step": 52250 }, { "epoch": 0.6368444785687299, "grad_norm": 2.234832286834717, "learning_rate": 1.9114175753688265e-06, "loss": 0.8409, "step": 52255 }, { "epoch": 0.6369054147928778, "grad_norm": 1.9480301141738892, "learning_rate": 1.91109685695959e-06, "loss": 0.8563, "step": 52260 }, { "epoch": 0.6369663510170256, "grad_norm": 1.8593778610229492, "learning_rate": 1.910776138550353e-06, "loss": 0.7965, "step": 52265 }, { "epoch": 0.6370272872411734, "grad_norm": 1.9005118608474731, "learning_rate": 1.9104554201411164e-06, "loss": 0.8702, "step": 52270 }, { "epoch": 0.6370882234653212, "grad_norm": 2.189890146255493, "learning_rate": 1.91013470173188e-06, "loss": 0.7965, "step": 52275 }, { "epoch": 0.637149159689469, "grad_norm": 1.8313148021697998, "learning_rate": 1.909813983322643e-06, "loss": 0.8519, "step": 52280 }, { "epoch": 0.6372100959136168, "grad_norm": 2.1784026622772217, "learning_rate": 1.9094932649134063e-06, "loss": 0.8908, "step": 52285 }, { "epoch": 0.6372710321377646, "grad_norm": 2.1948869228363037, "learning_rate": 1.9091725465041693e-06, "loss": 0.8749, "step": 52290 }, { "epoch": 0.6373319683619124, "grad_norm": 2.063894748687744, "learning_rate": 1.9088518280949327e-06, "loss": 0.8568, "step": 52295 }, { "epoch": 0.6373929045860602, "grad_norm": 2.4699344635009766, "learning_rate": 1.908531109685696e-06, "loss": 0.7576, "step": 52300 }, { "epoch": 0.6374538408102081, "grad_norm": 1.6646260023117065, "learning_rate": 1.908210391276459e-06, "loss": 0.8015, "step": 52305 }, { "epoch": 0.6375147770343559, "grad_norm": 1.6793413162231445, "learning_rate": 1.9078896728672226e-06, "loss": 0.8584, "step": 52310 }, { "epoch": 0.6375757132585036, "grad_norm": 1.628039836883545, "learning_rate": 1.907568954457986e-06, "loss": 0.8177, "step": 52315 }, { "epoch": 0.6376366494826514, "grad_norm": 1.9929746389389038, "learning_rate": 1.9072482360487493e-06, "loss": 0.85, "step": 52320 }, { "epoch": 0.6376975857067992, "grad_norm": 1.974888801574707, "learning_rate": 1.9069275176395127e-06, "loss": 0.7854, "step": 52325 }, { "epoch": 0.6377585219309471, "grad_norm": 1.8889760971069336, "learning_rate": 1.906606799230276e-06, "loss": 0.8818, "step": 52330 }, { "epoch": 0.6378194581550949, "grad_norm": 1.8422659635543823, "learning_rate": 1.9062860808210392e-06, "loss": 0.8488, "step": 52335 }, { "epoch": 0.6378803943792427, "grad_norm": 2.140122890472412, "learning_rate": 1.9059653624118026e-06, "loss": 0.8419, "step": 52340 }, { "epoch": 0.6379413306033905, "grad_norm": 1.768558144569397, "learning_rate": 1.9056446440025658e-06, "loss": 0.7685, "step": 52345 }, { "epoch": 0.6380022668275382, "grad_norm": 1.9357727766036987, "learning_rate": 1.9053239255933293e-06, "loss": 0.76, "step": 52350 }, { "epoch": 0.6380632030516861, "grad_norm": 1.8720178604125977, "learning_rate": 1.9050032071840927e-06, "loss": 0.8621, "step": 52355 }, { "epoch": 0.6381241392758339, "grad_norm": 1.7667378187179565, "learning_rate": 1.9046824887748557e-06, "loss": 0.7622, "step": 52360 }, { "epoch": 0.6381850754999817, "grad_norm": 2.0081095695495605, "learning_rate": 1.9043617703656192e-06, "loss": 0.7904, "step": 52365 }, { "epoch": 0.6382460117241295, "grad_norm": 1.8733150959014893, "learning_rate": 1.9040410519563824e-06, "loss": 0.7949, "step": 52370 }, { "epoch": 0.6383069479482774, "grad_norm": 2.1986212730407715, "learning_rate": 1.9037203335471458e-06, "loss": 0.8612, "step": 52375 }, { "epoch": 0.6383678841724252, "grad_norm": 2.134786605834961, "learning_rate": 1.903399615137909e-06, "loss": 0.7789, "step": 52380 }, { "epoch": 0.6384288203965729, "grad_norm": 2.213595151901245, "learning_rate": 1.9030788967286723e-06, "loss": 0.784, "step": 52385 }, { "epoch": 0.6384897566207207, "grad_norm": 1.9609969854354858, "learning_rate": 1.9027581783194357e-06, "loss": 0.8347, "step": 52390 }, { "epoch": 0.6385506928448685, "grad_norm": 2.3479223251342773, "learning_rate": 1.9024374599101992e-06, "loss": 0.9036, "step": 52395 }, { "epoch": 0.6386116290690164, "grad_norm": 2.2839512825012207, "learning_rate": 1.9021167415009622e-06, "loss": 0.8669, "step": 52400 }, { "epoch": 0.6386725652931642, "grad_norm": 1.7346423864364624, "learning_rate": 1.9017960230917256e-06, "loss": 0.8245, "step": 52405 }, { "epoch": 0.638733501517312, "grad_norm": 2.0395805835723877, "learning_rate": 1.9014753046824889e-06, "loss": 0.8106, "step": 52410 }, { "epoch": 0.6387944377414598, "grad_norm": 1.9675222635269165, "learning_rate": 1.9011545862732523e-06, "loss": 0.8427, "step": 52415 }, { "epoch": 0.6388553739656075, "grad_norm": 1.8619948625564575, "learning_rate": 1.9008338678640155e-06, "loss": 0.7788, "step": 52420 }, { "epoch": 0.6389163101897554, "grad_norm": 1.8946605920791626, "learning_rate": 1.9005131494547788e-06, "loss": 0.8003, "step": 52425 }, { "epoch": 0.6389772464139032, "grad_norm": 1.912015438079834, "learning_rate": 1.9001924310455422e-06, "loss": 0.8119, "step": 52430 }, { "epoch": 0.639038182638051, "grad_norm": 2.037313938140869, "learning_rate": 1.8998717126363057e-06, "loss": 0.838, "step": 52435 }, { "epoch": 0.6390991188621988, "grad_norm": 2.3245487213134766, "learning_rate": 1.8995509942270687e-06, "loss": 0.8938, "step": 52440 }, { "epoch": 0.6391600550863467, "grad_norm": 1.7820411920547485, "learning_rate": 1.8992302758178321e-06, "loss": 0.8067, "step": 52445 }, { "epoch": 0.6392209913104945, "grad_norm": 1.552179217338562, "learning_rate": 1.8989095574085953e-06, "loss": 0.7001, "step": 52450 }, { "epoch": 0.6392819275346422, "grad_norm": 1.9526878595352173, "learning_rate": 1.8985888389993588e-06, "loss": 0.787, "step": 52455 }, { "epoch": 0.63934286375879, "grad_norm": 1.6856032609939575, "learning_rate": 1.898268120590122e-06, "loss": 0.8166, "step": 52460 }, { "epoch": 0.6394037999829378, "grad_norm": 1.925007700920105, "learning_rate": 1.8979474021808852e-06, "loss": 0.7647, "step": 52465 }, { "epoch": 0.6394647362070857, "grad_norm": 2.0935442447662354, "learning_rate": 1.8976266837716487e-06, "loss": 0.805, "step": 52470 }, { "epoch": 0.6395256724312335, "grad_norm": 2.024876117706299, "learning_rate": 1.8973059653624121e-06, "loss": 0.7838, "step": 52475 }, { "epoch": 0.6395866086553813, "grad_norm": 1.9612089395523071, "learning_rate": 1.8969852469531751e-06, "loss": 0.8704, "step": 52480 }, { "epoch": 0.6396475448795291, "grad_norm": 1.8065075874328613, "learning_rate": 1.8966645285439386e-06, "loss": 0.901, "step": 52485 }, { "epoch": 0.6397084811036768, "grad_norm": 1.9937533140182495, "learning_rate": 1.8963438101347018e-06, "loss": 0.8682, "step": 52490 }, { "epoch": 0.6397694173278247, "grad_norm": 1.856392741203308, "learning_rate": 1.8960230917254652e-06, "loss": 0.8003, "step": 52495 }, { "epoch": 0.6398303535519725, "grad_norm": 1.910695195198059, "learning_rate": 1.8957023733162287e-06, "loss": 0.8229, "step": 52500 }, { "epoch": 0.6398912897761203, "grad_norm": 1.752968192100525, "learning_rate": 1.8953816549069917e-06, "loss": 0.8574, "step": 52505 }, { "epoch": 0.6399522260002681, "grad_norm": 1.75929594039917, "learning_rate": 1.8950609364977551e-06, "loss": 0.8191, "step": 52510 }, { "epoch": 0.640013162224416, "grad_norm": 1.9494742155075073, "learning_rate": 1.8947402180885186e-06, "loss": 0.8175, "step": 52515 }, { "epoch": 0.6400740984485638, "grad_norm": 2.229734182357788, "learning_rate": 1.8944194996792816e-06, "loss": 0.8418, "step": 52520 }, { "epoch": 0.6401350346727115, "grad_norm": 2.519272804260254, "learning_rate": 1.894098781270045e-06, "loss": 0.82, "step": 52525 }, { "epoch": 0.6401959708968593, "grad_norm": 1.865488052368164, "learning_rate": 1.8937780628608083e-06, "loss": 0.7615, "step": 52530 }, { "epoch": 0.6402569071210071, "grad_norm": 1.9054242372512817, "learning_rate": 1.8934573444515717e-06, "loss": 0.8204, "step": 52535 }, { "epoch": 0.640317843345155, "grad_norm": 2.050851583480835, "learning_rate": 1.8931366260423351e-06, "loss": 0.8121, "step": 52540 }, { "epoch": 0.6403787795693028, "grad_norm": 2.101682662963867, "learning_rate": 1.8928159076330982e-06, "loss": 0.8289, "step": 52545 }, { "epoch": 0.6404397157934506, "grad_norm": 2.038849115371704, "learning_rate": 1.8924951892238616e-06, "loss": 0.7741, "step": 52550 }, { "epoch": 0.6405006520175984, "grad_norm": 1.93606436252594, "learning_rate": 1.892174470814625e-06, "loss": 0.7736, "step": 52555 }, { "epoch": 0.6405615882417461, "grad_norm": 2.239790916442871, "learning_rate": 1.891853752405388e-06, "loss": 0.8576, "step": 52560 }, { "epoch": 0.640622524465894, "grad_norm": 2.113394021987915, "learning_rate": 1.8915330339961515e-06, "loss": 0.7783, "step": 52565 }, { "epoch": 0.6406834606900418, "grad_norm": 1.801544189453125, "learning_rate": 1.891212315586915e-06, "loss": 0.8545, "step": 52570 }, { "epoch": 0.6407443969141896, "grad_norm": 2.1386685371398926, "learning_rate": 1.8908915971776782e-06, "loss": 0.8659, "step": 52575 }, { "epoch": 0.6408053331383374, "grad_norm": 2.538583993911743, "learning_rate": 1.8905708787684416e-06, "loss": 0.8694, "step": 52580 }, { "epoch": 0.6408662693624853, "grad_norm": 2.1810896396636963, "learning_rate": 1.8902501603592046e-06, "loss": 0.7739, "step": 52585 }, { "epoch": 0.6409272055866331, "grad_norm": 1.9107519388198853, "learning_rate": 1.889929441949968e-06, "loss": 0.8779, "step": 52590 }, { "epoch": 0.6409881418107808, "grad_norm": 2.112908124923706, "learning_rate": 1.8896087235407315e-06, "loss": 0.8715, "step": 52595 }, { "epoch": 0.6410490780349286, "grad_norm": 2.264026165008545, "learning_rate": 1.8892880051314947e-06, "loss": 0.8981, "step": 52600 }, { "epoch": 0.6411100142590764, "grad_norm": 1.8200753927230835, "learning_rate": 1.888967286722258e-06, "loss": 0.8239, "step": 52605 }, { "epoch": 0.6411709504832243, "grad_norm": 1.9103453159332275, "learning_rate": 1.8886465683130214e-06, "loss": 0.7498, "step": 52610 }, { "epoch": 0.6412318867073721, "grad_norm": 2.43515944480896, "learning_rate": 1.8883258499037846e-06, "loss": 0.8147, "step": 52615 }, { "epoch": 0.6412928229315199, "grad_norm": 2.114557981491089, "learning_rate": 1.888005131494548e-06, "loss": 0.7521, "step": 52620 }, { "epoch": 0.6413537591556677, "grad_norm": 1.9013996124267578, "learning_rate": 1.887684413085311e-06, "loss": 0.795, "step": 52625 }, { "epoch": 0.6414146953798154, "grad_norm": 2.1026151180267334, "learning_rate": 1.8873636946760745e-06, "loss": 0.8264, "step": 52630 }, { "epoch": 0.6414756316039633, "grad_norm": 2.0117173194885254, "learning_rate": 1.887042976266838e-06, "loss": 0.7802, "step": 52635 }, { "epoch": 0.6415365678281111, "grad_norm": 1.826084017753601, "learning_rate": 1.8867222578576012e-06, "loss": 0.8778, "step": 52640 }, { "epoch": 0.6415975040522589, "grad_norm": 1.8899015188217163, "learning_rate": 1.8864015394483644e-06, "loss": 0.7734, "step": 52645 }, { "epoch": 0.6416584402764067, "grad_norm": 2.2548038959503174, "learning_rate": 1.8860808210391279e-06, "loss": 0.8348, "step": 52650 }, { "epoch": 0.6417193765005546, "grad_norm": 2.2686171531677246, "learning_rate": 1.885760102629891e-06, "loss": 0.8311, "step": 52655 }, { "epoch": 0.6417803127247024, "grad_norm": 1.8658905029296875, "learning_rate": 1.8854393842206545e-06, "loss": 0.8039, "step": 52660 }, { "epoch": 0.6418412489488501, "grad_norm": 1.781641960144043, "learning_rate": 1.8851186658114176e-06, "loss": 0.8629, "step": 52665 }, { "epoch": 0.6419021851729979, "grad_norm": 2.0345711708068848, "learning_rate": 1.884797947402181e-06, "loss": 0.7486, "step": 52670 }, { "epoch": 0.6419631213971457, "grad_norm": 1.713358998298645, "learning_rate": 1.8844772289929444e-06, "loss": 0.7731, "step": 52675 }, { "epoch": 0.6420240576212936, "grad_norm": 2.320600986480713, "learning_rate": 1.8841565105837077e-06, "loss": 0.8144, "step": 52680 }, { "epoch": 0.6420849938454414, "grad_norm": 2.10410737991333, "learning_rate": 1.8838357921744709e-06, "loss": 0.8565, "step": 52685 }, { "epoch": 0.6421459300695892, "grad_norm": 2.1769447326660156, "learning_rate": 1.8835150737652343e-06, "loss": 0.7459, "step": 52690 }, { "epoch": 0.642206866293737, "grad_norm": 1.6843892335891724, "learning_rate": 1.8831943553559976e-06, "loss": 0.7499, "step": 52695 }, { "epoch": 0.6422678025178847, "grad_norm": 1.8291585445404053, "learning_rate": 1.882873636946761e-06, "loss": 0.8735, "step": 52700 }, { "epoch": 0.6423287387420326, "grad_norm": 1.8854271173477173, "learning_rate": 1.882552918537524e-06, "loss": 0.8402, "step": 52705 }, { "epoch": 0.6423896749661804, "grad_norm": 2.084439992904663, "learning_rate": 1.8822322001282875e-06, "loss": 0.8905, "step": 52710 }, { "epoch": 0.6424506111903282, "grad_norm": 1.655824899673462, "learning_rate": 1.881911481719051e-06, "loss": 0.8136, "step": 52715 }, { "epoch": 0.642511547414476, "grad_norm": 1.8033287525177002, "learning_rate": 1.8815907633098141e-06, "loss": 0.8672, "step": 52720 }, { "epoch": 0.6425724836386238, "grad_norm": 1.8297041654586792, "learning_rate": 1.8812700449005776e-06, "loss": 0.7899, "step": 52725 }, { "epoch": 0.6426334198627717, "grad_norm": 1.9983468055725098, "learning_rate": 1.8809493264913408e-06, "loss": 0.7849, "step": 52730 }, { "epoch": 0.6426943560869194, "grad_norm": 1.8101460933685303, "learning_rate": 1.880628608082104e-06, "loss": 0.78, "step": 52735 }, { "epoch": 0.6427552923110672, "grad_norm": 1.9049479961395264, "learning_rate": 1.8803078896728675e-06, "loss": 0.802, "step": 52740 }, { "epoch": 0.642816228535215, "grad_norm": 1.8291015625, "learning_rate": 1.8799871712636305e-06, "loss": 0.7729, "step": 52745 }, { "epoch": 0.6428771647593629, "grad_norm": 2.2796263694763184, "learning_rate": 1.879666452854394e-06, "loss": 0.7924, "step": 52750 }, { "epoch": 0.6429381009835107, "grad_norm": 1.8962839841842651, "learning_rate": 1.8793457344451574e-06, "loss": 0.833, "step": 52755 }, { "epoch": 0.6429990372076585, "grad_norm": 1.8594774007797241, "learning_rate": 1.8790250160359206e-06, "loss": 0.8419, "step": 52760 }, { "epoch": 0.6430599734318063, "grad_norm": 2.56046462059021, "learning_rate": 1.878704297626684e-06, "loss": 0.8109, "step": 52765 }, { "epoch": 0.643120909655954, "grad_norm": 2.1903562545776367, "learning_rate": 1.8783835792174473e-06, "loss": 0.7929, "step": 52770 }, { "epoch": 0.6431818458801019, "grad_norm": 2.057307004928589, "learning_rate": 1.8780628608082105e-06, "loss": 0.8448, "step": 52775 }, { "epoch": 0.6432427821042497, "grad_norm": 2.042721748352051, "learning_rate": 1.877742142398974e-06, "loss": 0.9257, "step": 52780 }, { "epoch": 0.6433037183283975, "grad_norm": 2.3089418411254883, "learning_rate": 1.877421423989737e-06, "loss": 0.8384, "step": 52785 }, { "epoch": 0.6433646545525453, "grad_norm": 2.038858652114868, "learning_rate": 1.8771007055805004e-06, "loss": 0.8155, "step": 52790 }, { "epoch": 0.6434255907766931, "grad_norm": 1.9627641439437866, "learning_rate": 1.8767799871712638e-06, "loss": 0.8609, "step": 52795 }, { "epoch": 0.643486527000841, "grad_norm": 2.058354377746582, "learning_rate": 1.876459268762027e-06, "loss": 0.8177, "step": 52800 }, { "epoch": 0.6435474632249887, "grad_norm": 2.2652180194854736, "learning_rate": 1.8761385503527905e-06, "loss": 0.7932, "step": 52805 }, { "epoch": 0.6436083994491365, "grad_norm": 2.1427061557769775, "learning_rate": 1.8758178319435537e-06, "loss": 0.8969, "step": 52810 }, { "epoch": 0.6436693356732843, "grad_norm": 2.1414666175842285, "learning_rate": 1.875497113534317e-06, "loss": 0.8379, "step": 52815 }, { "epoch": 0.6437302718974321, "grad_norm": 2.225193977355957, "learning_rate": 1.8751763951250804e-06, "loss": 0.8492, "step": 52820 }, { "epoch": 0.64379120812158, "grad_norm": 2.013958215713501, "learning_rate": 1.8748556767158434e-06, "loss": 0.8499, "step": 52825 }, { "epoch": 0.6438521443457278, "grad_norm": 2.164630651473999, "learning_rate": 1.8745349583066068e-06, "loss": 0.8462, "step": 52830 }, { "epoch": 0.6439130805698756, "grad_norm": 1.9836220741271973, "learning_rate": 1.8742142398973703e-06, "loss": 0.8097, "step": 52835 }, { "epoch": 0.6439740167940233, "grad_norm": 1.8970041275024414, "learning_rate": 1.8738935214881335e-06, "loss": 0.8523, "step": 52840 }, { "epoch": 0.6440349530181712, "grad_norm": 1.9435527324676514, "learning_rate": 1.873572803078897e-06, "loss": 0.7477, "step": 52845 }, { "epoch": 0.644095889242319, "grad_norm": 1.7435404062271118, "learning_rate": 1.8732520846696604e-06, "loss": 0.8399, "step": 52850 }, { "epoch": 0.6441568254664668, "grad_norm": 2.2839248180389404, "learning_rate": 1.8729313662604234e-06, "loss": 0.8559, "step": 52855 }, { "epoch": 0.6442177616906146, "grad_norm": 2.12998104095459, "learning_rate": 1.8726106478511869e-06, "loss": 0.799, "step": 52860 }, { "epoch": 0.6442786979147624, "grad_norm": 2.1179027557373047, "learning_rate": 1.87228992944195e-06, "loss": 0.8508, "step": 52865 }, { "epoch": 0.6443396341389103, "grad_norm": 1.722551941871643, "learning_rate": 1.8719692110327133e-06, "loss": 0.8213, "step": 52870 }, { "epoch": 0.644400570363058, "grad_norm": 2.0647168159484863, "learning_rate": 1.8716484926234767e-06, "loss": 0.8476, "step": 52875 }, { "epoch": 0.6444615065872058, "grad_norm": 1.867714762687683, "learning_rate": 1.87132777421424e-06, "loss": 0.8469, "step": 52880 }, { "epoch": 0.6445224428113536, "grad_norm": 2.7199904918670654, "learning_rate": 1.8710070558050034e-06, "loss": 0.8105, "step": 52885 }, { "epoch": 0.6445833790355014, "grad_norm": 1.7661020755767822, "learning_rate": 1.8706863373957669e-06, "loss": 0.8068, "step": 52890 }, { "epoch": 0.6446443152596493, "grad_norm": 1.950175166130066, "learning_rate": 1.8703656189865299e-06, "loss": 0.8026, "step": 52895 }, { "epoch": 0.6447052514837971, "grad_norm": 1.8243982791900635, "learning_rate": 1.8700449005772933e-06, "loss": 0.8163, "step": 52900 }, { "epoch": 0.6447661877079449, "grad_norm": 1.8035413026809692, "learning_rate": 1.8697241821680568e-06, "loss": 0.824, "step": 52905 }, { "epoch": 0.6448271239320926, "grad_norm": 2.067601442337036, "learning_rate": 1.8694034637588198e-06, "loss": 0.911, "step": 52910 }, { "epoch": 0.6448880601562405, "grad_norm": 1.9007185697555542, "learning_rate": 1.8690827453495832e-06, "loss": 0.8834, "step": 52915 }, { "epoch": 0.6449489963803883, "grad_norm": 2.0511224269866943, "learning_rate": 1.8687620269403464e-06, "loss": 0.7888, "step": 52920 }, { "epoch": 0.6450099326045361, "grad_norm": 2.02065110206604, "learning_rate": 1.8684413085311099e-06, "loss": 0.8776, "step": 52925 }, { "epoch": 0.6450708688286839, "grad_norm": 1.5977221727371216, "learning_rate": 1.8681205901218733e-06, "loss": 0.8566, "step": 52930 }, { "epoch": 0.6451318050528317, "grad_norm": 2.249094247817993, "learning_rate": 1.8677998717126363e-06, "loss": 0.7535, "step": 52935 }, { "epoch": 0.6451927412769796, "grad_norm": 1.9048177003860474, "learning_rate": 1.8674791533033998e-06, "loss": 0.7705, "step": 52940 }, { "epoch": 0.6452536775011273, "grad_norm": 1.87933349609375, "learning_rate": 1.8671584348941632e-06, "loss": 0.8786, "step": 52945 }, { "epoch": 0.6453146137252751, "grad_norm": 1.9327442646026611, "learning_rate": 1.8668377164849264e-06, "loss": 0.7987, "step": 52950 }, { "epoch": 0.6453755499494229, "grad_norm": 2.091710090637207, "learning_rate": 1.8665169980756897e-06, "loss": 0.8217, "step": 52955 }, { "epoch": 0.6454364861735707, "grad_norm": 2.0708086490631104, "learning_rate": 1.866196279666453e-06, "loss": 0.7601, "step": 52960 }, { "epoch": 0.6454974223977186, "grad_norm": 1.6939573287963867, "learning_rate": 1.8658755612572163e-06, "loss": 0.8121, "step": 52965 }, { "epoch": 0.6455583586218664, "grad_norm": 1.7490489482879639, "learning_rate": 1.8655548428479798e-06, "loss": 0.8279, "step": 52970 }, { "epoch": 0.6456192948460142, "grad_norm": 2.0769786834716797, "learning_rate": 1.8652341244387428e-06, "loss": 0.8057, "step": 52975 }, { "epoch": 0.6456802310701619, "grad_norm": 1.915936827659607, "learning_rate": 1.8649134060295062e-06, "loss": 0.9275, "step": 52980 }, { "epoch": 0.6457411672943097, "grad_norm": 2.5637829303741455, "learning_rate": 1.8645926876202697e-06, "loss": 0.8299, "step": 52985 }, { "epoch": 0.6458021035184576, "grad_norm": 2.042170763015747, "learning_rate": 1.864271969211033e-06, "loss": 0.9142, "step": 52990 }, { "epoch": 0.6458630397426054, "grad_norm": 2.252763271331787, "learning_rate": 1.8639512508017961e-06, "loss": 0.8582, "step": 52995 }, { "epoch": 0.6459239759667532, "grad_norm": 1.9673575162887573, "learning_rate": 1.8636305323925594e-06, "loss": 0.8217, "step": 53000 }, { "epoch": 0.645984912190901, "grad_norm": 1.9538418054580688, "learning_rate": 1.8633098139833228e-06, "loss": 0.9058, "step": 53005 }, { "epoch": 0.6460458484150489, "grad_norm": 1.7198091745376587, "learning_rate": 1.8629890955740862e-06, "loss": 0.7997, "step": 53010 }, { "epoch": 0.6461067846391966, "grad_norm": 1.771996259689331, "learning_rate": 1.8626683771648493e-06, "loss": 0.832, "step": 53015 }, { "epoch": 0.6461677208633444, "grad_norm": 1.9002718925476074, "learning_rate": 1.8623476587556127e-06, "loss": 0.8432, "step": 53020 }, { "epoch": 0.6462286570874922, "grad_norm": 2.010258674621582, "learning_rate": 1.8620269403463761e-06, "loss": 0.8558, "step": 53025 }, { "epoch": 0.64628959331164, "grad_norm": 2.337629556655884, "learning_rate": 1.8617062219371394e-06, "loss": 0.8295, "step": 53030 }, { "epoch": 0.6463505295357879, "grad_norm": 1.9825880527496338, "learning_rate": 1.8613855035279026e-06, "loss": 0.8793, "step": 53035 }, { "epoch": 0.6464114657599357, "grad_norm": 2.143746852874756, "learning_rate": 1.8610647851186658e-06, "loss": 0.8079, "step": 53040 }, { "epoch": 0.6464724019840835, "grad_norm": 1.7757681608200073, "learning_rate": 1.8607440667094293e-06, "loss": 0.8114, "step": 53045 }, { "epoch": 0.6465333382082312, "grad_norm": 2.4764134883880615, "learning_rate": 1.8604233483001927e-06, "loss": 0.8333, "step": 53050 }, { "epoch": 0.646594274432379, "grad_norm": 1.9991986751556396, "learning_rate": 1.8601026298909557e-06, "loss": 0.8601, "step": 53055 }, { "epoch": 0.6466552106565269, "grad_norm": 1.7746533155441284, "learning_rate": 1.8597819114817192e-06, "loss": 0.8343, "step": 53060 }, { "epoch": 0.6467161468806747, "grad_norm": 2.209993362426758, "learning_rate": 1.8594611930724826e-06, "loss": 0.8306, "step": 53065 }, { "epoch": 0.6467770831048225, "grad_norm": 1.9388549327850342, "learning_rate": 1.8591404746632458e-06, "loss": 0.8751, "step": 53070 }, { "epoch": 0.6468380193289703, "grad_norm": 1.9567315578460693, "learning_rate": 1.8588197562540093e-06, "loss": 0.7737, "step": 53075 }, { "epoch": 0.6468989555531182, "grad_norm": 2.1538491249084473, "learning_rate": 1.8584990378447723e-06, "loss": 0.8034, "step": 53080 }, { "epoch": 0.6469598917772659, "grad_norm": 1.8721027374267578, "learning_rate": 1.8581783194355357e-06, "loss": 0.8691, "step": 53085 }, { "epoch": 0.6470208280014137, "grad_norm": 1.751322865486145, "learning_rate": 1.8578576010262992e-06, "loss": 0.829, "step": 53090 }, { "epoch": 0.6470817642255615, "grad_norm": 2.0097241401672363, "learning_rate": 1.8575368826170622e-06, "loss": 0.7754, "step": 53095 }, { "epoch": 0.6471427004497093, "grad_norm": 1.7910693883895874, "learning_rate": 1.8572161642078256e-06, "loss": 0.8613, "step": 53100 }, { "epoch": 0.6472036366738572, "grad_norm": 1.9136587381362915, "learning_rate": 1.856895445798589e-06, "loss": 0.868, "step": 53105 }, { "epoch": 0.647264572898005, "grad_norm": 2.240894079208374, "learning_rate": 1.8565747273893523e-06, "loss": 0.7626, "step": 53110 }, { "epoch": 0.6473255091221528, "grad_norm": 2.189464569091797, "learning_rate": 1.8562540089801157e-06, "loss": 0.7881, "step": 53115 }, { "epoch": 0.6473864453463005, "grad_norm": 2.009629249572754, "learning_rate": 1.8559332905708788e-06, "loss": 0.732, "step": 53120 }, { "epoch": 0.6474473815704483, "grad_norm": 1.838888168334961, "learning_rate": 1.8556125721616422e-06, "loss": 0.7952, "step": 53125 }, { "epoch": 0.6475083177945962, "grad_norm": 1.841558814048767, "learning_rate": 1.8552918537524056e-06, "loss": 0.8921, "step": 53130 }, { "epoch": 0.647569254018744, "grad_norm": 2.159163236618042, "learning_rate": 1.8549711353431687e-06, "loss": 0.7892, "step": 53135 }, { "epoch": 0.6476301902428918, "grad_norm": 1.8071677684783936, "learning_rate": 1.854650416933932e-06, "loss": 0.8582, "step": 53140 }, { "epoch": 0.6476911264670396, "grad_norm": 2.0028109550476074, "learning_rate": 1.8543296985246955e-06, "loss": 0.822, "step": 53145 }, { "epoch": 0.6477520626911875, "grad_norm": 2.010854959487915, "learning_rate": 1.8540089801154588e-06, "loss": 0.8027, "step": 53150 }, { "epoch": 0.6478129989153352, "grad_norm": 1.8577028512954712, "learning_rate": 1.8536882617062222e-06, "loss": 0.8444, "step": 53155 }, { "epoch": 0.647873935139483, "grad_norm": 1.6491265296936035, "learning_rate": 1.8533675432969852e-06, "loss": 0.8107, "step": 53160 }, { "epoch": 0.6479348713636308, "grad_norm": 1.886680245399475, "learning_rate": 1.8530468248877487e-06, "loss": 0.7948, "step": 53165 }, { "epoch": 0.6479958075877786, "grad_norm": 2.218299627304077, "learning_rate": 1.852726106478512e-06, "loss": 0.8658, "step": 53170 }, { "epoch": 0.6480567438119265, "grad_norm": 1.8340610265731812, "learning_rate": 1.8524053880692751e-06, "loss": 0.9122, "step": 53175 }, { "epoch": 0.6481176800360743, "grad_norm": 1.8980774879455566, "learning_rate": 1.8520846696600386e-06, "loss": 0.8437, "step": 53180 }, { "epoch": 0.6481786162602221, "grad_norm": 1.8620163202285767, "learning_rate": 1.851763951250802e-06, "loss": 0.8971, "step": 53185 }, { "epoch": 0.6482395524843698, "grad_norm": 2.1844050884246826, "learning_rate": 1.8514432328415652e-06, "loss": 0.8358, "step": 53190 }, { "epoch": 0.6483004887085176, "grad_norm": 1.8364677429199219, "learning_rate": 1.8511225144323287e-06, "loss": 0.8597, "step": 53195 }, { "epoch": 0.6483614249326655, "grad_norm": 1.8151583671569824, "learning_rate": 1.8508017960230921e-06, "loss": 0.7612, "step": 53200 }, { "epoch": 0.6484223611568133, "grad_norm": 1.9312952756881714, "learning_rate": 1.8504810776138551e-06, "loss": 0.782, "step": 53205 }, { "epoch": 0.6484832973809611, "grad_norm": 1.7884926795959473, "learning_rate": 1.8501603592046186e-06, "loss": 0.7911, "step": 53210 }, { "epoch": 0.6485442336051089, "grad_norm": 2.120187282562256, "learning_rate": 1.8498396407953818e-06, "loss": 0.7921, "step": 53215 }, { "epoch": 0.6486051698292566, "grad_norm": 2.0922420024871826, "learning_rate": 1.849518922386145e-06, "loss": 0.9054, "step": 53220 }, { "epoch": 0.6486661060534045, "grad_norm": 2.0761568546295166, "learning_rate": 1.8491982039769085e-06, "loss": 0.8309, "step": 53225 }, { "epoch": 0.6487270422775523, "grad_norm": 1.8717783689498901, "learning_rate": 1.8488774855676717e-06, "loss": 0.8425, "step": 53230 }, { "epoch": 0.6487879785017001, "grad_norm": 1.8598887920379639, "learning_rate": 1.8485567671584351e-06, "loss": 0.8164, "step": 53235 }, { "epoch": 0.6488489147258479, "grad_norm": 1.7820764780044556, "learning_rate": 1.8482360487491986e-06, "loss": 0.7764, "step": 53240 }, { "epoch": 0.6489098509499958, "grad_norm": 1.7624093294143677, "learning_rate": 1.8479153303399616e-06, "loss": 0.8087, "step": 53245 }, { "epoch": 0.6489707871741436, "grad_norm": 1.8514561653137207, "learning_rate": 1.847594611930725e-06, "loss": 0.8072, "step": 53250 }, { "epoch": 0.6490317233982913, "grad_norm": 2.101172924041748, "learning_rate": 1.8472738935214883e-06, "loss": 0.7775, "step": 53255 }, { "epoch": 0.6490926596224391, "grad_norm": 1.9648125171661377, "learning_rate": 1.8469531751122515e-06, "loss": 0.8478, "step": 53260 }, { "epoch": 0.6491535958465869, "grad_norm": 1.9102725982666016, "learning_rate": 1.846632456703015e-06, "loss": 0.8223, "step": 53265 }, { "epoch": 0.6492145320707348, "grad_norm": 2.4695065021514893, "learning_rate": 1.8463117382937782e-06, "loss": 0.8533, "step": 53270 }, { "epoch": 0.6492754682948826, "grad_norm": 1.8020724058151245, "learning_rate": 1.8459910198845416e-06, "loss": 0.7958, "step": 53275 }, { "epoch": 0.6493364045190304, "grad_norm": 1.959076166152954, "learning_rate": 1.845670301475305e-06, "loss": 0.8337, "step": 53280 }, { "epoch": 0.6493973407431782, "grad_norm": 1.6102571487426758, "learning_rate": 1.845349583066068e-06, "loss": 0.8316, "step": 53285 }, { "epoch": 0.6494582769673259, "grad_norm": 1.6739317178726196, "learning_rate": 1.8450288646568315e-06, "loss": 0.8285, "step": 53290 }, { "epoch": 0.6495192131914738, "grad_norm": 1.9987715482711792, "learning_rate": 1.8447081462475947e-06, "loss": 0.8065, "step": 53295 }, { "epoch": 0.6495801494156216, "grad_norm": 1.903872013092041, "learning_rate": 1.8443874278383582e-06, "loss": 0.7938, "step": 53300 }, { "epoch": 0.6496410856397694, "grad_norm": 1.7285493612289429, "learning_rate": 1.8440667094291214e-06, "loss": 0.7895, "step": 53305 }, { "epoch": 0.6497020218639172, "grad_norm": 2.2653887271881104, "learning_rate": 1.8437459910198846e-06, "loss": 0.8098, "step": 53310 }, { "epoch": 0.6497629580880651, "grad_norm": 2.0875446796417236, "learning_rate": 1.843425272610648e-06, "loss": 0.8216, "step": 53315 }, { "epoch": 0.6498238943122129, "grad_norm": 2.0099804401397705, "learning_rate": 1.8431045542014115e-06, "loss": 0.7556, "step": 53320 }, { "epoch": 0.6498848305363606, "grad_norm": 2.440723419189453, "learning_rate": 1.8427838357921745e-06, "loss": 0.8283, "step": 53325 }, { "epoch": 0.6499457667605084, "grad_norm": 2.3222122192382812, "learning_rate": 1.842463117382938e-06, "loss": 0.8253, "step": 53330 }, { "epoch": 0.6500067029846562, "grad_norm": 1.8121695518493652, "learning_rate": 1.8421423989737012e-06, "loss": 0.7945, "step": 53335 }, { "epoch": 0.6500676392088041, "grad_norm": 1.8040376901626587, "learning_rate": 1.8418216805644646e-06, "loss": 0.8871, "step": 53340 }, { "epoch": 0.6501285754329519, "grad_norm": 1.7266788482666016, "learning_rate": 1.8415009621552279e-06, "loss": 0.8216, "step": 53345 }, { "epoch": 0.6501895116570997, "grad_norm": 2.4763436317443848, "learning_rate": 1.841180243745991e-06, "loss": 0.9095, "step": 53350 }, { "epoch": 0.6502504478812475, "grad_norm": 2.0372390747070312, "learning_rate": 1.8408595253367545e-06, "loss": 0.8235, "step": 53355 }, { "epoch": 0.6503113841053952, "grad_norm": 2.1568603515625, "learning_rate": 1.840538806927518e-06, "loss": 0.8557, "step": 53360 }, { "epoch": 0.6503723203295431, "grad_norm": 1.95268714427948, "learning_rate": 1.840218088518281e-06, "loss": 0.8573, "step": 53365 }, { "epoch": 0.6504332565536909, "grad_norm": 1.9806469678878784, "learning_rate": 1.8398973701090444e-06, "loss": 0.8274, "step": 53370 }, { "epoch": 0.6504941927778387, "grad_norm": 1.9000234603881836, "learning_rate": 1.8395766516998076e-06, "loss": 0.7483, "step": 53375 }, { "epoch": 0.6505551290019865, "grad_norm": 2.0755615234375, "learning_rate": 1.839255933290571e-06, "loss": 0.8734, "step": 53380 }, { "epoch": 0.6506160652261344, "grad_norm": 1.993414044380188, "learning_rate": 1.8389352148813343e-06, "loss": 0.8454, "step": 53385 }, { "epoch": 0.6506770014502822, "grad_norm": 1.7888575792312622, "learning_rate": 1.8386144964720975e-06, "loss": 0.8646, "step": 53390 }, { "epoch": 0.6507379376744299, "grad_norm": 2.1017847061157227, "learning_rate": 1.838293778062861e-06, "loss": 0.8227, "step": 53395 }, { "epoch": 0.6507988738985777, "grad_norm": 2.199615955352783, "learning_rate": 1.8379730596536244e-06, "loss": 0.7896, "step": 53400 }, { "epoch": 0.6508598101227255, "grad_norm": 1.9888215065002441, "learning_rate": 1.8376523412443874e-06, "loss": 0.8293, "step": 53405 }, { "epoch": 0.6509207463468734, "grad_norm": 1.7780803442001343, "learning_rate": 1.8373316228351509e-06, "loss": 0.8643, "step": 53410 }, { "epoch": 0.6509816825710212, "grad_norm": 2.4726929664611816, "learning_rate": 1.8370109044259141e-06, "loss": 0.8055, "step": 53415 }, { "epoch": 0.651042618795169, "grad_norm": 2.4509432315826416, "learning_rate": 1.8366901860166776e-06, "loss": 0.8059, "step": 53420 }, { "epoch": 0.6511035550193168, "grad_norm": 1.8743879795074463, "learning_rate": 1.836369467607441e-06, "loss": 0.8579, "step": 53425 }, { "epoch": 0.6511644912434645, "grad_norm": 1.7968956232070923, "learning_rate": 1.836048749198204e-06, "loss": 0.723, "step": 53430 }, { "epoch": 0.6512254274676124, "grad_norm": 2.318885326385498, "learning_rate": 1.8357280307889675e-06, "loss": 0.7621, "step": 53435 }, { "epoch": 0.6512863636917602, "grad_norm": 1.8677347898483276, "learning_rate": 1.8354073123797309e-06, "loss": 0.8798, "step": 53440 }, { "epoch": 0.651347299915908, "grad_norm": 1.9857679605484009, "learning_rate": 1.835086593970494e-06, "loss": 0.829, "step": 53445 }, { "epoch": 0.6514082361400558, "grad_norm": 1.8521580696105957, "learning_rate": 1.8347658755612573e-06, "loss": 0.7649, "step": 53450 }, { "epoch": 0.6514691723642037, "grad_norm": 1.8745923042297363, "learning_rate": 1.8344451571520206e-06, "loss": 0.7489, "step": 53455 }, { "epoch": 0.6515301085883515, "grad_norm": 1.8179941177368164, "learning_rate": 1.834124438742784e-06, "loss": 0.8266, "step": 53460 }, { "epoch": 0.6515910448124992, "grad_norm": 2.5116333961486816, "learning_rate": 1.8338037203335475e-06, "loss": 0.7933, "step": 53465 }, { "epoch": 0.651651981036647, "grad_norm": 1.7392791509628296, "learning_rate": 1.8334830019243105e-06, "loss": 0.8652, "step": 53470 }, { "epoch": 0.6517129172607948, "grad_norm": 1.8731935024261475, "learning_rate": 1.833162283515074e-06, "loss": 0.8573, "step": 53475 }, { "epoch": 0.6517738534849427, "grad_norm": 1.9292551279067993, "learning_rate": 1.8328415651058374e-06, "loss": 0.8092, "step": 53480 }, { "epoch": 0.6518347897090905, "grad_norm": 1.9995664358139038, "learning_rate": 1.8325208466966004e-06, "loss": 0.924, "step": 53485 }, { "epoch": 0.6518957259332383, "grad_norm": 1.869480848312378, "learning_rate": 1.8322001282873638e-06, "loss": 0.824, "step": 53490 }, { "epoch": 0.6519566621573861, "grad_norm": 1.755042552947998, "learning_rate": 1.8318794098781273e-06, "loss": 0.7607, "step": 53495 }, { "epoch": 0.6520175983815338, "grad_norm": 1.9636385440826416, "learning_rate": 1.8315586914688905e-06, "loss": 0.8519, "step": 53500 }, { "epoch": 0.6520785346056817, "grad_norm": 1.9424468278884888, "learning_rate": 1.831237973059654e-06, "loss": 0.8274, "step": 53505 }, { "epoch": 0.6521394708298295, "grad_norm": 2.378312826156616, "learning_rate": 1.830917254650417e-06, "loss": 0.8163, "step": 53510 }, { "epoch": 0.6522004070539773, "grad_norm": 2.084336042404175, "learning_rate": 1.8305965362411804e-06, "loss": 0.8646, "step": 53515 }, { "epoch": 0.6522613432781251, "grad_norm": 1.869396448135376, "learning_rate": 1.8302758178319438e-06, "loss": 0.8449, "step": 53520 }, { "epoch": 0.652322279502273, "grad_norm": 1.984100341796875, "learning_rate": 1.8299550994227068e-06, "loss": 0.8304, "step": 53525 }, { "epoch": 0.6523832157264208, "grad_norm": 2.3262264728546143, "learning_rate": 1.8296343810134703e-06, "loss": 0.8579, "step": 53530 }, { "epoch": 0.6524441519505685, "grad_norm": 1.983794927597046, "learning_rate": 1.8293136626042337e-06, "loss": 0.8445, "step": 53535 }, { "epoch": 0.6525050881747163, "grad_norm": 2.08449125289917, "learning_rate": 1.828992944194997e-06, "loss": 0.8334, "step": 53540 }, { "epoch": 0.6525660243988641, "grad_norm": 2.098538875579834, "learning_rate": 1.8286722257857604e-06, "loss": 0.794, "step": 53545 }, { "epoch": 0.652626960623012, "grad_norm": 2.2118215560913086, "learning_rate": 1.8283515073765234e-06, "loss": 0.8645, "step": 53550 }, { "epoch": 0.6526878968471598, "grad_norm": 1.8166913986206055, "learning_rate": 1.8280307889672868e-06, "loss": 0.8703, "step": 53555 }, { "epoch": 0.6527488330713076, "grad_norm": 2.024258852005005, "learning_rate": 1.8277100705580503e-06, "loss": 0.882, "step": 53560 }, { "epoch": 0.6528097692954554, "grad_norm": 2.2892673015594482, "learning_rate": 1.8273893521488135e-06, "loss": 0.8055, "step": 53565 }, { "epoch": 0.6528707055196031, "grad_norm": 1.8590747117996216, "learning_rate": 1.8270686337395767e-06, "loss": 0.8768, "step": 53570 }, { "epoch": 0.652931641743751, "grad_norm": 2.0502700805664062, "learning_rate": 1.8267479153303402e-06, "loss": 0.7675, "step": 53575 }, { "epoch": 0.6529925779678988, "grad_norm": 2.0893490314483643, "learning_rate": 1.8264271969211034e-06, "loss": 0.8079, "step": 53580 }, { "epoch": 0.6530535141920466, "grad_norm": 2.0621840953826904, "learning_rate": 1.8261064785118668e-06, "loss": 0.8225, "step": 53585 }, { "epoch": 0.6531144504161944, "grad_norm": 2.0496952533721924, "learning_rate": 1.8257857601026299e-06, "loss": 0.859, "step": 53590 }, { "epoch": 0.6531753866403422, "grad_norm": 1.7401041984558105, "learning_rate": 1.8254650416933933e-06, "loss": 0.8855, "step": 53595 }, { "epoch": 0.6532363228644901, "grad_norm": 2.1463818550109863, "learning_rate": 1.8251443232841567e-06, "loss": 0.8741, "step": 53600 }, { "epoch": 0.6532972590886378, "grad_norm": 2.331714153289795, "learning_rate": 1.82482360487492e-06, "loss": 0.8923, "step": 53605 }, { "epoch": 0.6533581953127856, "grad_norm": 2.0130410194396973, "learning_rate": 1.8245028864656832e-06, "loss": 0.8736, "step": 53610 }, { "epoch": 0.6534191315369334, "grad_norm": 1.9984123706817627, "learning_rate": 1.8241821680564466e-06, "loss": 0.7692, "step": 53615 }, { "epoch": 0.6534800677610813, "grad_norm": 1.8364298343658447, "learning_rate": 1.8238614496472099e-06, "loss": 0.8355, "step": 53620 }, { "epoch": 0.6535410039852291, "grad_norm": 1.7889025211334229, "learning_rate": 1.8235407312379733e-06, "loss": 0.7367, "step": 53625 }, { "epoch": 0.6536019402093769, "grad_norm": 1.945455551147461, "learning_rate": 1.8232200128287363e-06, "loss": 0.7871, "step": 53630 }, { "epoch": 0.6536628764335247, "grad_norm": 1.8062427043914795, "learning_rate": 1.8228992944194998e-06, "loss": 0.8214, "step": 53635 }, { "epoch": 0.6537238126576724, "grad_norm": 1.8095712661743164, "learning_rate": 1.8225785760102632e-06, "loss": 0.8414, "step": 53640 }, { "epoch": 0.6537847488818203, "grad_norm": 1.8835737705230713, "learning_rate": 1.8222578576010264e-06, "loss": 0.7898, "step": 53645 }, { "epoch": 0.6538456851059681, "grad_norm": 2.009305477142334, "learning_rate": 1.8219371391917899e-06, "loss": 0.8499, "step": 53650 }, { "epoch": 0.6539066213301159, "grad_norm": 2.130458354949951, "learning_rate": 1.8216164207825531e-06, "loss": 0.79, "step": 53655 }, { "epoch": 0.6539675575542637, "grad_norm": 1.8456454277038574, "learning_rate": 1.8212957023733163e-06, "loss": 0.8144, "step": 53660 }, { "epoch": 0.6540284937784115, "grad_norm": 1.9241001605987549, "learning_rate": 1.8209749839640798e-06, "loss": 0.7962, "step": 53665 }, { "epoch": 0.6540894300025594, "grad_norm": 2.0438475608825684, "learning_rate": 1.8206542655548428e-06, "loss": 0.8843, "step": 53670 }, { "epoch": 0.6541503662267071, "grad_norm": 2.122166633605957, "learning_rate": 1.8203335471456062e-06, "loss": 0.8632, "step": 53675 }, { "epoch": 0.6542113024508549, "grad_norm": 2.0478827953338623, "learning_rate": 1.8200128287363697e-06, "loss": 0.7974, "step": 53680 }, { "epoch": 0.6542722386750027, "grad_norm": 2.2151105403900146, "learning_rate": 1.819692110327133e-06, "loss": 0.7708, "step": 53685 }, { "epoch": 0.6543331748991505, "grad_norm": 2.000584840774536, "learning_rate": 1.8193713919178963e-06, "loss": 0.8767, "step": 53690 }, { "epoch": 0.6543941111232984, "grad_norm": 1.7161070108413696, "learning_rate": 1.8190506735086596e-06, "loss": 0.8082, "step": 53695 }, { "epoch": 0.6544550473474462, "grad_norm": 1.8616304397583008, "learning_rate": 1.8187299550994228e-06, "loss": 0.8342, "step": 53700 }, { "epoch": 0.654515983571594, "grad_norm": 1.7141859531402588, "learning_rate": 1.8184092366901862e-06, "loss": 0.7831, "step": 53705 }, { "epoch": 0.6545769197957417, "grad_norm": 1.8663002252578735, "learning_rate": 1.8180885182809493e-06, "loss": 0.8168, "step": 53710 }, { "epoch": 0.6546378560198896, "grad_norm": 2.1228439807891846, "learning_rate": 1.8177677998717127e-06, "loss": 0.8585, "step": 53715 }, { "epoch": 0.6546987922440374, "grad_norm": 1.813618779182434, "learning_rate": 1.8174470814624761e-06, "loss": 0.8203, "step": 53720 }, { "epoch": 0.6547597284681852, "grad_norm": 2.047588586807251, "learning_rate": 1.8171263630532394e-06, "loss": 0.8495, "step": 53725 }, { "epoch": 0.654820664692333, "grad_norm": 2.254194498062134, "learning_rate": 1.8168056446440028e-06, "loss": 0.8315, "step": 53730 }, { "epoch": 0.6548816009164808, "grad_norm": 1.6757818460464478, "learning_rate": 1.816484926234766e-06, "loss": 0.7822, "step": 53735 }, { "epoch": 0.6549425371406287, "grad_norm": 1.9642279148101807, "learning_rate": 1.8161642078255293e-06, "loss": 0.8398, "step": 53740 }, { "epoch": 0.6550034733647764, "grad_norm": 1.8224574327468872, "learning_rate": 1.8158434894162927e-06, "loss": 0.8284, "step": 53745 }, { "epoch": 0.6550644095889242, "grad_norm": 1.8687039613723755, "learning_rate": 1.8155227710070557e-06, "loss": 0.8026, "step": 53750 }, { "epoch": 0.655125345813072, "grad_norm": 2.246840476989746, "learning_rate": 1.8152020525978192e-06, "loss": 0.9255, "step": 53755 }, { "epoch": 0.6551862820372198, "grad_norm": 2.030775547027588, "learning_rate": 1.8148813341885826e-06, "loss": 0.8358, "step": 53760 }, { "epoch": 0.6552472182613677, "grad_norm": 1.9572503566741943, "learning_rate": 1.8145606157793458e-06, "loss": 0.7499, "step": 53765 }, { "epoch": 0.6553081544855155, "grad_norm": 2.0997633934020996, "learning_rate": 1.8142398973701093e-06, "loss": 0.8002, "step": 53770 }, { "epoch": 0.6553690907096633, "grad_norm": 1.8489960432052612, "learning_rate": 1.8139191789608727e-06, "loss": 0.7869, "step": 53775 }, { "epoch": 0.655430026933811, "grad_norm": 2.0606577396392822, "learning_rate": 1.8135984605516357e-06, "loss": 0.8234, "step": 53780 }, { "epoch": 0.6554909631579589, "grad_norm": 2.2229344844818115, "learning_rate": 1.8132777421423992e-06, "loss": 0.8575, "step": 53785 }, { "epoch": 0.6555518993821067, "grad_norm": 1.8705717325210571, "learning_rate": 1.8129570237331626e-06, "loss": 0.8226, "step": 53790 }, { "epoch": 0.6556128356062545, "grad_norm": 2.3464066982269287, "learning_rate": 1.8126363053239256e-06, "loss": 0.7809, "step": 53795 }, { "epoch": 0.6556737718304023, "grad_norm": 2.072542667388916, "learning_rate": 1.812315586914689e-06, "loss": 0.7883, "step": 53800 }, { "epoch": 0.6557347080545501, "grad_norm": 2.2433817386627197, "learning_rate": 1.8119948685054523e-06, "loss": 0.8458, "step": 53805 }, { "epoch": 0.655795644278698, "grad_norm": 1.7396326065063477, "learning_rate": 1.8116741500962157e-06, "loss": 0.8149, "step": 53810 }, { "epoch": 0.6558565805028457, "grad_norm": 2.195552349090576, "learning_rate": 1.8113534316869792e-06, "loss": 0.779, "step": 53815 }, { "epoch": 0.6559175167269935, "grad_norm": 1.719993233680725, "learning_rate": 1.8110327132777422e-06, "loss": 0.7788, "step": 53820 }, { "epoch": 0.6559784529511413, "grad_norm": 2.0084755420684814, "learning_rate": 1.8107119948685056e-06, "loss": 0.8408, "step": 53825 }, { "epoch": 0.6560393891752891, "grad_norm": 2.1602559089660645, "learning_rate": 1.810391276459269e-06, "loss": 0.8295, "step": 53830 }, { "epoch": 0.656100325399437, "grad_norm": 1.9021316766738892, "learning_rate": 1.810070558050032e-06, "loss": 0.8195, "step": 53835 }, { "epoch": 0.6561612616235848, "grad_norm": 1.8890162706375122, "learning_rate": 1.8097498396407955e-06, "loss": 0.8007, "step": 53840 }, { "epoch": 0.6562221978477326, "grad_norm": 2.0740063190460205, "learning_rate": 1.8094291212315588e-06, "loss": 0.8278, "step": 53845 }, { "epoch": 0.6562831340718803, "grad_norm": 1.9956274032592773, "learning_rate": 1.8091084028223222e-06, "loss": 0.8068, "step": 53850 }, { "epoch": 0.6563440702960281, "grad_norm": 2.0729665756225586, "learning_rate": 1.8087876844130856e-06, "loss": 0.8268, "step": 53855 }, { "epoch": 0.656405006520176, "grad_norm": 2.2457947731018066, "learning_rate": 1.8084669660038487e-06, "loss": 0.7759, "step": 53860 }, { "epoch": 0.6564659427443238, "grad_norm": 2.1284046173095703, "learning_rate": 1.808146247594612e-06, "loss": 0.8136, "step": 53865 }, { "epoch": 0.6565268789684716, "grad_norm": 1.743536114692688, "learning_rate": 1.8078255291853755e-06, "loss": 0.7443, "step": 53870 }, { "epoch": 0.6565878151926194, "grad_norm": 2.05607533454895, "learning_rate": 1.8075048107761385e-06, "loss": 0.8393, "step": 53875 }, { "epoch": 0.6566487514167673, "grad_norm": 2.149766206741333, "learning_rate": 1.807184092366902e-06, "loss": 0.8532, "step": 53880 }, { "epoch": 0.656709687640915, "grad_norm": 2.020498275756836, "learning_rate": 1.8068633739576652e-06, "loss": 0.8633, "step": 53885 }, { "epoch": 0.6567706238650628, "grad_norm": 1.7784537076950073, "learning_rate": 1.8065426555484287e-06, "loss": 0.7845, "step": 53890 }, { "epoch": 0.6568315600892106, "grad_norm": 1.786745309829712, "learning_rate": 1.806221937139192e-06, "loss": 0.7717, "step": 53895 }, { "epoch": 0.6568924963133584, "grad_norm": 1.5240366458892822, "learning_rate": 1.8059012187299551e-06, "loss": 0.8187, "step": 53900 }, { "epoch": 0.6569534325375063, "grad_norm": 2.151829481124878, "learning_rate": 1.8055805003207186e-06, "loss": 0.8435, "step": 53905 }, { "epoch": 0.6570143687616541, "grad_norm": 1.979811429977417, "learning_rate": 1.805259781911482e-06, "loss": 0.7813, "step": 53910 }, { "epoch": 0.6570753049858019, "grad_norm": 2.0046334266662598, "learning_rate": 1.8049390635022452e-06, "loss": 0.8839, "step": 53915 }, { "epoch": 0.6571362412099496, "grad_norm": 2.1259562969207764, "learning_rate": 1.8046183450930085e-06, "loss": 0.7247, "step": 53920 }, { "epoch": 0.6571971774340974, "grad_norm": 1.9022417068481445, "learning_rate": 1.8042976266837717e-06, "loss": 0.8465, "step": 53925 }, { "epoch": 0.6572581136582453, "grad_norm": 1.8557943105697632, "learning_rate": 1.8039769082745351e-06, "loss": 0.7704, "step": 53930 }, { "epoch": 0.6573190498823931, "grad_norm": 1.991318702697754, "learning_rate": 1.8036561898652986e-06, "loss": 0.8329, "step": 53935 }, { "epoch": 0.6573799861065409, "grad_norm": 2.0643465518951416, "learning_rate": 1.8033354714560616e-06, "loss": 0.8127, "step": 53940 }, { "epoch": 0.6574409223306887, "grad_norm": 1.9941658973693848, "learning_rate": 1.803014753046825e-06, "loss": 0.7285, "step": 53945 }, { "epoch": 0.6575018585548366, "grad_norm": 2.2633326053619385, "learning_rate": 1.8026940346375885e-06, "loss": 0.9204, "step": 53950 }, { "epoch": 0.6575627947789843, "grad_norm": 2.098055839538574, "learning_rate": 1.8023733162283517e-06, "loss": 0.7525, "step": 53955 }, { "epoch": 0.6576237310031321, "grad_norm": 1.842622995376587, "learning_rate": 1.802052597819115e-06, "loss": 0.82, "step": 53960 }, { "epoch": 0.6576846672272799, "grad_norm": 2.2327828407287598, "learning_rate": 1.8017318794098781e-06, "loss": 0.8457, "step": 53965 }, { "epoch": 0.6577456034514277, "grad_norm": 2.1025173664093018, "learning_rate": 1.8014111610006416e-06, "loss": 0.8246, "step": 53970 }, { "epoch": 0.6578065396755756, "grad_norm": 2.2468857765197754, "learning_rate": 1.801090442591405e-06, "loss": 0.9226, "step": 53975 }, { "epoch": 0.6578674758997234, "grad_norm": 2.039787769317627, "learning_rate": 1.800769724182168e-06, "loss": 0.8713, "step": 53980 }, { "epoch": 0.6579284121238712, "grad_norm": 2.3348710536956787, "learning_rate": 1.8004490057729315e-06, "loss": 0.7947, "step": 53985 }, { "epoch": 0.6579893483480189, "grad_norm": 1.9899564981460571, "learning_rate": 1.800128287363695e-06, "loss": 0.7643, "step": 53990 }, { "epoch": 0.6580502845721667, "grad_norm": 1.9416091442108154, "learning_rate": 1.7998075689544582e-06, "loss": 0.792, "step": 53995 }, { "epoch": 0.6581112207963146, "grad_norm": 1.846073031425476, "learning_rate": 1.7994868505452214e-06, "loss": 0.7815, "step": 54000 }, { "epoch": 0.6581721570204624, "grad_norm": 2.024275541305542, "learning_rate": 1.7991661321359846e-06, "loss": 0.8719, "step": 54005 }, { "epoch": 0.6582330932446102, "grad_norm": 1.848722219467163, "learning_rate": 1.798845413726748e-06, "loss": 0.8182, "step": 54010 }, { "epoch": 0.658294029468758, "grad_norm": 2.0528502464294434, "learning_rate": 1.7985246953175115e-06, "loss": 0.7706, "step": 54015 }, { "epoch": 0.6583549656929059, "grad_norm": 1.7791460752487183, "learning_rate": 1.7982039769082745e-06, "loss": 0.9066, "step": 54020 }, { "epoch": 0.6584159019170536, "grad_norm": 2.1639559268951416, "learning_rate": 1.797883258499038e-06, "loss": 0.7647, "step": 54025 }, { "epoch": 0.6584768381412014, "grad_norm": 1.8916714191436768, "learning_rate": 1.7975625400898014e-06, "loss": 0.8107, "step": 54030 }, { "epoch": 0.6585377743653492, "grad_norm": 2.0711960792541504, "learning_rate": 1.7972418216805646e-06, "loss": 0.8584, "step": 54035 }, { "epoch": 0.658598710589497, "grad_norm": 2.041625738143921, "learning_rate": 1.796921103271328e-06, "loss": 0.8953, "step": 54040 }, { "epoch": 0.6586596468136449, "grad_norm": 1.8762978315353394, "learning_rate": 1.796600384862091e-06, "loss": 0.7913, "step": 54045 }, { "epoch": 0.6587205830377927, "grad_norm": 2.1231114864349365, "learning_rate": 1.7962796664528545e-06, "loss": 0.7809, "step": 54050 }, { "epoch": 0.6587815192619405, "grad_norm": 2.3675882816314697, "learning_rate": 1.795958948043618e-06, "loss": 0.9326, "step": 54055 }, { "epoch": 0.6588424554860882, "grad_norm": 2.0456461906433105, "learning_rate": 1.795638229634381e-06, "loss": 0.849, "step": 54060 }, { "epoch": 0.658903391710236, "grad_norm": 1.8818331956863403, "learning_rate": 1.7953175112251444e-06, "loss": 0.7947, "step": 54065 }, { "epoch": 0.6589643279343839, "grad_norm": 1.864892601966858, "learning_rate": 1.7949967928159079e-06, "loss": 0.8645, "step": 54070 }, { "epoch": 0.6590252641585317, "grad_norm": 1.9006685018539429, "learning_rate": 1.794676074406671e-06, "loss": 0.8022, "step": 54075 }, { "epoch": 0.6590862003826795, "grad_norm": 1.9382983446121216, "learning_rate": 1.7943553559974345e-06, "loss": 0.8682, "step": 54080 }, { "epoch": 0.6591471366068273, "grad_norm": 1.9553377628326416, "learning_rate": 1.7940346375881977e-06, "loss": 0.8901, "step": 54085 }, { "epoch": 0.6592080728309752, "grad_norm": 1.947516679763794, "learning_rate": 1.793713919178961e-06, "loss": 0.8255, "step": 54090 }, { "epoch": 0.6592690090551229, "grad_norm": 2.1029112339019775, "learning_rate": 1.7933932007697244e-06, "loss": 0.7748, "step": 54095 }, { "epoch": 0.6593299452792707, "grad_norm": 1.9546781778335571, "learning_rate": 1.7930724823604874e-06, "loss": 0.7896, "step": 54100 }, { "epoch": 0.6593908815034185, "grad_norm": 1.9808634519577026, "learning_rate": 1.7927517639512509e-06, "loss": 0.9406, "step": 54105 }, { "epoch": 0.6594518177275663, "grad_norm": 1.9522587060928345, "learning_rate": 1.7924310455420143e-06, "loss": 0.8292, "step": 54110 }, { "epoch": 0.6595127539517142, "grad_norm": 2.0506443977355957, "learning_rate": 1.7921103271327775e-06, "loss": 0.8531, "step": 54115 }, { "epoch": 0.659573690175862, "grad_norm": 2.11432147026062, "learning_rate": 1.791789608723541e-06, "loss": 0.8053, "step": 54120 }, { "epoch": 0.6596346264000098, "grad_norm": 1.9716310501098633, "learning_rate": 1.7914688903143044e-06, "loss": 0.8803, "step": 54125 }, { "epoch": 0.6596955626241575, "grad_norm": 1.9784640073776245, "learning_rate": 1.7911481719050674e-06, "loss": 0.7739, "step": 54130 }, { "epoch": 0.6597564988483053, "grad_norm": 1.694802165031433, "learning_rate": 1.7908274534958309e-06, "loss": 0.8227, "step": 54135 }, { "epoch": 0.6598174350724532, "grad_norm": 1.9599624872207642, "learning_rate": 1.7905067350865941e-06, "loss": 0.9047, "step": 54140 }, { "epoch": 0.659878371296601, "grad_norm": 2.1287784576416016, "learning_rate": 1.7901860166773573e-06, "loss": 0.8195, "step": 54145 }, { "epoch": 0.6599393075207488, "grad_norm": 2.0189945697784424, "learning_rate": 1.7898652982681208e-06, "loss": 0.8232, "step": 54150 }, { "epoch": 0.6600002437448966, "grad_norm": 2.1342575550079346, "learning_rate": 1.789544579858884e-06, "loss": 0.8132, "step": 54155 }, { "epoch": 0.6600611799690443, "grad_norm": 2.150630235671997, "learning_rate": 1.7892238614496474e-06, "loss": 0.8598, "step": 54160 }, { "epoch": 0.6601221161931922, "grad_norm": 1.8913118839263916, "learning_rate": 1.7889031430404109e-06, "loss": 0.8405, "step": 54165 }, { "epoch": 0.66018305241734, "grad_norm": 1.9115906953811646, "learning_rate": 1.788582424631174e-06, "loss": 0.8297, "step": 54170 }, { "epoch": 0.6602439886414878, "grad_norm": 1.8062429428100586, "learning_rate": 1.7882617062219373e-06, "loss": 0.7992, "step": 54175 }, { "epoch": 0.6603049248656356, "grad_norm": 2.3138515949249268, "learning_rate": 1.7879409878127006e-06, "loss": 0.8075, "step": 54180 }, { "epoch": 0.6603658610897835, "grad_norm": 1.9405889511108398, "learning_rate": 1.7876202694034638e-06, "loss": 0.7302, "step": 54185 }, { "epoch": 0.6604267973139313, "grad_norm": 2.321220874786377, "learning_rate": 1.7872995509942272e-06, "loss": 0.8581, "step": 54190 }, { "epoch": 0.660487733538079, "grad_norm": 2.1126046180725098, "learning_rate": 1.7869788325849905e-06, "loss": 0.9234, "step": 54195 }, { "epoch": 0.6605486697622268, "grad_norm": 2.341970205307007, "learning_rate": 1.786658114175754e-06, "loss": 0.8071, "step": 54200 }, { "epoch": 0.6606096059863746, "grad_norm": 1.6486483812332153, "learning_rate": 1.7863373957665174e-06, "loss": 0.8148, "step": 54205 }, { "epoch": 0.6606705422105225, "grad_norm": 2.3020150661468506, "learning_rate": 1.7860166773572804e-06, "loss": 0.819, "step": 54210 }, { "epoch": 0.6607314784346703, "grad_norm": 1.7627325057983398, "learning_rate": 1.7856959589480438e-06, "loss": 0.8723, "step": 54215 }, { "epoch": 0.6607924146588181, "grad_norm": 1.811284065246582, "learning_rate": 1.785375240538807e-06, "loss": 0.8092, "step": 54220 }, { "epoch": 0.6608533508829659, "grad_norm": 1.77583909034729, "learning_rate": 1.7850545221295703e-06, "loss": 0.8129, "step": 54225 }, { "epoch": 0.6609142871071136, "grad_norm": 1.9276491403579712, "learning_rate": 1.7847338037203337e-06, "loss": 0.8622, "step": 54230 }, { "epoch": 0.6609752233312615, "grad_norm": 1.9159774780273438, "learning_rate": 1.784413085311097e-06, "loss": 0.8876, "step": 54235 }, { "epoch": 0.6610361595554093, "grad_norm": 1.9149006605148315, "learning_rate": 1.7840923669018604e-06, "loss": 0.8537, "step": 54240 }, { "epoch": 0.6610970957795571, "grad_norm": 2.1040713787078857, "learning_rate": 1.7837716484926238e-06, "loss": 0.8232, "step": 54245 }, { "epoch": 0.6611580320037049, "grad_norm": 2.128608465194702, "learning_rate": 1.7834509300833868e-06, "loss": 0.8489, "step": 54250 }, { "epoch": 0.6612189682278528, "grad_norm": 1.6420183181762695, "learning_rate": 1.7831302116741503e-06, "loss": 0.7837, "step": 54255 }, { "epoch": 0.6612799044520006, "grad_norm": 1.8301242589950562, "learning_rate": 1.7828094932649135e-06, "loss": 0.7839, "step": 54260 }, { "epoch": 0.6613408406761483, "grad_norm": 1.9496411085128784, "learning_rate": 1.782488774855677e-06, "loss": 0.7885, "step": 54265 }, { "epoch": 0.6614017769002961, "grad_norm": 2.315192937850952, "learning_rate": 1.7821680564464402e-06, "loss": 0.7974, "step": 54270 }, { "epoch": 0.6614627131244439, "grad_norm": 2.0746593475341797, "learning_rate": 1.7818473380372034e-06, "loss": 0.8035, "step": 54275 }, { "epoch": 0.6615236493485918, "grad_norm": 2.230968713760376, "learning_rate": 1.7815266196279668e-06, "loss": 0.7934, "step": 54280 }, { "epoch": 0.6615845855727396, "grad_norm": 2.3194282054901123, "learning_rate": 1.7812059012187303e-06, "loss": 0.8545, "step": 54285 }, { "epoch": 0.6616455217968874, "grad_norm": 1.9349327087402344, "learning_rate": 1.7808851828094933e-06, "loss": 0.8004, "step": 54290 }, { "epoch": 0.6617064580210352, "grad_norm": 1.6679624319076538, "learning_rate": 1.7805644644002567e-06, "loss": 0.8253, "step": 54295 }, { "epoch": 0.6617673942451829, "grad_norm": 2.100206136703491, "learning_rate": 1.78024374599102e-06, "loss": 0.8527, "step": 54300 }, { "epoch": 0.6618283304693308, "grad_norm": 1.8231191635131836, "learning_rate": 1.7799230275817834e-06, "loss": 0.8101, "step": 54305 }, { "epoch": 0.6618892666934786, "grad_norm": 2.024474620819092, "learning_rate": 1.7796023091725466e-06, "loss": 0.8438, "step": 54310 }, { "epoch": 0.6619502029176264, "grad_norm": 1.7116426229476929, "learning_rate": 1.7792815907633099e-06, "loss": 0.8271, "step": 54315 }, { "epoch": 0.6620111391417742, "grad_norm": 1.9147558212280273, "learning_rate": 1.7789608723540733e-06, "loss": 0.8193, "step": 54320 }, { "epoch": 0.662072075365922, "grad_norm": 2.57200288772583, "learning_rate": 1.7786401539448367e-06, "loss": 0.8529, "step": 54325 }, { "epoch": 0.6621330115900699, "grad_norm": 2.316387176513672, "learning_rate": 1.7783194355355998e-06, "loss": 0.8681, "step": 54330 }, { "epoch": 0.6621939478142176, "grad_norm": 2.485846519470215, "learning_rate": 1.7779987171263632e-06, "loss": 0.8471, "step": 54335 }, { "epoch": 0.6622548840383654, "grad_norm": 2.5300452709198, "learning_rate": 1.7776779987171264e-06, "loss": 0.7835, "step": 54340 }, { "epoch": 0.6623158202625132, "grad_norm": 2.5364184379577637, "learning_rate": 1.7773572803078899e-06, "loss": 0.8089, "step": 54345 }, { "epoch": 0.6623767564866611, "grad_norm": 2.21512508392334, "learning_rate": 1.777036561898653e-06, "loss": 0.7356, "step": 54350 }, { "epoch": 0.6624376927108089, "grad_norm": 2.2229437828063965, "learning_rate": 1.7767158434894163e-06, "loss": 0.8461, "step": 54355 }, { "epoch": 0.6624986289349567, "grad_norm": 1.9814280271530151, "learning_rate": 1.7763951250801798e-06, "loss": 0.9049, "step": 54360 }, { "epoch": 0.6625595651591045, "grad_norm": 2.056734800338745, "learning_rate": 1.7760744066709432e-06, "loss": 0.8073, "step": 54365 }, { "epoch": 0.6626205013832522, "grad_norm": 2.0453953742980957, "learning_rate": 1.7757536882617062e-06, "loss": 0.8204, "step": 54370 }, { "epoch": 0.6626814376074001, "grad_norm": 1.8197290897369385, "learning_rate": 1.7754329698524697e-06, "loss": 0.8041, "step": 54375 }, { "epoch": 0.6627423738315479, "grad_norm": 1.9034627676010132, "learning_rate": 1.775112251443233e-06, "loss": 0.8211, "step": 54380 }, { "epoch": 0.6628033100556957, "grad_norm": 1.9847910404205322, "learning_rate": 1.7747915330339963e-06, "loss": 0.8556, "step": 54385 }, { "epoch": 0.6628642462798435, "grad_norm": 1.7274404764175415, "learning_rate": 1.7744708146247598e-06, "loss": 0.7821, "step": 54390 }, { "epoch": 0.6629251825039914, "grad_norm": 1.6971336603164673, "learning_rate": 1.7741500962155228e-06, "loss": 0.8336, "step": 54395 }, { "epoch": 0.6629861187281392, "grad_norm": 1.8996669054031372, "learning_rate": 1.7738293778062862e-06, "loss": 0.8001, "step": 54400 }, { "epoch": 0.6630470549522869, "grad_norm": 1.6393593549728394, "learning_rate": 1.7735086593970497e-06, "loss": 0.7775, "step": 54405 }, { "epoch": 0.6631079911764347, "grad_norm": 1.8641510009765625, "learning_rate": 1.7731879409878127e-06, "loss": 0.8258, "step": 54410 }, { "epoch": 0.6631689274005825, "grad_norm": 2.368767023086548, "learning_rate": 1.7728672225785761e-06, "loss": 0.8992, "step": 54415 }, { "epoch": 0.6632298636247304, "grad_norm": 2.1025586128234863, "learning_rate": 1.7725465041693396e-06, "loss": 0.8774, "step": 54420 }, { "epoch": 0.6632907998488782, "grad_norm": 1.8885042667388916, "learning_rate": 1.7722257857601028e-06, "loss": 0.8547, "step": 54425 }, { "epoch": 0.663351736073026, "grad_norm": 1.9531536102294922, "learning_rate": 1.7719050673508662e-06, "loss": 0.8253, "step": 54430 }, { "epoch": 0.6634126722971738, "grad_norm": 2.6768949031829834, "learning_rate": 1.7715843489416293e-06, "loss": 0.8088, "step": 54435 }, { "epoch": 0.6634736085213215, "grad_norm": 1.8841203451156616, "learning_rate": 1.7712636305323927e-06, "loss": 0.8274, "step": 54440 }, { "epoch": 0.6635345447454694, "grad_norm": 1.7622473239898682, "learning_rate": 1.7709429121231561e-06, "loss": 0.8466, "step": 54445 }, { "epoch": 0.6635954809696172, "grad_norm": 2.1873250007629395, "learning_rate": 1.7706221937139191e-06, "loss": 0.8433, "step": 54450 }, { "epoch": 0.663656417193765, "grad_norm": 2.5967776775360107, "learning_rate": 1.7703014753046826e-06, "loss": 0.8639, "step": 54455 }, { "epoch": 0.6637173534179128, "grad_norm": 1.8481968641281128, "learning_rate": 1.769980756895446e-06, "loss": 0.8837, "step": 54460 }, { "epoch": 0.6637782896420606, "grad_norm": 2.242527961730957, "learning_rate": 1.7696600384862093e-06, "loss": 0.759, "step": 54465 }, { "epoch": 0.6638392258662085, "grad_norm": 2.0824334621429443, "learning_rate": 1.7693393200769727e-06, "loss": 0.857, "step": 54470 }, { "epoch": 0.6639001620903562, "grad_norm": 1.83223295211792, "learning_rate": 1.7690186016677357e-06, "loss": 0.7613, "step": 54475 }, { "epoch": 0.663961098314504, "grad_norm": 1.912045955657959, "learning_rate": 1.7686978832584992e-06, "loss": 0.8246, "step": 54480 }, { "epoch": 0.6640220345386518, "grad_norm": 1.5882282257080078, "learning_rate": 1.7683771648492626e-06, "loss": 0.8035, "step": 54485 }, { "epoch": 0.6640829707627997, "grad_norm": 2.0219833850860596, "learning_rate": 1.7680564464400258e-06, "loss": 0.8778, "step": 54490 }, { "epoch": 0.6641439069869475, "grad_norm": 2.1936452388763428, "learning_rate": 1.767735728030789e-06, "loss": 0.8079, "step": 54495 }, { "epoch": 0.6642048432110953, "grad_norm": 1.7303093671798706, "learning_rate": 1.7674150096215525e-06, "loss": 0.8794, "step": 54500 }, { "epoch": 0.6642657794352431, "grad_norm": 2.1697967052459717, "learning_rate": 1.7670942912123157e-06, "loss": 0.7739, "step": 54505 }, { "epoch": 0.6643267156593908, "grad_norm": 2.3966851234436035, "learning_rate": 1.7667735728030792e-06, "loss": 0.7791, "step": 54510 }, { "epoch": 0.6643876518835387, "grad_norm": 1.7877788543701172, "learning_rate": 1.7664528543938422e-06, "loss": 0.9, "step": 54515 }, { "epoch": 0.6644485881076865, "grad_norm": 2.1373908519744873, "learning_rate": 1.7661321359846056e-06, "loss": 0.8326, "step": 54520 }, { "epoch": 0.6645095243318343, "grad_norm": 2.054434299468994, "learning_rate": 1.765811417575369e-06, "loss": 0.788, "step": 54525 }, { "epoch": 0.6645704605559821, "grad_norm": 1.9573990106582642, "learning_rate": 1.7654906991661323e-06, "loss": 0.8892, "step": 54530 }, { "epoch": 0.66463139678013, "grad_norm": 2.036679744720459, "learning_rate": 1.7651699807568955e-06, "loss": 0.8269, "step": 54535 }, { "epoch": 0.6646923330042778, "grad_norm": 2.529240369796753, "learning_rate": 1.764849262347659e-06, "loss": 0.864, "step": 54540 }, { "epoch": 0.6647532692284255, "grad_norm": 1.8134278059005737, "learning_rate": 1.7645285439384222e-06, "loss": 0.8099, "step": 54545 }, { "epoch": 0.6648142054525733, "grad_norm": 1.9227654933929443, "learning_rate": 1.7642078255291856e-06, "loss": 0.7694, "step": 54550 }, { "epoch": 0.6648751416767211, "grad_norm": 2.9453811645507812, "learning_rate": 1.7638871071199486e-06, "loss": 0.8565, "step": 54555 }, { "epoch": 0.664936077900869, "grad_norm": 2.093698740005493, "learning_rate": 1.763566388710712e-06, "loss": 0.8159, "step": 54560 }, { "epoch": 0.6649970141250168, "grad_norm": 1.9395883083343506, "learning_rate": 1.7632456703014755e-06, "loss": 0.8095, "step": 54565 }, { "epoch": 0.6650579503491646, "grad_norm": 1.9596657752990723, "learning_rate": 1.7629249518922388e-06, "loss": 0.7536, "step": 54570 }, { "epoch": 0.6651188865733124, "grad_norm": 1.8148231506347656, "learning_rate": 1.762604233483002e-06, "loss": 0.794, "step": 54575 }, { "epoch": 0.6651798227974601, "grad_norm": 1.8858321905136108, "learning_rate": 1.7622835150737654e-06, "loss": 0.7733, "step": 54580 }, { "epoch": 0.665240759021608, "grad_norm": 1.9702883958816528, "learning_rate": 1.7619627966645286e-06, "loss": 0.7939, "step": 54585 }, { "epoch": 0.6653016952457558, "grad_norm": 1.9607726335525513, "learning_rate": 1.761642078255292e-06, "loss": 0.8322, "step": 54590 }, { "epoch": 0.6653626314699036, "grad_norm": 1.8051892518997192, "learning_rate": 1.761321359846055e-06, "loss": 0.6901, "step": 54595 }, { "epoch": 0.6654235676940514, "grad_norm": 2.0733373165130615, "learning_rate": 1.7610006414368185e-06, "loss": 0.8441, "step": 54600 }, { "epoch": 0.6654845039181992, "grad_norm": 1.8182079792022705, "learning_rate": 1.760679923027582e-06, "loss": 0.8882, "step": 54605 }, { "epoch": 0.6655454401423471, "grad_norm": 2.047435998916626, "learning_rate": 1.7603592046183452e-06, "loss": 0.8614, "step": 54610 }, { "epoch": 0.6656063763664948, "grad_norm": 1.8887782096862793, "learning_rate": 1.7600384862091087e-06, "loss": 0.8345, "step": 54615 }, { "epoch": 0.6656673125906426, "grad_norm": 1.7496111392974854, "learning_rate": 1.7597177677998719e-06, "loss": 0.7419, "step": 54620 }, { "epoch": 0.6657282488147904, "grad_norm": 2.0080151557922363, "learning_rate": 1.7593970493906351e-06, "loss": 0.8379, "step": 54625 }, { "epoch": 0.6657891850389382, "grad_norm": 1.9588441848754883, "learning_rate": 1.7590763309813986e-06, "loss": 0.8383, "step": 54630 }, { "epoch": 0.6658501212630861, "grad_norm": 2.1182568073272705, "learning_rate": 1.7587556125721616e-06, "loss": 0.8566, "step": 54635 }, { "epoch": 0.6659110574872339, "grad_norm": 2.435206174850464, "learning_rate": 1.758434894162925e-06, "loss": 0.7796, "step": 54640 }, { "epoch": 0.6659719937113817, "grad_norm": 2.456813097000122, "learning_rate": 1.7581141757536884e-06, "loss": 0.8572, "step": 54645 }, { "epoch": 0.6660329299355294, "grad_norm": 2.076803207397461, "learning_rate": 1.7577934573444517e-06, "loss": 0.8396, "step": 54650 }, { "epoch": 0.6660938661596773, "grad_norm": 1.8700615167617798, "learning_rate": 1.7574727389352151e-06, "loss": 0.786, "step": 54655 }, { "epoch": 0.6661548023838251, "grad_norm": 1.90843665599823, "learning_rate": 1.7571520205259783e-06, "loss": 0.7889, "step": 54660 }, { "epoch": 0.6662157386079729, "grad_norm": 2.1631648540496826, "learning_rate": 1.7568313021167416e-06, "loss": 0.8957, "step": 54665 }, { "epoch": 0.6662766748321207, "grad_norm": 1.8039417266845703, "learning_rate": 1.756510583707505e-06, "loss": 0.7839, "step": 54670 }, { "epoch": 0.6663376110562685, "grad_norm": 2.0170421600341797, "learning_rate": 1.756189865298268e-06, "loss": 0.8485, "step": 54675 }, { "epoch": 0.6663985472804164, "grad_norm": 1.8764500617980957, "learning_rate": 1.7558691468890315e-06, "loss": 0.7985, "step": 54680 }, { "epoch": 0.6664594835045641, "grad_norm": 2.0111849308013916, "learning_rate": 1.755548428479795e-06, "loss": 0.8651, "step": 54685 }, { "epoch": 0.6665204197287119, "grad_norm": 2.254512071609497, "learning_rate": 1.7552277100705581e-06, "loss": 0.7871, "step": 54690 }, { "epoch": 0.6665813559528597, "grad_norm": 1.6254483461380005, "learning_rate": 1.7549069916613216e-06, "loss": 0.8312, "step": 54695 }, { "epoch": 0.6666422921770075, "grad_norm": 2.1620333194732666, "learning_rate": 1.7545862732520848e-06, "loss": 0.8414, "step": 54700 }, { "epoch": 0.6667032284011554, "grad_norm": 1.8389214277267456, "learning_rate": 1.754265554842848e-06, "loss": 0.7548, "step": 54705 }, { "epoch": 0.6667641646253032, "grad_norm": 1.7224222421646118, "learning_rate": 1.7539448364336115e-06, "loss": 0.8366, "step": 54710 }, { "epoch": 0.666825100849451, "grad_norm": 2.1567270755767822, "learning_rate": 1.753624118024375e-06, "loss": 0.908, "step": 54715 }, { "epoch": 0.6668860370735987, "grad_norm": 2.1547887325286865, "learning_rate": 1.753303399615138e-06, "loss": 0.865, "step": 54720 }, { "epoch": 0.6669469732977465, "grad_norm": 2.567275047302246, "learning_rate": 1.7529826812059014e-06, "loss": 0.8379, "step": 54725 }, { "epoch": 0.6670079095218944, "grad_norm": 2.192258358001709, "learning_rate": 1.7526619627966646e-06, "loss": 0.8457, "step": 54730 }, { "epoch": 0.6670688457460422, "grad_norm": 2.094745635986328, "learning_rate": 1.752341244387428e-06, "loss": 0.8351, "step": 54735 }, { "epoch": 0.66712978197019, "grad_norm": 2.1839711666107178, "learning_rate": 1.7520205259781915e-06, "loss": 0.8281, "step": 54740 }, { "epoch": 0.6671907181943378, "grad_norm": 1.9454963207244873, "learning_rate": 1.7516998075689545e-06, "loss": 0.8275, "step": 54745 }, { "epoch": 0.6672516544184857, "grad_norm": 2.0245449542999268, "learning_rate": 1.751379089159718e-06, "loss": 0.8744, "step": 54750 }, { "epoch": 0.6673125906426334, "grad_norm": 2.3439712524414062, "learning_rate": 1.7510583707504814e-06, "loss": 0.8268, "step": 54755 }, { "epoch": 0.6673735268667812, "grad_norm": 1.933370590209961, "learning_rate": 1.7507376523412444e-06, "loss": 0.8649, "step": 54760 }, { "epoch": 0.667434463090929, "grad_norm": 2.026319980621338, "learning_rate": 1.7504169339320078e-06, "loss": 0.8434, "step": 54765 }, { "epoch": 0.6674953993150768, "grad_norm": 1.9390597343444824, "learning_rate": 1.750096215522771e-06, "loss": 0.8415, "step": 54770 }, { "epoch": 0.6675563355392247, "grad_norm": 2.1231017112731934, "learning_rate": 1.7497754971135345e-06, "loss": 0.9049, "step": 54775 }, { "epoch": 0.6676172717633725, "grad_norm": 1.6212546825408936, "learning_rate": 1.749454778704298e-06, "loss": 0.8137, "step": 54780 }, { "epoch": 0.6676782079875203, "grad_norm": 1.8307827711105347, "learning_rate": 1.749134060295061e-06, "loss": 0.8283, "step": 54785 }, { "epoch": 0.667739144211668, "grad_norm": 2.145568370819092, "learning_rate": 1.7488133418858244e-06, "loss": 0.7964, "step": 54790 }, { "epoch": 0.6678000804358158, "grad_norm": 1.9592599868774414, "learning_rate": 1.7484926234765878e-06, "loss": 0.8597, "step": 54795 }, { "epoch": 0.6678610166599637, "grad_norm": 2.27237868309021, "learning_rate": 1.7481719050673509e-06, "loss": 0.8835, "step": 54800 }, { "epoch": 0.6679219528841115, "grad_norm": 2.397534132003784, "learning_rate": 1.7478511866581143e-06, "loss": 0.8508, "step": 54805 }, { "epoch": 0.6679828891082593, "grad_norm": 2.5386242866516113, "learning_rate": 1.7475304682488775e-06, "loss": 0.8662, "step": 54810 }, { "epoch": 0.6680438253324071, "grad_norm": 1.8170236349105835, "learning_rate": 1.747209749839641e-06, "loss": 0.8132, "step": 54815 }, { "epoch": 0.668104761556555, "grad_norm": 2.2073822021484375, "learning_rate": 1.7468890314304044e-06, "loss": 0.8386, "step": 54820 }, { "epoch": 0.6681656977807027, "grad_norm": 2.1455142498016357, "learning_rate": 1.7465683130211674e-06, "loss": 0.9062, "step": 54825 }, { "epoch": 0.6682266340048505, "grad_norm": 2.177091598510742, "learning_rate": 1.7462475946119309e-06, "loss": 0.8075, "step": 54830 }, { "epoch": 0.6682875702289983, "grad_norm": 2.580132246017456, "learning_rate": 1.7459268762026943e-06, "loss": 0.879, "step": 54835 }, { "epoch": 0.6683485064531461, "grad_norm": 1.8103227615356445, "learning_rate": 1.7456061577934575e-06, "loss": 0.7886, "step": 54840 }, { "epoch": 0.668409442677294, "grad_norm": 2.025712728500366, "learning_rate": 1.7452854393842208e-06, "loss": 0.7449, "step": 54845 }, { "epoch": 0.6684703789014418, "grad_norm": 1.9161015748977661, "learning_rate": 1.744964720974984e-06, "loss": 0.8322, "step": 54850 }, { "epoch": 0.6685313151255896, "grad_norm": 2.615563154220581, "learning_rate": 1.7446440025657474e-06, "loss": 0.8685, "step": 54855 }, { "epoch": 0.6685922513497373, "grad_norm": 2.6405599117279053, "learning_rate": 1.7443232841565109e-06, "loss": 0.7697, "step": 54860 }, { "epoch": 0.6686531875738851, "grad_norm": 1.82088303565979, "learning_rate": 1.7440025657472739e-06, "loss": 0.8534, "step": 54865 }, { "epoch": 0.668714123798033, "grad_norm": 1.7378613948822021, "learning_rate": 1.7436818473380373e-06, "loss": 0.7856, "step": 54870 }, { "epoch": 0.6687750600221808, "grad_norm": 2.022094249725342, "learning_rate": 1.7433611289288008e-06, "loss": 0.8055, "step": 54875 }, { "epoch": 0.6688359962463286, "grad_norm": 1.6122865676879883, "learning_rate": 1.743040410519564e-06, "loss": 0.8045, "step": 54880 }, { "epoch": 0.6688969324704764, "grad_norm": 2.1975271701812744, "learning_rate": 1.7427196921103272e-06, "loss": 0.8475, "step": 54885 }, { "epoch": 0.6689578686946243, "grad_norm": 2.139371871948242, "learning_rate": 1.7423989737010905e-06, "loss": 0.8215, "step": 54890 }, { "epoch": 0.669018804918772, "grad_norm": 2.050673246383667, "learning_rate": 1.742078255291854e-06, "loss": 0.8201, "step": 54895 }, { "epoch": 0.6690797411429198, "grad_norm": 2.063330888748169, "learning_rate": 1.7417575368826173e-06, "loss": 0.8182, "step": 54900 }, { "epoch": 0.6691406773670676, "grad_norm": 1.9574222564697266, "learning_rate": 1.7414368184733804e-06, "loss": 0.8344, "step": 54905 }, { "epoch": 0.6692016135912154, "grad_norm": 1.820297360420227, "learning_rate": 1.7411161000641438e-06, "loss": 0.8875, "step": 54910 }, { "epoch": 0.6692625498153633, "grad_norm": 2.1074635982513428, "learning_rate": 1.7407953816549072e-06, "loss": 0.793, "step": 54915 }, { "epoch": 0.6693234860395111, "grad_norm": 2.0481064319610596, "learning_rate": 1.7404746632456705e-06, "loss": 0.816, "step": 54920 }, { "epoch": 0.6693844222636589, "grad_norm": 1.9464138746261597, "learning_rate": 1.7401539448364337e-06, "loss": 0.7712, "step": 54925 }, { "epoch": 0.6694453584878066, "grad_norm": 2.376494884490967, "learning_rate": 1.739833226427197e-06, "loss": 0.7803, "step": 54930 }, { "epoch": 0.6695062947119544, "grad_norm": 2.053866147994995, "learning_rate": 1.7395125080179604e-06, "loss": 0.8236, "step": 54935 }, { "epoch": 0.6695672309361023, "grad_norm": 1.7525311708450317, "learning_rate": 1.7391917896087238e-06, "loss": 0.7993, "step": 54940 }, { "epoch": 0.6696281671602501, "grad_norm": 2.090294122695923, "learning_rate": 1.7388710711994868e-06, "loss": 0.7802, "step": 54945 }, { "epoch": 0.6696891033843979, "grad_norm": 2.2253193855285645, "learning_rate": 1.7385503527902503e-06, "loss": 0.9436, "step": 54950 }, { "epoch": 0.6697500396085457, "grad_norm": 2.3469653129577637, "learning_rate": 1.7382296343810137e-06, "loss": 0.8753, "step": 54955 }, { "epoch": 0.6698109758326936, "grad_norm": 1.9168565273284912, "learning_rate": 1.737908915971777e-06, "loss": 0.7875, "step": 54960 }, { "epoch": 0.6698719120568413, "grad_norm": 2.12668514251709, "learning_rate": 1.7375881975625404e-06, "loss": 0.7352, "step": 54965 }, { "epoch": 0.6699328482809891, "grad_norm": 1.8283342123031616, "learning_rate": 1.7372674791533034e-06, "loss": 0.8208, "step": 54970 }, { "epoch": 0.6699937845051369, "grad_norm": 2.1783945560455322, "learning_rate": 1.7369467607440668e-06, "loss": 0.848, "step": 54975 }, { "epoch": 0.6700547207292847, "grad_norm": 2.137974977493286, "learning_rate": 1.7366260423348303e-06, "loss": 0.7835, "step": 54980 }, { "epoch": 0.6701156569534326, "grad_norm": 2.056312322616577, "learning_rate": 1.7363053239255933e-06, "loss": 0.8925, "step": 54985 }, { "epoch": 0.6701765931775804, "grad_norm": 2.2290546894073486, "learning_rate": 1.7359846055163567e-06, "loss": 0.8922, "step": 54990 }, { "epoch": 0.6702375294017282, "grad_norm": 2.288241386413574, "learning_rate": 1.7356638871071202e-06, "loss": 0.8777, "step": 54995 }, { "epoch": 0.6702984656258759, "grad_norm": 1.9167406558990479, "learning_rate": 1.7353431686978834e-06, "loss": 0.8126, "step": 55000 }, { "epoch": 0.6703594018500237, "grad_norm": 1.8213635683059692, "learning_rate": 1.7350224502886468e-06, "loss": 0.8853, "step": 55005 }, { "epoch": 0.6704203380741716, "grad_norm": 1.7263296842575073, "learning_rate": 1.73470173187941e-06, "loss": 0.7498, "step": 55010 }, { "epoch": 0.6704812742983194, "grad_norm": 1.9428346157073975, "learning_rate": 1.7343810134701733e-06, "loss": 0.8195, "step": 55015 }, { "epoch": 0.6705422105224672, "grad_norm": 2.2981576919555664, "learning_rate": 1.7340602950609367e-06, "loss": 0.8076, "step": 55020 }, { "epoch": 0.670603146746615, "grad_norm": 1.8967808485031128, "learning_rate": 1.7337395766516997e-06, "loss": 0.8355, "step": 55025 }, { "epoch": 0.6706640829707629, "grad_norm": 2.214787483215332, "learning_rate": 1.7334188582424632e-06, "loss": 0.8281, "step": 55030 }, { "epoch": 0.6707250191949106, "grad_norm": 2.1236069202423096, "learning_rate": 1.7330981398332266e-06, "loss": 0.8844, "step": 55035 }, { "epoch": 0.6707859554190584, "grad_norm": 2.344053268432617, "learning_rate": 1.7327774214239899e-06, "loss": 0.7926, "step": 55040 }, { "epoch": 0.6708468916432062, "grad_norm": 2.204420804977417, "learning_rate": 1.7324567030147533e-06, "loss": 0.8635, "step": 55045 }, { "epoch": 0.670907827867354, "grad_norm": 1.929193139076233, "learning_rate": 1.7321359846055165e-06, "loss": 0.817, "step": 55050 }, { "epoch": 0.6709687640915019, "grad_norm": 1.7583602666854858, "learning_rate": 1.7318152661962798e-06, "loss": 0.8297, "step": 55055 }, { "epoch": 0.6710297003156497, "grad_norm": 1.9790176153182983, "learning_rate": 1.7314945477870432e-06, "loss": 0.8284, "step": 55060 }, { "epoch": 0.6710906365397975, "grad_norm": 2.0037503242492676, "learning_rate": 1.7311738293778062e-06, "loss": 0.8756, "step": 55065 }, { "epoch": 0.6711515727639452, "grad_norm": 2.2346081733703613, "learning_rate": 1.7308531109685697e-06, "loss": 0.787, "step": 55070 }, { "epoch": 0.671212508988093, "grad_norm": 2.0334858894348145, "learning_rate": 1.730532392559333e-06, "loss": 0.7663, "step": 55075 }, { "epoch": 0.6712734452122409, "grad_norm": 1.8968143463134766, "learning_rate": 1.7302116741500963e-06, "loss": 0.764, "step": 55080 }, { "epoch": 0.6713343814363887, "grad_norm": 1.9643479585647583, "learning_rate": 1.7298909557408598e-06, "loss": 0.7308, "step": 55085 }, { "epoch": 0.6713953176605365, "grad_norm": 1.6749943494796753, "learning_rate": 1.7295702373316232e-06, "loss": 0.8299, "step": 55090 }, { "epoch": 0.6714562538846843, "grad_norm": 1.7839243412017822, "learning_rate": 1.7292495189223862e-06, "loss": 0.8084, "step": 55095 }, { "epoch": 0.6715171901088322, "grad_norm": 1.8388111591339111, "learning_rate": 1.7289288005131497e-06, "loss": 0.8587, "step": 55100 }, { "epoch": 0.6715781263329799, "grad_norm": 1.9015765190124512, "learning_rate": 1.7286080821039129e-06, "loss": 0.8569, "step": 55105 }, { "epoch": 0.6716390625571277, "grad_norm": 1.9904111623764038, "learning_rate": 1.7282873636946761e-06, "loss": 0.8084, "step": 55110 }, { "epoch": 0.6716999987812755, "grad_norm": 2.261033058166504, "learning_rate": 1.7279666452854396e-06, "loss": 0.8407, "step": 55115 }, { "epoch": 0.6717609350054233, "grad_norm": 1.789271354675293, "learning_rate": 1.7276459268762028e-06, "loss": 0.7957, "step": 55120 }, { "epoch": 0.6718218712295712, "grad_norm": 1.9601564407348633, "learning_rate": 1.7273252084669662e-06, "loss": 0.8815, "step": 55125 }, { "epoch": 0.671882807453719, "grad_norm": 1.950199007987976, "learning_rate": 1.7270044900577297e-06, "loss": 0.8589, "step": 55130 }, { "epoch": 0.6719437436778667, "grad_norm": 1.8271011114120483, "learning_rate": 1.7266837716484927e-06, "loss": 0.8306, "step": 55135 }, { "epoch": 0.6720046799020145, "grad_norm": 2.2312629222869873, "learning_rate": 1.7263630532392561e-06, "loss": 0.8069, "step": 55140 }, { "epoch": 0.6720656161261623, "grad_norm": 1.8092347383499146, "learning_rate": 1.7260423348300193e-06, "loss": 0.7909, "step": 55145 }, { "epoch": 0.6721265523503102, "grad_norm": 2.067559003829956, "learning_rate": 1.7257216164207826e-06, "loss": 0.8434, "step": 55150 }, { "epoch": 0.672187488574458, "grad_norm": 2.1579222679138184, "learning_rate": 1.725400898011546e-06, "loss": 0.8481, "step": 55155 }, { "epoch": 0.6722484247986058, "grad_norm": 1.7996598482131958, "learning_rate": 1.7250801796023092e-06, "loss": 0.7518, "step": 55160 }, { "epoch": 0.6723093610227536, "grad_norm": 1.796360969543457, "learning_rate": 1.7247594611930727e-06, "loss": 0.8066, "step": 55165 }, { "epoch": 0.6723702972469013, "grad_norm": 2.3041975498199463, "learning_rate": 1.7244387427838361e-06, "loss": 0.7309, "step": 55170 }, { "epoch": 0.6724312334710492, "grad_norm": 1.7367275953292847, "learning_rate": 1.7241180243745991e-06, "loss": 0.8524, "step": 55175 }, { "epoch": 0.672492169695197, "grad_norm": 2.3389909267425537, "learning_rate": 1.7237973059653626e-06, "loss": 0.8296, "step": 55180 }, { "epoch": 0.6725531059193448, "grad_norm": 1.9638375043869019, "learning_rate": 1.7234765875561258e-06, "loss": 0.8669, "step": 55185 }, { "epoch": 0.6726140421434926, "grad_norm": 1.8207238912582397, "learning_rate": 1.7231558691468893e-06, "loss": 0.8763, "step": 55190 }, { "epoch": 0.6726749783676405, "grad_norm": 1.9569873809814453, "learning_rate": 1.7228351507376525e-06, "loss": 0.8864, "step": 55195 }, { "epoch": 0.6727359145917883, "grad_norm": 2.019423007965088, "learning_rate": 1.7225144323284157e-06, "loss": 0.844, "step": 55200 }, { "epoch": 0.672796850815936, "grad_norm": 1.9558342695236206, "learning_rate": 1.7221937139191792e-06, "loss": 0.8143, "step": 55205 }, { "epoch": 0.6728577870400838, "grad_norm": 1.930041790008545, "learning_rate": 1.7218729955099426e-06, "loss": 0.8653, "step": 55210 }, { "epoch": 0.6729187232642316, "grad_norm": 1.7534340620040894, "learning_rate": 1.7215522771007056e-06, "loss": 0.798, "step": 55215 }, { "epoch": 0.6729796594883795, "grad_norm": 1.686234474182129, "learning_rate": 1.721231558691469e-06, "loss": 0.8063, "step": 55220 }, { "epoch": 0.6730405957125273, "grad_norm": 2.0183463096618652, "learning_rate": 1.7209108402822323e-06, "loss": 0.8155, "step": 55225 }, { "epoch": 0.6731015319366751, "grad_norm": 1.8312076330184937, "learning_rate": 1.7205901218729957e-06, "loss": 0.8347, "step": 55230 }, { "epoch": 0.6731624681608229, "grad_norm": 1.8763186931610107, "learning_rate": 1.720269403463759e-06, "loss": 0.8594, "step": 55235 }, { "epoch": 0.6732234043849706, "grad_norm": 2.1062800884246826, "learning_rate": 1.7199486850545222e-06, "loss": 0.8263, "step": 55240 }, { "epoch": 0.6732843406091185, "grad_norm": 1.82598876953125, "learning_rate": 1.7196279666452856e-06, "loss": 0.8437, "step": 55245 }, { "epoch": 0.6733452768332663, "grad_norm": 2.1774654388427734, "learning_rate": 1.719307248236049e-06, "loss": 0.8549, "step": 55250 }, { "epoch": 0.6734062130574141, "grad_norm": 1.9715365171432495, "learning_rate": 1.718986529826812e-06, "loss": 0.869, "step": 55255 }, { "epoch": 0.6734671492815619, "grad_norm": 2.502852439880371, "learning_rate": 1.7186658114175755e-06, "loss": 0.7571, "step": 55260 }, { "epoch": 0.6735280855057098, "grad_norm": 1.9653141498565674, "learning_rate": 1.7183450930083387e-06, "loss": 0.8252, "step": 55265 }, { "epoch": 0.6735890217298576, "grad_norm": 2.1238365173339844, "learning_rate": 1.7180243745991022e-06, "loss": 0.8175, "step": 55270 }, { "epoch": 0.6736499579540053, "grad_norm": 1.93788743019104, "learning_rate": 1.7177036561898654e-06, "loss": 0.8539, "step": 55275 }, { "epoch": 0.6737108941781531, "grad_norm": 2.279332399368286, "learning_rate": 1.7173829377806286e-06, "loss": 0.8235, "step": 55280 }, { "epoch": 0.6737718304023009, "grad_norm": 2.034968137741089, "learning_rate": 1.717062219371392e-06, "loss": 0.7849, "step": 55285 }, { "epoch": 0.6738327666264488, "grad_norm": 2.181103467941284, "learning_rate": 1.7167415009621555e-06, "loss": 0.8117, "step": 55290 }, { "epoch": 0.6738937028505966, "grad_norm": 1.7061117887496948, "learning_rate": 1.7164207825529185e-06, "loss": 0.9049, "step": 55295 }, { "epoch": 0.6739546390747444, "grad_norm": 1.7988324165344238, "learning_rate": 1.716100064143682e-06, "loss": 0.8506, "step": 55300 }, { "epoch": 0.6740155752988922, "grad_norm": 1.899374008178711, "learning_rate": 1.7157793457344454e-06, "loss": 0.7949, "step": 55305 }, { "epoch": 0.6740765115230399, "grad_norm": 2.0057332515716553, "learning_rate": 1.7154586273252086e-06, "loss": 0.8916, "step": 55310 }, { "epoch": 0.6741374477471878, "grad_norm": 1.8897384405136108, "learning_rate": 1.715137908915972e-06, "loss": 0.7976, "step": 55315 }, { "epoch": 0.6741983839713356, "grad_norm": 1.7094794511795044, "learning_rate": 1.714817190506735e-06, "loss": 0.7894, "step": 55320 }, { "epoch": 0.6742593201954834, "grad_norm": 2.106187582015991, "learning_rate": 1.7144964720974985e-06, "loss": 0.8176, "step": 55325 }, { "epoch": 0.6743202564196312, "grad_norm": 1.7358770370483398, "learning_rate": 1.714175753688262e-06, "loss": 0.8685, "step": 55330 }, { "epoch": 0.674381192643779, "grad_norm": 2.0157716274261475, "learning_rate": 1.713855035279025e-06, "loss": 0.7024, "step": 55335 }, { "epoch": 0.6744421288679269, "grad_norm": 2.1680634021759033, "learning_rate": 1.7135343168697884e-06, "loss": 0.7988, "step": 55340 }, { "epoch": 0.6745030650920746, "grad_norm": 2.0880746841430664, "learning_rate": 1.7132135984605519e-06, "loss": 0.7939, "step": 55345 }, { "epoch": 0.6745640013162224, "grad_norm": 1.9995530843734741, "learning_rate": 1.7128928800513151e-06, "loss": 0.784, "step": 55350 }, { "epoch": 0.6746249375403702, "grad_norm": 2.071903944015503, "learning_rate": 1.7125721616420785e-06, "loss": 0.8118, "step": 55355 }, { "epoch": 0.674685873764518, "grad_norm": 1.832431435585022, "learning_rate": 1.7122514432328416e-06, "loss": 0.8286, "step": 55360 }, { "epoch": 0.6747468099886659, "grad_norm": 2.0025362968444824, "learning_rate": 1.711930724823605e-06, "loss": 0.7503, "step": 55365 }, { "epoch": 0.6748077462128137, "grad_norm": 1.6890921592712402, "learning_rate": 1.7116100064143684e-06, "loss": 0.7287, "step": 55370 }, { "epoch": 0.6748686824369615, "grad_norm": 1.803908109664917, "learning_rate": 1.7112892880051315e-06, "loss": 0.8544, "step": 55375 }, { "epoch": 0.6749296186611092, "grad_norm": 1.8892511129379272, "learning_rate": 1.710968569595895e-06, "loss": 0.8466, "step": 55380 }, { "epoch": 0.674990554885257, "grad_norm": 1.9181582927703857, "learning_rate": 1.7106478511866583e-06, "loss": 0.9449, "step": 55385 }, { "epoch": 0.6750514911094049, "grad_norm": 2.481539011001587, "learning_rate": 1.7103271327774216e-06, "loss": 0.8645, "step": 55390 }, { "epoch": 0.6751124273335527, "grad_norm": 2.061188220977783, "learning_rate": 1.710006414368185e-06, "loss": 0.7823, "step": 55395 }, { "epoch": 0.6751733635577005, "grad_norm": 1.9020391702651978, "learning_rate": 1.709685695958948e-06, "loss": 0.7782, "step": 55400 }, { "epoch": 0.6752342997818483, "grad_norm": 1.72856605052948, "learning_rate": 1.7093649775497115e-06, "loss": 0.8059, "step": 55405 }, { "epoch": 0.6752952360059962, "grad_norm": 2.1307215690612793, "learning_rate": 1.709044259140475e-06, "loss": 0.8653, "step": 55410 }, { "epoch": 0.6753561722301439, "grad_norm": 1.7161059379577637, "learning_rate": 1.708723540731238e-06, "loss": 0.8329, "step": 55415 }, { "epoch": 0.6754171084542917, "grad_norm": 1.760724663734436, "learning_rate": 1.7084028223220014e-06, "loss": 0.8256, "step": 55420 }, { "epoch": 0.6754780446784395, "grad_norm": 2.1639044284820557, "learning_rate": 1.7080821039127648e-06, "loss": 0.7974, "step": 55425 }, { "epoch": 0.6755389809025873, "grad_norm": 2.2028591632843018, "learning_rate": 1.707761385503528e-06, "loss": 0.8088, "step": 55430 }, { "epoch": 0.6755999171267352, "grad_norm": 2.2285194396972656, "learning_rate": 1.7074406670942915e-06, "loss": 0.8468, "step": 55435 }, { "epoch": 0.675660853350883, "grad_norm": 1.9023805856704712, "learning_rate": 1.7071199486850545e-06, "loss": 0.8124, "step": 55440 }, { "epoch": 0.6757217895750308, "grad_norm": 1.878275752067566, "learning_rate": 1.706799230275818e-06, "loss": 0.7965, "step": 55445 }, { "epoch": 0.6757827257991785, "grad_norm": 1.871612310409546, "learning_rate": 1.7064785118665814e-06, "loss": 0.8022, "step": 55450 }, { "epoch": 0.6758436620233264, "grad_norm": 1.894511103630066, "learning_rate": 1.7061577934573446e-06, "loss": 0.9062, "step": 55455 }, { "epoch": 0.6759045982474742, "grad_norm": 1.6836854219436646, "learning_rate": 1.7058370750481078e-06, "loss": 0.8596, "step": 55460 }, { "epoch": 0.675965534471622, "grad_norm": 2.246107339859009, "learning_rate": 1.7055163566388713e-06, "loss": 0.8223, "step": 55465 }, { "epoch": 0.6760264706957698, "grad_norm": 1.739688515663147, "learning_rate": 1.7051956382296345e-06, "loss": 0.8753, "step": 55470 }, { "epoch": 0.6760874069199176, "grad_norm": 1.757569670677185, "learning_rate": 1.704874919820398e-06, "loss": 0.7897, "step": 55475 }, { "epoch": 0.6761483431440655, "grad_norm": 2.6188464164733887, "learning_rate": 1.704554201411161e-06, "loss": 0.828, "step": 55480 }, { "epoch": 0.6762092793682132, "grad_norm": 1.9121854305267334, "learning_rate": 1.7042334830019244e-06, "loss": 0.8553, "step": 55485 }, { "epoch": 0.676270215592361, "grad_norm": 2.3610665798187256, "learning_rate": 1.7039127645926878e-06, "loss": 0.8417, "step": 55490 }, { "epoch": 0.6763311518165088, "grad_norm": 2.2749125957489014, "learning_rate": 1.703592046183451e-06, "loss": 0.8088, "step": 55495 }, { "epoch": 0.6763920880406566, "grad_norm": 2.425095319747925, "learning_rate": 1.7032713277742143e-06, "loss": 0.8319, "step": 55500 }, { "epoch": 0.6764530242648045, "grad_norm": 2.313937187194824, "learning_rate": 1.7029506093649777e-06, "loss": 0.839, "step": 55505 }, { "epoch": 0.6765139604889523, "grad_norm": 2.4600188732147217, "learning_rate": 1.702629890955741e-06, "loss": 0.8572, "step": 55510 }, { "epoch": 0.6765748967131001, "grad_norm": 2.0026941299438477, "learning_rate": 1.7023091725465044e-06, "loss": 0.7797, "step": 55515 }, { "epoch": 0.6766358329372478, "grad_norm": 1.9105435609817505, "learning_rate": 1.7019884541372674e-06, "loss": 0.8571, "step": 55520 }, { "epoch": 0.6766967691613957, "grad_norm": 1.6558858156204224, "learning_rate": 1.7016677357280309e-06, "loss": 0.8044, "step": 55525 }, { "epoch": 0.6767577053855435, "grad_norm": 1.9180355072021484, "learning_rate": 1.7013470173187943e-06, "loss": 0.8439, "step": 55530 }, { "epoch": 0.6768186416096913, "grad_norm": 1.8581572771072388, "learning_rate": 1.7010262989095575e-06, "loss": 0.9051, "step": 55535 }, { "epoch": 0.6768795778338391, "grad_norm": 2.1470699310302734, "learning_rate": 1.700705580500321e-06, "loss": 1.0269, "step": 55540 }, { "epoch": 0.6769405140579869, "grad_norm": 1.947749137878418, "learning_rate": 1.7003848620910842e-06, "loss": 0.8336, "step": 55545 }, { "epoch": 0.6770014502821348, "grad_norm": 2.2809793949127197, "learning_rate": 1.7000641436818474e-06, "loss": 0.8243, "step": 55550 }, { "epoch": 0.6770623865062825, "grad_norm": 2.2228915691375732, "learning_rate": 1.6997434252726109e-06, "loss": 0.7971, "step": 55555 }, { "epoch": 0.6771233227304303, "grad_norm": 1.6581058502197266, "learning_rate": 1.6994227068633739e-06, "loss": 0.807, "step": 55560 }, { "epoch": 0.6771842589545781, "grad_norm": 1.8417394161224365, "learning_rate": 1.6991019884541373e-06, "loss": 0.7902, "step": 55565 }, { "epoch": 0.677245195178726, "grad_norm": 1.7325773239135742, "learning_rate": 1.6987812700449008e-06, "loss": 0.7351, "step": 55570 }, { "epoch": 0.6773061314028738, "grad_norm": 1.7775298357009888, "learning_rate": 1.698460551635664e-06, "loss": 0.8484, "step": 55575 }, { "epoch": 0.6773670676270216, "grad_norm": 1.7170723676681519, "learning_rate": 1.6981398332264274e-06, "loss": 0.8326, "step": 55580 }, { "epoch": 0.6774280038511694, "grad_norm": 2.008092164993286, "learning_rate": 1.6978191148171907e-06, "loss": 0.8088, "step": 55585 }, { "epoch": 0.6774889400753171, "grad_norm": 1.6737182140350342, "learning_rate": 1.6974983964079539e-06, "loss": 0.8006, "step": 55590 }, { "epoch": 0.677549876299465, "grad_norm": 2.0079216957092285, "learning_rate": 1.6971776779987173e-06, "loss": 0.8496, "step": 55595 }, { "epoch": 0.6776108125236128, "grad_norm": 1.8225703239440918, "learning_rate": 1.6968569595894808e-06, "loss": 0.7367, "step": 55600 }, { "epoch": 0.6776717487477606, "grad_norm": 1.8669873476028442, "learning_rate": 1.6965362411802438e-06, "loss": 0.7888, "step": 55605 }, { "epoch": 0.6777326849719084, "grad_norm": 1.675047516822815, "learning_rate": 1.6962155227710072e-06, "loss": 0.7922, "step": 55610 }, { "epoch": 0.6777936211960562, "grad_norm": 2.219339609146118, "learning_rate": 1.6958948043617705e-06, "loss": 0.8275, "step": 55615 }, { "epoch": 0.6778545574202041, "grad_norm": 2.105156421661377, "learning_rate": 1.695574085952534e-06, "loss": 0.8532, "step": 55620 }, { "epoch": 0.6779154936443518, "grad_norm": 2.155909299850464, "learning_rate": 1.6952533675432971e-06, "loss": 0.7891, "step": 55625 }, { "epoch": 0.6779764298684996, "grad_norm": 2.1186227798461914, "learning_rate": 1.6949326491340604e-06, "loss": 0.7812, "step": 55630 }, { "epoch": 0.6780373660926474, "grad_norm": 1.6839319467544556, "learning_rate": 1.6946119307248238e-06, "loss": 0.8226, "step": 55635 }, { "epoch": 0.6780983023167952, "grad_norm": 2.366776943206787, "learning_rate": 1.6942912123155872e-06, "loss": 0.8441, "step": 55640 }, { "epoch": 0.6781592385409431, "grad_norm": 2.0555858612060547, "learning_rate": 1.6939704939063503e-06, "loss": 0.8413, "step": 55645 }, { "epoch": 0.6782201747650909, "grad_norm": 1.8258154392242432, "learning_rate": 1.6936497754971137e-06, "loss": 0.8345, "step": 55650 }, { "epoch": 0.6782811109892387, "grad_norm": 3.176929473876953, "learning_rate": 1.693329057087877e-06, "loss": 0.8373, "step": 55655 }, { "epoch": 0.6783420472133864, "grad_norm": 1.9080365896224976, "learning_rate": 1.6930083386786404e-06, "loss": 0.7827, "step": 55660 }, { "epoch": 0.6784029834375342, "grad_norm": 2.147789239883423, "learning_rate": 1.6926876202694038e-06, "loss": 0.8037, "step": 55665 }, { "epoch": 0.6784639196616821, "grad_norm": 2.2670087814331055, "learning_rate": 1.6923669018601668e-06, "loss": 0.8053, "step": 55670 }, { "epoch": 0.6785248558858299, "grad_norm": 1.9299139976501465, "learning_rate": 1.6920461834509303e-06, "loss": 0.8331, "step": 55675 }, { "epoch": 0.6785857921099777, "grad_norm": 2.8608205318450928, "learning_rate": 1.6917254650416937e-06, "loss": 0.8173, "step": 55680 }, { "epoch": 0.6786467283341255, "grad_norm": 2.3249762058258057, "learning_rate": 1.6914047466324567e-06, "loss": 0.8936, "step": 55685 }, { "epoch": 0.6787076645582734, "grad_norm": 1.9998611211776733, "learning_rate": 1.6910840282232202e-06, "loss": 0.9704, "step": 55690 }, { "epoch": 0.6787686007824211, "grad_norm": 1.9638667106628418, "learning_rate": 1.6907633098139834e-06, "loss": 0.7113, "step": 55695 }, { "epoch": 0.6788295370065689, "grad_norm": 2.0875589847564697, "learning_rate": 1.6904425914047468e-06, "loss": 0.86, "step": 55700 }, { "epoch": 0.6788904732307167, "grad_norm": 1.7328091859817505, "learning_rate": 1.6901218729955103e-06, "loss": 0.8256, "step": 55705 }, { "epoch": 0.6789514094548645, "grad_norm": 2.204167604446411, "learning_rate": 1.6898011545862733e-06, "loss": 0.8221, "step": 55710 }, { "epoch": 0.6790123456790124, "grad_norm": 2.5498580932617188, "learning_rate": 1.6894804361770367e-06, "loss": 0.8281, "step": 55715 }, { "epoch": 0.6790732819031602, "grad_norm": 1.7155662775039673, "learning_rate": 1.6891597177678002e-06, "loss": 0.9133, "step": 55720 }, { "epoch": 0.679134218127308, "grad_norm": 1.9834654331207275, "learning_rate": 1.6888389993585632e-06, "loss": 0.8264, "step": 55725 }, { "epoch": 0.6791951543514557, "grad_norm": 1.9893419742584229, "learning_rate": 1.6885182809493266e-06, "loss": 0.779, "step": 55730 }, { "epoch": 0.6792560905756035, "grad_norm": 2.7231619358062744, "learning_rate": 1.6881975625400898e-06, "loss": 0.8484, "step": 55735 }, { "epoch": 0.6793170267997514, "grad_norm": 2.6091957092285156, "learning_rate": 1.6878768441308533e-06, "loss": 0.7864, "step": 55740 }, { "epoch": 0.6793779630238992, "grad_norm": 1.9948413372039795, "learning_rate": 1.6875561257216167e-06, "loss": 0.8499, "step": 55745 }, { "epoch": 0.679438899248047, "grad_norm": 1.9508005380630493, "learning_rate": 1.6872354073123797e-06, "loss": 0.779, "step": 55750 }, { "epoch": 0.6794998354721948, "grad_norm": 2.0393412113189697, "learning_rate": 1.6869146889031432e-06, "loss": 0.8491, "step": 55755 }, { "epoch": 0.6795607716963427, "grad_norm": 2.1137826442718506, "learning_rate": 1.6865939704939066e-06, "loss": 0.9107, "step": 55760 }, { "epoch": 0.6796217079204904, "grad_norm": 1.9546494483947754, "learning_rate": 1.6862732520846696e-06, "loss": 0.7684, "step": 55765 }, { "epoch": 0.6796826441446382, "grad_norm": 1.8330343961715698, "learning_rate": 1.685952533675433e-06, "loss": 0.7753, "step": 55770 }, { "epoch": 0.679743580368786, "grad_norm": 1.935410737991333, "learning_rate": 1.6856318152661963e-06, "loss": 0.8059, "step": 55775 }, { "epoch": 0.6798045165929338, "grad_norm": 1.80449378490448, "learning_rate": 1.6853110968569597e-06, "loss": 0.763, "step": 55780 }, { "epoch": 0.6798654528170817, "grad_norm": 2.337460517883301, "learning_rate": 1.6849903784477232e-06, "loss": 0.8554, "step": 55785 }, { "epoch": 0.6799263890412295, "grad_norm": 2.0809173583984375, "learning_rate": 1.6846696600384862e-06, "loss": 0.8257, "step": 55790 }, { "epoch": 0.6799873252653773, "grad_norm": 1.7942479848861694, "learning_rate": 1.6843489416292496e-06, "loss": 0.8145, "step": 55795 }, { "epoch": 0.680048261489525, "grad_norm": 1.9685126543045044, "learning_rate": 1.684028223220013e-06, "loss": 0.7849, "step": 55800 }, { "epoch": 0.6801091977136728, "grad_norm": 1.9214953184127808, "learning_rate": 1.6837075048107763e-06, "loss": 0.8035, "step": 55805 }, { "epoch": 0.6801701339378207, "grad_norm": 1.9208464622497559, "learning_rate": 1.6833867864015395e-06, "loss": 0.8709, "step": 55810 }, { "epoch": 0.6802310701619685, "grad_norm": 2.004448652267456, "learning_rate": 1.6830660679923028e-06, "loss": 0.7969, "step": 55815 }, { "epoch": 0.6802920063861163, "grad_norm": 1.7808756828308105, "learning_rate": 1.6827453495830662e-06, "loss": 0.7951, "step": 55820 }, { "epoch": 0.6803529426102641, "grad_norm": 1.8809268474578857, "learning_rate": 1.6824246311738297e-06, "loss": 0.8425, "step": 55825 }, { "epoch": 0.680413878834412, "grad_norm": 2.3947484493255615, "learning_rate": 1.6821039127645927e-06, "loss": 0.8879, "step": 55830 }, { "epoch": 0.6804748150585597, "grad_norm": 2.140228509902954, "learning_rate": 1.6817831943553561e-06, "loss": 0.8169, "step": 55835 }, { "epoch": 0.6805357512827075, "grad_norm": 1.8224855661392212, "learning_rate": 1.6814624759461196e-06, "loss": 0.7973, "step": 55840 }, { "epoch": 0.6805966875068553, "grad_norm": 1.8567891120910645, "learning_rate": 1.6811417575368828e-06, "loss": 0.8435, "step": 55845 }, { "epoch": 0.6806576237310031, "grad_norm": 1.6061795949935913, "learning_rate": 1.680821039127646e-06, "loss": 0.8229, "step": 55850 }, { "epoch": 0.680718559955151, "grad_norm": 2.134819507598877, "learning_rate": 1.6805003207184092e-06, "loss": 0.8435, "step": 55855 }, { "epoch": 0.6807794961792988, "grad_norm": 1.8431252241134644, "learning_rate": 1.6801796023091727e-06, "loss": 0.8638, "step": 55860 }, { "epoch": 0.6808404324034466, "grad_norm": 1.972558856010437, "learning_rate": 1.6798588838999361e-06, "loss": 0.8002, "step": 55865 }, { "epoch": 0.6809013686275943, "grad_norm": 2.037966728210449, "learning_rate": 1.6795381654906991e-06, "loss": 0.8803, "step": 55870 }, { "epoch": 0.6809623048517421, "grad_norm": 2.0926477909088135, "learning_rate": 1.6792174470814626e-06, "loss": 0.8817, "step": 55875 }, { "epoch": 0.68102324107589, "grad_norm": 1.6073352098464966, "learning_rate": 1.678896728672226e-06, "loss": 0.8555, "step": 55880 }, { "epoch": 0.6810841773000378, "grad_norm": 2.1737353801727295, "learning_rate": 1.6785760102629892e-06, "loss": 0.8266, "step": 55885 }, { "epoch": 0.6811451135241856, "grad_norm": 2.0266571044921875, "learning_rate": 1.6782552918537525e-06, "loss": 0.7717, "step": 55890 }, { "epoch": 0.6812060497483334, "grad_norm": 2.2530341148376465, "learning_rate": 1.677934573444516e-06, "loss": 0.8264, "step": 55895 }, { "epoch": 0.6812669859724813, "grad_norm": 2.019505739212036, "learning_rate": 1.6776138550352791e-06, "loss": 0.7655, "step": 55900 }, { "epoch": 0.681327922196629, "grad_norm": 1.89944589138031, "learning_rate": 1.6772931366260426e-06, "loss": 0.8147, "step": 55905 }, { "epoch": 0.6813888584207768, "grad_norm": 2.067772626876831, "learning_rate": 1.6769724182168056e-06, "loss": 0.7525, "step": 55910 }, { "epoch": 0.6814497946449246, "grad_norm": 1.9661160707473755, "learning_rate": 1.676651699807569e-06, "loss": 0.8999, "step": 55915 }, { "epoch": 0.6815107308690724, "grad_norm": 1.5827875137329102, "learning_rate": 1.6763309813983325e-06, "loss": 0.7762, "step": 55920 }, { "epoch": 0.6815716670932203, "grad_norm": 2.2044785022735596, "learning_rate": 1.6760102629890957e-06, "loss": 0.8075, "step": 55925 }, { "epoch": 0.6816326033173681, "grad_norm": 1.8789461851119995, "learning_rate": 1.6756895445798591e-06, "loss": 0.8334, "step": 55930 }, { "epoch": 0.6816935395415159, "grad_norm": 2.054924726486206, "learning_rate": 1.6753688261706224e-06, "loss": 0.77, "step": 55935 }, { "epoch": 0.6817544757656636, "grad_norm": 1.8563356399536133, "learning_rate": 1.6750481077613856e-06, "loss": 0.8359, "step": 55940 }, { "epoch": 0.6818154119898114, "grad_norm": 1.7517136335372925, "learning_rate": 1.674727389352149e-06, "loss": 0.7734, "step": 55945 }, { "epoch": 0.6818763482139593, "grad_norm": 1.8676308393478394, "learning_rate": 1.674406670942912e-06, "loss": 0.8904, "step": 55950 }, { "epoch": 0.6819372844381071, "grad_norm": 2.074141502380371, "learning_rate": 1.6740859525336755e-06, "loss": 0.8576, "step": 55955 }, { "epoch": 0.6819982206622549, "grad_norm": 1.9863841533660889, "learning_rate": 1.673765234124439e-06, "loss": 0.8561, "step": 55960 }, { "epoch": 0.6820591568864027, "grad_norm": 2.1843085289001465, "learning_rate": 1.6734445157152022e-06, "loss": 0.7875, "step": 55965 }, { "epoch": 0.6821200931105506, "grad_norm": 2.1007931232452393, "learning_rate": 1.6731237973059656e-06, "loss": 0.8096, "step": 55970 }, { "epoch": 0.6821810293346983, "grad_norm": 2.153421640396118, "learning_rate": 1.6728030788967288e-06, "loss": 0.8549, "step": 55975 }, { "epoch": 0.6822419655588461, "grad_norm": 2.2553093433380127, "learning_rate": 1.672482360487492e-06, "loss": 0.7607, "step": 55980 }, { "epoch": 0.6823029017829939, "grad_norm": 2.0047218799591064, "learning_rate": 1.6721616420782555e-06, "loss": 0.8981, "step": 55985 }, { "epoch": 0.6823638380071417, "grad_norm": 2.232787609100342, "learning_rate": 1.6718409236690185e-06, "loss": 0.8385, "step": 55990 }, { "epoch": 0.6824247742312896, "grad_norm": 1.946855902671814, "learning_rate": 1.671520205259782e-06, "loss": 0.7873, "step": 55995 }, { "epoch": 0.6824857104554374, "grad_norm": 2.1414990425109863, "learning_rate": 1.6711994868505454e-06, "loss": 0.8044, "step": 56000 }, { "epoch": 0.6825466466795852, "grad_norm": 2.2312355041503906, "learning_rate": 1.6708787684413086e-06, "loss": 0.7765, "step": 56005 }, { "epoch": 0.6826075829037329, "grad_norm": 1.8770127296447754, "learning_rate": 1.670558050032072e-06, "loss": 0.8162, "step": 56010 }, { "epoch": 0.6826685191278807, "grad_norm": 2.3266029357910156, "learning_rate": 1.6702373316228355e-06, "loss": 0.8466, "step": 56015 }, { "epoch": 0.6827294553520286, "grad_norm": 2.0769357681274414, "learning_rate": 1.6699166132135985e-06, "loss": 0.7673, "step": 56020 }, { "epoch": 0.6827903915761764, "grad_norm": 2.0626635551452637, "learning_rate": 1.669595894804362e-06, "loss": 0.7447, "step": 56025 }, { "epoch": 0.6828513278003242, "grad_norm": 2.1189229488372803, "learning_rate": 1.6692751763951252e-06, "loss": 0.8057, "step": 56030 }, { "epoch": 0.682912264024472, "grad_norm": 2.044370174407959, "learning_rate": 1.6689544579858884e-06, "loss": 0.8354, "step": 56035 }, { "epoch": 0.6829732002486198, "grad_norm": 2.43638277053833, "learning_rate": 1.6686337395766519e-06, "loss": 0.9056, "step": 56040 }, { "epoch": 0.6830341364727676, "grad_norm": 1.7671719789505005, "learning_rate": 1.668313021167415e-06, "loss": 0.8209, "step": 56045 }, { "epoch": 0.6830950726969154, "grad_norm": 1.7960907220840454, "learning_rate": 1.6679923027581785e-06, "loss": 0.791, "step": 56050 }, { "epoch": 0.6831560089210632, "grad_norm": 1.9467647075653076, "learning_rate": 1.667671584348942e-06, "loss": 0.8508, "step": 56055 }, { "epoch": 0.683216945145211, "grad_norm": 1.7749674320220947, "learning_rate": 1.667350865939705e-06, "loss": 0.7529, "step": 56060 }, { "epoch": 0.6832778813693589, "grad_norm": 2.3831450939178467, "learning_rate": 1.6670301475304684e-06, "loss": 0.7601, "step": 56065 }, { "epoch": 0.6833388175935067, "grad_norm": 2.422760248184204, "learning_rate": 1.6667094291212317e-06, "loss": 0.8475, "step": 56070 }, { "epoch": 0.6833997538176545, "grad_norm": 2.2341384887695312, "learning_rate": 1.6663887107119949e-06, "loss": 0.8295, "step": 56075 }, { "epoch": 0.6834606900418022, "grad_norm": 1.9920926094055176, "learning_rate": 1.6660679923027583e-06, "loss": 0.7559, "step": 56080 }, { "epoch": 0.68352162626595, "grad_norm": 1.84884512424469, "learning_rate": 1.6657472738935216e-06, "loss": 0.8525, "step": 56085 }, { "epoch": 0.6835825624900979, "grad_norm": 1.9495563507080078, "learning_rate": 1.665426555484285e-06, "loss": 0.8006, "step": 56090 }, { "epoch": 0.6836434987142457, "grad_norm": 1.9572291374206543, "learning_rate": 1.6651058370750484e-06, "loss": 0.8341, "step": 56095 }, { "epoch": 0.6837044349383935, "grad_norm": 2.0742125511169434, "learning_rate": 1.6647851186658115e-06, "loss": 0.8029, "step": 56100 }, { "epoch": 0.6837653711625413, "grad_norm": 1.818820834159851, "learning_rate": 1.664464400256575e-06, "loss": 0.8465, "step": 56105 }, { "epoch": 0.683826307386689, "grad_norm": 1.7911112308502197, "learning_rate": 1.6641436818473381e-06, "loss": 0.8174, "step": 56110 }, { "epoch": 0.6838872436108369, "grad_norm": 1.67262864112854, "learning_rate": 1.6638229634381014e-06, "loss": 0.8287, "step": 56115 }, { "epoch": 0.6839481798349847, "grad_norm": 2.0774497985839844, "learning_rate": 1.6635022450288648e-06, "loss": 0.818, "step": 56120 }, { "epoch": 0.6840091160591325, "grad_norm": 2.2244741916656494, "learning_rate": 1.663181526619628e-06, "loss": 0.8518, "step": 56125 }, { "epoch": 0.6840700522832803, "grad_norm": 2.1369218826293945, "learning_rate": 1.6628608082103915e-06, "loss": 0.7931, "step": 56130 }, { "epoch": 0.6841309885074282, "grad_norm": 1.7091784477233887, "learning_rate": 1.662540089801155e-06, "loss": 0.8154, "step": 56135 }, { "epoch": 0.684191924731576, "grad_norm": 1.945225477218628, "learning_rate": 1.662219371391918e-06, "loss": 0.8243, "step": 56140 }, { "epoch": 0.6842528609557237, "grad_norm": 2.041778087615967, "learning_rate": 1.6618986529826814e-06, "loss": 0.8354, "step": 56145 }, { "epoch": 0.6843137971798715, "grad_norm": 2.1287646293640137, "learning_rate": 1.6615779345734446e-06, "loss": 0.8558, "step": 56150 }, { "epoch": 0.6843747334040193, "grad_norm": 1.776930809020996, "learning_rate": 1.661257216164208e-06, "loss": 0.7789, "step": 56155 }, { "epoch": 0.6844356696281672, "grad_norm": 2.291154146194458, "learning_rate": 1.6609364977549713e-06, "loss": 0.8096, "step": 56160 }, { "epoch": 0.684496605852315, "grad_norm": 1.846872091293335, "learning_rate": 1.6606157793457345e-06, "loss": 0.7697, "step": 56165 }, { "epoch": 0.6845575420764628, "grad_norm": 1.8701434135437012, "learning_rate": 1.660295060936498e-06, "loss": 0.7672, "step": 56170 }, { "epoch": 0.6846184783006106, "grad_norm": 1.5932207107543945, "learning_rate": 1.6599743425272614e-06, "loss": 0.8415, "step": 56175 }, { "epoch": 0.6846794145247583, "grad_norm": 1.7255859375, "learning_rate": 1.6596536241180244e-06, "loss": 0.8751, "step": 56180 }, { "epoch": 0.6847403507489062, "grad_norm": 1.9608185291290283, "learning_rate": 1.6593329057087878e-06, "loss": 0.7819, "step": 56185 }, { "epoch": 0.684801286973054, "grad_norm": 1.8601890802383423, "learning_rate": 1.659012187299551e-06, "loss": 0.7916, "step": 56190 }, { "epoch": 0.6848622231972018, "grad_norm": 2.1938440799713135, "learning_rate": 1.6586914688903145e-06, "loss": 0.862, "step": 56195 }, { "epoch": 0.6849231594213496, "grad_norm": 1.6936038732528687, "learning_rate": 1.6583707504810777e-06, "loss": 0.8323, "step": 56200 }, { "epoch": 0.6849840956454974, "grad_norm": 2.2948555946350098, "learning_rate": 1.658050032071841e-06, "loss": 0.7898, "step": 56205 }, { "epoch": 0.6850450318696453, "grad_norm": 2.015836238861084, "learning_rate": 1.6577293136626044e-06, "loss": 0.818, "step": 56210 }, { "epoch": 0.685105968093793, "grad_norm": 2.0190296173095703, "learning_rate": 1.6574085952533678e-06, "loss": 0.7846, "step": 56215 }, { "epoch": 0.6851669043179408, "grad_norm": 2.1385722160339355, "learning_rate": 1.6570878768441308e-06, "loss": 0.7802, "step": 56220 }, { "epoch": 0.6852278405420886, "grad_norm": 2.0971994400024414, "learning_rate": 1.6567671584348943e-06, "loss": 0.7485, "step": 56225 }, { "epoch": 0.6852887767662365, "grad_norm": 1.9690684080123901, "learning_rate": 1.6564464400256577e-06, "loss": 0.9089, "step": 56230 }, { "epoch": 0.6853497129903843, "grad_norm": 2.0743422508239746, "learning_rate": 1.656125721616421e-06, "loss": 0.901, "step": 56235 }, { "epoch": 0.6854106492145321, "grad_norm": 2.0132434368133545, "learning_rate": 1.6558050032071842e-06, "loss": 0.7991, "step": 56240 }, { "epoch": 0.6854715854386799, "grad_norm": 1.5458219051361084, "learning_rate": 1.6554842847979474e-06, "loss": 0.8251, "step": 56245 }, { "epoch": 0.6855325216628276, "grad_norm": 1.8140623569488525, "learning_rate": 1.6551635663887109e-06, "loss": 0.7652, "step": 56250 }, { "epoch": 0.6855934578869755, "grad_norm": 2.2112209796905518, "learning_rate": 1.6548428479794743e-06, "loss": 0.8285, "step": 56255 }, { "epoch": 0.6856543941111233, "grad_norm": 2.139029026031494, "learning_rate": 1.6545221295702373e-06, "loss": 0.8003, "step": 56260 }, { "epoch": 0.6857153303352711, "grad_norm": 1.9969102144241333, "learning_rate": 1.6542014111610008e-06, "loss": 0.8137, "step": 56265 }, { "epoch": 0.6857762665594189, "grad_norm": 1.9982277154922485, "learning_rate": 1.6538806927517642e-06, "loss": 0.8321, "step": 56270 }, { "epoch": 0.6858372027835667, "grad_norm": 1.8696082830429077, "learning_rate": 1.6535599743425274e-06, "loss": 0.79, "step": 56275 }, { "epoch": 0.6858981390077146, "grad_norm": 1.7719366550445557, "learning_rate": 1.6532392559332909e-06, "loss": 0.8411, "step": 56280 }, { "epoch": 0.6859590752318623, "grad_norm": 2.191647529602051, "learning_rate": 1.6529185375240539e-06, "loss": 0.8192, "step": 56285 }, { "epoch": 0.6860200114560101, "grad_norm": 1.8499526977539062, "learning_rate": 1.6525978191148173e-06, "loss": 0.7877, "step": 56290 }, { "epoch": 0.6860809476801579, "grad_norm": 2.5681824684143066, "learning_rate": 1.6522771007055808e-06, "loss": 0.7738, "step": 56295 }, { "epoch": 0.6861418839043057, "grad_norm": 2.1131069660186768, "learning_rate": 1.6519563822963438e-06, "loss": 0.7107, "step": 56300 }, { "epoch": 0.6862028201284536, "grad_norm": 2.22076153755188, "learning_rate": 1.6516356638871072e-06, "loss": 0.7763, "step": 56305 }, { "epoch": 0.6862637563526014, "grad_norm": 2.5210750102996826, "learning_rate": 1.6513149454778707e-06, "loss": 0.8065, "step": 56310 }, { "epoch": 0.6863246925767492, "grad_norm": 2.221973419189453, "learning_rate": 1.6509942270686339e-06, "loss": 0.8569, "step": 56315 }, { "epoch": 0.6863856288008969, "grad_norm": 1.8223134279251099, "learning_rate": 1.6506735086593973e-06, "loss": 0.8862, "step": 56320 }, { "epoch": 0.6864465650250448, "grad_norm": 1.9152354001998901, "learning_rate": 1.6503527902501603e-06, "loss": 0.8737, "step": 56325 }, { "epoch": 0.6865075012491926, "grad_norm": 1.6400452852249146, "learning_rate": 1.6500320718409238e-06, "loss": 0.8256, "step": 56330 }, { "epoch": 0.6865684374733404, "grad_norm": 2.021939992904663, "learning_rate": 1.6497113534316872e-06, "loss": 0.7698, "step": 56335 }, { "epoch": 0.6866293736974882, "grad_norm": 1.9907922744750977, "learning_rate": 1.6493906350224502e-06, "loss": 0.8459, "step": 56340 }, { "epoch": 0.686690309921636, "grad_norm": 2.049819231033325, "learning_rate": 1.6490699166132137e-06, "loss": 0.8666, "step": 56345 }, { "epoch": 0.6867512461457839, "grad_norm": 1.983168601989746, "learning_rate": 1.6487491982039771e-06, "loss": 0.8114, "step": 56350 }, { "epoch": 0.6868121823699316, "grad_norm": 1.9328304529190063, "learning_rate": 1.6484284797947403e-06, "loss": 0.8235, "step": 56355 }, { "epoch": 0.6868731185940794, "grad_norm": 1.8322445154190063, "learning_rate": 1.6481077613855038e-06, "loss": 0.7757, "step": 56360 }, { "epoch": 0.6869340548182272, "grad_norm": 2.0026044845581055, "learning_rate": 1.6477870429762668e-06, "loss": 0.8684, "step": 56365 }, { "epoch": 0.686994991042375, "grad_norm": 1.9332690238952637, "learning_rate": 1.6474663245670302e-06, "loss": 0.7779, "step": 56370 }, { "epoch": 0.6870559272665229, "grad_norm": 1.6883281469345093, "learning_rate": 1.6471456061577937e-06, "loss": 0.787, "step": 56375 }, { "epoch": 0.6871168634906707, "grad_norm": 1.8252500295639038, "learning_rate": 1.646824887748557e-06, "loss": 0.7945, "step": 56380 }, { "epoch": 0.6871777997148185, "grad_norm": 2.3271427154541016, "learning_rate": 1.6465041693393201e-06, "loss": 0.8614, "step": 56385 }, { "epoch": 0.6872387359389662, "grad_norm": 1.8988908529281616, "learning_rate": 1.6461834509300836e-06, "loss": 0.7834, "step": 56390 }, { "epoch": 0.687299672163114, "grad_norm": 2.264523506164551, "learning_rate": 1.6458627325208468e-06, "loss": 0.7905, "step": 56395 }, { "epoch": 0.6873606083872619, "grad_norm": 2.1945550441741943, "learning_rate": 1.6455420141116103e-06, "loss": 0.7977, "step": 56400 }, { "epoch": 0.6874215446114097, "grad_norm": 2.7286148071289062, "learning_rate": 1.6452212957023733e-06, "loss": 0.907, "step": 56405 }, { "epoch": 0.6874824808355575, "grad_norm": 1.7815806865692139, "learning_rate": 1.6449005772931367e-06, "loss": 0.8584, "step": 56410 }, { "epoch": 0.6875434170597053, "grad_norm": 2.0103414058685303, "learning_rate": 1.6445798588839001e-06, "loss": 0.8025, "step": 56415 }, { "epoch": 0.6876043532838532, "grad_norm": 2.0677525997161865, "learning_rate": 1.6442591404746634e-06, "loss": 0.8165, "step": 56420 }, { "epoch": 0.6876652895080009, "grad_norm": 1.8674118518829346, "learning_rate": 1.6439384220654266e-06, "loss": 0.8339, "step": 56425 }, { "epoch": 0.6877262257321487, "grad_norm": 1.7658659219741821, "learning_rate": 1.64361770365619e-06, "loss": 0.8391, "step": 56430 }, { "epoch": 0.6877871619562965, "grad_norm": 1.7812714576721191, "learning_rate": 1.6432969852469533e-06, "loss": 0.8108, "step": 56435 }, { "epoch": 0.6878480981804443, "grad_norm": 2.8732833862304688, "learning_rate": 1.6429762668377167e-06, "loss": 0.8061, "step": 56440 }, { "epoch": 0.6879090344045922, "grad_norm": 2.1510472297668457, "learning_rate": 1.6426555484284797e-06, "loss": 0.856, "step": 56445 }, { "epoch": 0.68796997062874, "grad_norm": 2.0364506244659424, "learning_rate": 1.6423348300192432e-06, "loss": 0.8726, "step": 56450 }, { "epoch": 0.6880309068528878, "grad_norm": 2.366995096206665, "learning_rate": 1.6420141116100066e-06, "loss": 0.8093, "step": 56455 }, { "epoch": 0.6880918430770355, "grad_norm": 1.9006620645523071, "learning_rate": 1.6416933932007698e-06, "loss": 0.8575, "step": 56460 }, { "epoch": 0.6881527793011833, "grad_norm": 2.4225451946258545, "learning_rate": 1.641372674791533e-06, "loss": 0.8849, "step": 56465 }, { "epoch": 0.6882137155253312, "grad_norm": 2.471017599105835, "learning_rate": 1.6410519563822965e-06, "loss": 0.8042, "step": 56470 }, { "epoch": 0.688274651749479, "grad_norm": 1.9991252422332764, "learning_rate": 1.6407312379730597e-06, "loss": 0.8397, "step": 56475 }, { "epoch": 0.6883355879736268, "grad_norm": 1.9389911890029907, "learning_rate": 1.6404105195638232e-06, "loss": 0.7999, "step": 56480 }, { "epoch": 0.6883965241977746, "grad_norm": 2.2453055381774902, "learning_rate": 1.6400898011545862e-06, "loss": 0.8276, "step": 56485 }, { "epoch": 0.6884574604219225, "grad_norm": 2.141716480255127, "learning_rate": 1.6397690827453496e-06, "loss": 0.8207, "step": 56490 }, { "epoch": 0.6885183966460702, "grad_norm": 1.8192706108093262, "learning_rate": 1.639448364336113e-06, "loss": 0.8471, "step": 56495 }, { "epoch": 0.688579332870218, "grad_norm": 1.8478646278381348, "learning_rate": 1.6391276459268763e-06, "loss": 0.8153, "step": 56500 }, { "epoch": 0.6886402690943658, "grad_norm": 1.8549892902374268, "learning_rate": 1.6388069275176397e-06, "loss": 0.7879, "step": 56505 }, { "epoch": 0.6887012053185136, "grad_norm": 1.844288945198059, "learning_rate": 1.638486209108403e-06, "loss": 0.7387, "step": 56510 }, { "epoch": 0.6887621415426615, "grad_norm": 2.145496129989624, "learning_rate": 1.6381654906991662e-06, "loss": 0.842, "step": 56515 }, { "epoch": 0.6888230777668093, "grad_norm": 1.7084938287734985, "learning_rate": 1.6378447722899296e-06, "loss": 0.7881, "step": 56520 }, { "epoch": 0.6888840139909571, "grad_norm": 1.9400444030761719, "learning_rate": 1.637524053880693e-06, "loss": 0.8119, "step": 56525 }, { "epoch": 0.6889449502151048, "grad_norm": 1.9211169481277466, "learning_rate": 1.637203335471456e-06, "loss": 0.8522, "step": 56530 }, { "epoch": 0.6890058864392526, "grad_norm": 2.1964685916900635, "learning_rate": 1.6368826170622195e-06, "loss": 0.7663, "step": 56535 }, { "epoch": 0.6890668226634005, "grad_norm": 2.063110589981079, "learning_rate": 1.6365618986529828e-06, "loss": 0.9054, "step": 56540 }, { "epoch": 0.6891277588875483, "grad_norm": 1.988966941833496, "learning_rate": 1.6362411802437462e-06, "loss": 0.8334, "step": 56545 }, { "epoch": 0.6891886951116961, "grad_norm": 2.118466854095459, "learning_rate": 1.6359204618345094e-06, "loss": 0.7944, "step": 56550 }, { "epoch": 0.6892496313358439, "grad_norm": 1.9238218069076538, "learning_rate": 1.6355997434252727e-06, "loss": 0.7656, "step": 56555 }, { "epoch": 0.6893105675599918, "grad_norm": 2.38728404045105, "learning_rate": 1.6352790250160361e-06, "loss": 0.7946, "step": 56560 }, { "epoch": 0.6893715037841395, "grad_norm": 1.7892820835113525, "learning_rate": 1.6349583066067995e-06, "loss": 0.8614, "step": 56565 }, { "epoch": 0.6894324400082873, "grad_norm": 1.9750845432281494, "learning_rate": 1.6346375881975626e-06, "loss": 0.858, "step": 56570 }, { "epoch": 0.6894933762324351, "grad_norm": 1.9549779891967773, "learning_rate": 1.634316869788326e-06, "loss": 0.8518, "step": 56575 }, { "epoch": 0.6895543124565829, "grad_norm": 1.9386950731277466, "learning_rate": 1.6339961513790892e-06, "loss": 0.8068, "step": 56580 }, { "epoch": 0.6896152486807308, "grad_norm": 2.682974100112915, "learning_rate": 1.6336754329698527e-06, "loss": 0.8159, "step": 56585 }, { "epoch": 0.6896761849048786, "grad_norm": 1.7675124406814575, "learning_rate": 1.633354714560616e-06, "loss": 0.8112, "step": 56590 }, { "epoch": 0.6897371211290264, "grad_norm": 2.0404112339019775, "learning_rate": 1.6330339961513791e-06, "loss": 0.7844, "step": 56595 }, { "epoch": 0.6897980573531741, "grad_norm": 2.186577320098877, "learning_rate": 1.6327132777421426e-06, "loss": 0.8277, "step": 56600 }, { "epoch": 0.6898589935773219, "grad_norm": 2.30037784576416, "learning_rate": 1.632392559332906e-06, "loss": 0.8074, "step": 56605 }, { "epoch": 0.6899199298014698, "grad_norm": 1.9895268678665161, "learning_rate": 1.632071840923669e-06, "loss": 0.8673, "step": 56610 }, { "epoch": 0.6899808660256176, "grad_norm": 1.9171123504638672, "learning_rate": 1.6317511225144325e-06, "loss": 0.772, "step": 56615 }, { "epoch": 0.6900418022497654, "grad_norm": 2.086120843887329, "learning_rate": 1.6314304041051957e-06, "loss": 0.8151, "step": 56620 }, { "epoch": 0.6901027384739132, "grad_norm": 1.936207890510559, "learning_rate": 1.6311096856959591e-06, "loss": 0.8047, "step": 56625 }, { "epoch": 0.6901636746980611, "grad_norm": 1.882616400718689, "learning_rate": 1.6307889672867226e-06, "loss": 0.8776, "step": 56630 }, { "epoch": 0.6902246109222088, "grad_norm": 2.0588135719299316, "learning_rate": 1.6304682488774856e-06, "loss": 0.7967, "step": 56635 }, { "epoch": 0.6902855471463566, "grad_norm": 1.838289499282837, "learning_rate": 1.630147530468249e-06, "loss": 0.8084, "step": 56640 }, { "epoch": 0.6903464833705044, "grad_norm": 2.0709409713745117, "learning_rate": 1.6298268120590125e-06, "loss": 0.8157, "step": 56645 }, { "epoch": 0.6904074195946522, "grad_norm": 1.924236536026001, "learning_rate": 1.6295060936497755e-06, "loss": 0.8858, "step": 56650 }, { "epoch": 0.6904683558188001, "grad_norm": 1.894546627998352, "learning_rate": 1.629185375240539e-06, "loss": 0.7696, "step": 56655 }, { "epoch": 0.6905292920429479, "grad_norm": 1.812379240989685, "learning_rate": 1.6288646568313022e-06, "loss": 0.7406, "step": 56660 }, { "epoch": 0.6905902282670957, "grad_norm": 1.995760202407837, "learning_rate": 1.6285439384220656e-06, "loss": 0.8168, "step": 56665 }, { "epoch": 0.6906511644912434, "grad_norm": 2.018644332885742, "learning_rate": 1.628223220012829e-06, "loss": 0.7833, "step": 56670 }, { "epoch": 0.6907121007153912, "grad_norm": 2.1683812141418457, "learning_rate": 1.627902501603592e-06, "loss": 0.8913, "step": 56675 }, { "epoch": 0.6907730369395391, "grad_norm": 2.1224772930145264, "learning_rate": 1.6275817831943555e-06, "loss": 0.7601, "step": 56680 }, { "epoch": 0.6908339731636869, "grad_norm": 1.9826902151107788, "learning_rate": 1.627261064785119e-06, "loss": 0.83, "step": 56685 }, { "epoch": 0.6908949093878347, "grad_norm": 1.9546061754226685, "learning_rate": 1.626940346375882e-06, "loss": 0.834, "step": 56690 }, { "epoch": 0.6909558456119825, "grad_norm": 1.9366602897644043, "learning_rate": 1.6266196279666454e-06, "loss": 0.7901, "step": 56695 }, { "epoch": 0.6910167818361304, "grad_norm": 2.151256799697876, "learning_rate": 1.6262989095574086e-06, "loss": 0.8743, "step": 56700 }, { "epoch": 0.6910777180602781, "grad_norm": 1.8581730127334595, "learning_rate": 1.625978191148172e-06, "loss": 0.7741, "step": 56705 }, { "epoch": 0.6911386542844259, "grad_norm": 2.443248987197876, "learning_rate": 1.6256574727389355e-06, "loss": 0.8219, "step": 56710 }, { "epoch": 0.6911995905085737, "grad_norm": 1.873949646949768, "learning_rate": 1.6253367543296985e-06, "loss": 0.7779, "step": 56715 }, { "epoch": 0.6912605267327215, "grad_norm": 2.0251152515411377, "learning_rate": 1.625016035920462e-06, "loss": 0.8382, "step": 56720 }, { "epoch": 0.6913214629568694, "grad_norm": 2.0490071773529053, "learning_rate": 1.6246953175112254e-06, "loss": 0.7917, "step": 56725 }, { "epoch": 0.6913823991810172, "grad_norm": 1.7019389867782593, "learning_rate": 1.6243745991019886e-06, "loss": 0.8506, "step": 56730 }, { "epoch": 0.691443335405165, "grad_norm": 2.458120584487915, "learning_rate": 1.6240538806927519e-06, "loss": 0.834, "step": 56735 }, { "epoch": 0.6915042716293127, "grad_norm": 1.7283227443695068, "learning_rate": 1.623733162283515e-06, "loss": 0.7983, "step": 56740 }, { "epoch": 0.6915652078534605, "grad_norm": 2.282952308654785, "learning_rate": 1.6234124438742785e-06, "loss": 0.8431, "step": 56745 }, { "epoch": 0.6916261440776084, "grad_norm": 2.0621337890625, "learning_rate": 1.623091725465042e-06, "loss": 0.7847, "step": 56750 }, { "epoch": 0.6916870803017562, "grad_norm": 2.171368360519409, "learning_rate": 1.622771007055805e-06, "loss": 0.8228, "step": 56755 }, { "epoch": 0.691748016525904, "grad_norm": 1.9404313564300537, "learning_rate": 1.6224502886465684e-06, "loss": 0.877, "step": 56760 }, { "epoch": 0.6918089527500518, "grad_norm": 1.63313889503479, "learning_rate": 1.6221295702373319e-06, "loss": 0.757, "step": 56765 }, { "epoch": 0.6918698889741997, "grad_norm": 1.7767869234085083, "learning_rate": 1.621808851828095e-06, "loss": 0.7795, "step": 56770 }, { "epoch": 0.6919308251983474, "grad_norm": 1.864888310432434, "learning_rate": 1.6214881334188583e-06, "loss": 0.8241, "step": 56775 }, { "epoch": 0.6919917614224952, "grad_norm": 2.2778878211975098, "learning_rate": 1.6211674150096216e-06, "loss": 0.8855, "step": 56780 }, { "epoch": 0.692052697646643, "grad_norm": 2.3644375801086426, "learning_rate": 1.620846696600385e-06, "loss": 0.8554, "step": 56785 }, { "epoch": 0.6921136338707908, "grad_norm": 2.1028294563293457, "learning_rate": 1.6205259781911484e-06, "loss": 0.8366, "step": 56790 }, { "epoch": 0.6921745700949387, "grad_norm": 2.4205634593963623, "learning_rate": 1.6202052597819114e-06, "loss": 0.7518, "step": 56795 }, { "epoch": 0.6922355063190865, "grad_norm": 2.3364391326904297, "learning_rate": 1.6198845413726749e-06, "loss": 0.8109, "step": 56800 }, { "epoch": 0.6922964425432343, "grad_norm": 1.954100251197815, "learning_rate": 1.6195638229634383e-06, "loss": 0.778, "step": 56805 }, { "epoch": 0.692357378767382, "grad_norm": 2.313389539718628, "learning_rate": 1.6192431045542016e-06, "loss": 0.7695, "step": 56810 }, { "epoch": 0.6924183149915298, "grad_norm": 1.8038198947906494, "learning_rate": 1.6189223861449648e-06, "loss": 0.8322, "step": 56815 }, { "epoch": 0.6924792512156777, "grad_norm": 1.9308725595474243, "learning_rate": 1.6186016677357282e-06, "loss": 0.8269, "step": 56820 }, { "epoch": 0.6925401874398255, "grad_norm": 1.9383771419525146, "learning_rate": 1.6182809493264915e-06, "loss": 0.7977, "step": 56825 }, { "epoch": 0.6926011236639733, "grad_norm": 1.967758059501648, "learning_rate": 1.617960230917255e-06, "loss": 0.7697, "step": 56830 }, { "epoch": 0.6926620598881211, "grad_norm": 2.0521488189697266, "learning_rate": 1.617639512508018e-06, "loss": 0.7957, "step": 56835 }, { "epoch": 0.692722996112269, "grad_norm": 2.0482113361358643, "learning_rate": 1.6173187940987814e-06, "loss": 0.8467, "step": 56840 }, { "epoch": 0.6927839323364167, "grad_norm": 1.9854164123535156, "learning_rate": 1.6169980756895448e-06, "loss": 0.8338, "step": 56845 }, { "epoch": 0.6928448685605645, "grad_norm": 1.6513878107070923, "learning_rate": 1.616677357280308e-06, "loss": 0.7851, "step": 56850 }, { "epoch": 0.6929058047847123, "grad_norm": 2.200448513031006, "learning_rate": 1.6163566388710715e-06, "loss": 0.8904, "step": 56855 }, { "epoch": 0.6929667410088601, "grad_norm": 1.9243395328521729, "learning_rate": 1.6160359204618347e-06, "loss": 0.8325, "step": 56860 }, { "epoch": 0.693027677233008, "grad_norm": 1.9824849367141724, "learning_rate": 1.615715202052598e-06, "loss": 0.7999, "step": 56865 }, { "epoch": 0.6930886134571558, "grad_norm": 1.9860470294952393, "learning_rate": 1.6153944836433614e-06, "loss": 0.8162, "step": 56870 }, { "epoch": 0.6931495496813036, "grad_norm": 2.1822328567504883, "learning_rate": 1.6150737652341244e-06, "loss": 0.8213, "step": 56875 }, { "epoch": 0.6932104859054513, "grad_norm": 1.9381113052368164, "learning_rate": 1.6147530468248878e-06, "loss": 0.8603, "step": 56880 }, { "epoch": 0.6932714221295991, "grad_norm": 1.8632996082305908, "learning_rate": 1.6144323284156513e-06, "loss": 0.8071, "step": 56885 }, { "epoch": 0.693332358353747, "grad_norm": 1.884050965309143, "learning_rate": 1.6141116100064145e-06, "loss": 0.7499, "step": 56890 }, { "epoch": 0.6933932945778948, "grad_norm": 1.6133710145950317, "learning_rate": 1.613790891597178e-06, "loss": 0.8397, "step": 56895 }, { "epoch": 0.6934542308020426, "grad_norm": 1.606187105178833, "learning_rate": 1.6134701731879412e-06, "loss": 0.7965, "step": 56900 }, { "epoch": 0.6935151670261904, "grad_norm": 1.7974976301193237, "learning_rate": 1.6131494547787044e-06, "loss": 0.8424, "step": 56905 }, { "epoch": 0.6935761032503382, "grad_norm": 1.8959094285964966, "learning_rate": 1.6128287363694678e-06, "loss": 0.7885, "step": 56910 }, { "epoch": 0.693637039474486, "grad_norm": 1.815131425857544, "learning_rate": 1.6125080179602308e-06, "loss": 0.8205, "step": 56915 }, { "epoch": 0.6936979756986338, "grad_norm": 1.9052456617355347, "learning_rate": 1.6121872995509943e-06, "loss": 0.8134, "step": 56920 }, { "epoch": 0.6937589119227816, "grad_norm": 1.927565574645996, "learning_rate": 1.6118665811417577e-06, "loss": 0.8511, "step": 56925 }, { "epoch": 0.6938198481469294, "grad_norm": 1.7236037254333496, "learning_rate": 1.611545862732521e-06, "loss": 0.7504, "step": 56930 }, { "epoch": 0.6938807843710773, "grad_norm": 2.0742340087890625, "learning_rate": 1.6112251443232844e-06, "loss": 0.8482, "step": 56935 }, { "epoch": 0.6939417205952251, "grad_norm": 1.6925010681152344, "learning_rate": 1.6109044259140476e-06, "loss": 0.7896, "step": 56940 }, { "epoch": 0.6940026568193729, "grad_norm": 1.9142625331878662, "learning_rate": 1.6105837075048108e-06, "loss": 0.8106, "step": 56945 }, { "epoch": 0.6940635930435206, "grad_norm": 2.2758829593658447, "learning_rate": 1.6102629890955743e-06, "loss": 0.8105, "step": 56950 }, { "epoch": 0.6941245292676684, "grad_norm": 1.7694580554962158, "learning_rate": 1.6099422706863373e-06, "loss": 0.8576, "step": 56955 }, { "epoch": 0.6941854654918163, "grad_norm": 1.7124569416046143, "learning_rate": 1.6096215522771007e-06, "loss": 0.8078, "step": 56960 }, { "epoch": 0.6942464017159641, "grad_norm": 1.9570786952972412, "learning_rate": 1.6093008338678642e-06, "loss": 0.8028, "step": 56965 }, { "epoch": 0.6943073379401119, "grad_norm": 1.7453703880310059, "learning_rate": 1.6089801154586274e-06, "loss": 0.7372, "step": 56970 }, { "epoch": 0.6943682741642597, "grad_norm": 1.8312627077102661, "learning_rate": 1.6086593970493909e-06, "loss": 0.8525, "step": 56975 }, { "epoch": 0.6944292103884075, "grad_norm": 1.9438945055007935, "learning_rate": 1.6083386786401543e-06, "loss": 0.8067, "step": 56980 }, { "epoch": 0.6944901466125553, "grad_norm": 1.7258793115615845, "learning_rate": 1.6080179602309173e-06, "loss": 0.7446, "step": 56985 }, { "epoch": 0.6945510828367031, "grad_norm": 1.984964370727539, "learning_rate": 1.6076972418216807e-06, "loss": 0.7658, "step": 56990 }, { "epoch": 0.6946120190608509, "grad_norm": 1.9817039966583252, "learning_rate": 1.607376523412444e-06, "loss": 0.8599, "step": 56995 }, { "epoch": 0.6946729552849987, "grad_norm": 1.9286859035491943, "learning_rate": 1.6070558050032072e-06, "loss": 0.8407, "step": 57000 }, { "epoch": 0.6947338915091466, "grad_norm": 1.8945105075836182, "learning_rate": 1.6067350865939706e-06, "loss": 0.7781, "step": 57005 }, { "epoch": 0.6947948277332944, "grad_norm": 2.3297767639160156, "learning_rate": 1.6064143681847339e-06, "loss": 0.826, "step": 57010 }, { "epoch": 0.6948557639574422, "grad_norm": 1.8383556604385376, "learning_rate": 1.6060936497754973e-06, "loss": 0.9053, "step": 57015 }, { "epoch": 0.6949167001815899, "grad_norm": 2.336988687515259, "learning_rate": 1.6057729313662608e-06, "loss": 0.8784, "step": 57020 }, { "epoch": 0.6949776364057377, "grad_norm": 2.2657690048217773, "learning_rate": 1.6054522129570238e-06, "loss": 0.8519, "step": 57025 }, { "epoch": 0.6950385726298856, "grad_norm": 1.856864094734192, "learning_rate": 1.6051314945477872e-06, "loss": 0.8385, "step": 57030 }, { "epoch": 0.6950995088540334, "grad_norm": 2.0028812885284424, "learning_rate": 1.6048107761385504e-06, "loss": 0.8142, "step": 57035 }, { "epoch": 0.6951604450781812, "grad_norm": 2.0799500942230225, "learning_rate": 1.6044900577293137e-06, "loss": 0.8089, "step": 57040 }, { "epoch": 0.695221381302329, "grad_norm": 1.9962456226348877, "learning_rate": 1.6041693393200771e-06, "loss": 0.8208, "step": 57045 }, { "epoch": 0.6952823175264767, "grad_norm": 1.9752167463302612, "learning_rate": 1.6038486209108403e-06, "loss": 0.8367, "step": 57050 }, { "epoch": 0.6953432537506246, "grad_norm": 2.387350559234619, "learning_rate": 1.6035279025016038e-06, "loss": 0.7677, "step": 57055 }, { "epoch": 0.6954041899747724, "grad_norm": 1.9627360105514526, "learning_rate": 1.6032071840923672e-06, "loss": 0.794, "step": 57060 }, { "epoch": 0.6954651261989202, "grad_norm": 1.8609516620635986, "learning_rate": 1.6028864656831302e-06, "loss": 0.8068, "step": 57065 }, { "epoch": 0.695526062423068, "grad_norm": 1.9488563537597656, "learning_rate": 1.6025657472738937e-06, "loss": 0.8508, "step": 57070 }, { "epoch": 0.6955869986472158, "grad_norm": 1.6186968088150024, "learning_rate": 1.602245028864657e-06, "loss": 0.824, "step": 57075 }, { "epoch": 0.6956479348713637, "grad_norm": 2.1263792514801025, "learning_rate": 1.6019243104554203e-06, "loss": 0.9031, "step": 57080 }, { "epoch": 0.6957088710955114, "grad_norm": 1.8632105588912964, "learning_rate": 1.6016035920461836e-06, "loss": 0.8357, "step": 57085 }, { "epoch": 0.6957698073196592, "grad_norm": 2.3649420738220215, "learning_rate": 1.6012828736369468e-06, "loss": 0.7784, "step": 57090 }, { "epoch": 0.695830743543807, "grad_norm": 1.5174875259399414, "learning_rate": 1.6009621552277102e-06, "loss": 0.7943, "step": 57095 }, { "epoch": 0.6958916797679549, "grad_norm": 1.9771671295166016, "learning_rate": 1.6006414368184737e-06, "loss": 0.7922, "step": 57100 }, { "epoch": 0.6959526159921027, "grad_norm": 1.919111728668213, "learning_rate": 1.6003207184092367e-06, "loss": 0.7706, "step": 57105 }, { "epoch": 0.6960135522162505, "grad_norm": 1.9458377361297607, "learning_rate": 1.6000000000000001e-06, "loss": 0.7901, "step": 57110 }, { "epoch": 0.6960744884403983, "grad_norm": 1.7648066282272339, "learning_rate": 1.5996792815907636e-06, "loss": 0.8615, "step": 57115 }, { "epoch": 0.696135424664546, "grad_norm": 1.9567118883132935, "learning_rate": 1.5993585631815268e-06, "loss": 0.8932, "step": 57120 }, { "epoch": 0.6961963608886939, "grad_norm": 2.034034490585327, "learning_rate": 1.59903784477229e-06, "loss": 0.8247, "step": 57125 }, { "epoch": 0.6962572971128417, "grad_norm": 2.0088143348693848, "learning_rate": 1.5987171263630533e-06, "loss": 0.8132, "step": 57130 }, { "epoch": 0.6963182333369895, "grad_norm": 1.8184725046157837, "learning_rate": 1.5983964079538167e-06, "loss": 0.8184, "step": 57135 }, { "epoch": 0.6963791695611373, "grad_norm": 1.8564878702163696, "learning_rate": 1.5980756895445801e-06, "loss": 0.826, "step": 57140 }, { "epoch": 0.6964401057852851, "grad_norm": 1.8626649379730225, "learning_rate": 1.5977549711353432e-06, "loss": 0.7999, "step": 57145 }, { "epoch": 0.696501042009433, "grad_norm": 2.077732563018799, "learning_rate": 1.5974342527261066e-06, "loss": 0.8178, "step": 57150 }, { "epoch": 0.6965619782335807, "grad_norm": 2.305418014526367, "learning_rate": 1.59711353431687e-06, "loss": 0.8416, "step": 57155 }, { "epoch": 0.6966229144577285, "grad_norm": 2.058941125869751, "learning_rate": 1.5967928159076333e-06, "loss": 0.8326, "step": 57160 }, { "epoch": 0.6966838506818763, "grad_norm": 2.001615285873413, "learning_rate": 1.5964720974983965e-06, "loss": 0.8181, "step": 57165 }, { "epoch": 0.6967447869060241, "grad_norm": 2.287135362625122, "learning_rate": 1.5961513790891597e-06, "loss": 0.8877, "step": 57170 }, { "epoch": 0.696805723130172, "grad_norm": 1.8356173038482666, "learning_rate": 1.5958306606799232e-06, "loss": 0.8281, "step": 57175 }, { "epoch": 0.6968666593543198, "grad_norm": 1.9171500205993652, "learning_rate": 1.5955099422706866e-06, "loss": 0.8134, "step": 57180 }, { "epoch": 0.6969275955784676, "grad_norm": 1.8348439931869507, "learning_rate": 1.5951892238614496e-06, "loss": 0.8311, "step": 57185 }, { "epoch": 0.6969885318026153, "grad_norm": 2.1199519634246826, "learning_rate": 1.594868505452213e-06, "loss": 0.8494, "step": 57190 }, { "epoch": 0.6970494680267632, "grad_norm": 2.267646074295044, "learning_rate": 1.5945477870429765e-06, "loss": 0.7829, "step": 57195 }, { "epoch": 0.697110404250911, "grad_norm": 1.8500064611434937, "learning_rate": 1.5942270686337397e-06, "loss": 0.8697, "step": 57200 }, { "epoch": 0.6971713404750588, "grad_norm": 2.027904748916626, "learning_rate": 1.5939063502245032e-06, "loss": 0.8502, "step": 57205 }, { "epoch": 0.6972322766992066, "grad_norm": 1.795812726020813, "learning_rate": 1.5935856318152662e-06, "loss": 0.8217, "step": 57210 }, { "epoch": 0.6972932129233544, "grad_norm": 1.9549435377120972, "learning_rate": 1.5932649134060296e-06, "loss": 0.8109, "step": 57215 }, { "epoch": 0.6973541491475023, "grad_norm": 1.8572883605957031, "learning_rate": 1.592944194996793e-06, "loss": 0.8585, "step": 57220 }, { "epoch": 0.69741508537165, "grad_norm": 2.005797863006592, "learning_rate": 1.592623476587556e-06, "loss": 0.841, "step": 57225 }, { "epoch": 0.6974760215957978, "grad_norm": 2.200575828552246, "learning_rate": 1.5923027581783195e-06, "loss": 0.7911, "step": 57230 }, { "epoch": 0.6975369578199456, "grad_norm": 1.9283030033111572, "learning_rate": 1.591982039769083e-06, "loss": 0.8187, "step": 57235 }, { "epoch": 0.6975978940440934, "grad_norm": 1.7533321380615234, "learning_rate": 1.5916613213598462e-06, "loss": 0.785, "step": 57240 }, { "epoch": 0.6976588302682413, "grad_norm": 1.9281545877456665, "learning_rate": 1.5913406029506096e-06, "loss": 0.8291, "step": 57245 }, { "epoch": 0.6977197664923891, "grad_norm": 1.8983657360076904, "learning_rate": 1.5910198845413727e-06, "loss": 0.8501, "step": 57250 }, { "epoch": 0.6977807027165369, "grad_norm": 1.957205891609192, "learning_rate": 1.590699166132136e-06, "loss": 0.8848, "step": 57255 }, { "epoch": 0.6978416389406846, "grad_norm": 1.8416799306869507, "learning_rate": 1.5903784477228995e-06, "loss": 0.85, "step": 57260 }, { "epoch": 0.6979025751648325, "grad_norm": 1.9943783283233643, "learning_rate": 1.5900577293136626e-06, "loss": 0.8072, "step": 57265 }, { "epoch": 0.6979635113889803, "grad_norm": 2.3643405437469482, "learning_rate": 1.589737010904426e-06, "loss": 0.8536, "step": 57270 }, { "epoch": 0.6980244476131281, "grad_norm": 1.9823641777038574, "learning_rate": 1.5894162924951894e-06, "loss": 0.8769, "step": 57275 }, { "epoch": 0.6980853838372759, "grad_norm": 2.3326311111450195, "learning_rate": 1.5890955740859527e-06, "loss": 0.845, "step": 57280 }, { "epoch": 0.6981463200614237, "grad_norm": 2.0576882362365723, "learning_rate": 1.588774855676716e-06, "loss": 0.931, "step": 57285 }, { "epoch": 0.6982072562855716, "grad_norm": 2.219627857208252, "learning_rate": 1.5884541372674791e-06, "loss": 0.8693, "step": 57290 }, { "epoch": 0.6982681925097193, "grad_norm": 2.212597131729126, "learning_rate": 1.5881334188582426e-06, "loss": 0.8161, "step": 57295 }, { "epoch": 0.6983291287338671, "grad_norm": 2.140230655670166, "learning_rate": 1.587812700449006e-06, "loss": 0.7809, "step": 57300 }, { "epoch": 0.6983900649580149, "grad_norm": 1.8692288398742676, "learning_rate": 1.587491982039769e-06, "loss": 0.8151, "step": 57305 }, { "epoch": 0.6984510011821627, "grad_norm": 1.7956827878952026, "learning_rate": 1.5871712636305325e-06, "loss": 0.7492, "step": 57310 }, { "epoch": 0.6985119374063106, "grad_norm": 2.0800089836120605, "learning_rate": 1.586850545221296e-06, "loss": 0.8054, "step": 57315 }, { "epoch": 0.6985728736304584, "grad_norm": 2.1861255168914795, "learning_rate": 1.5865298268120591e-06, "loss": 0.7604, "step": 57320 }, { "epoch": 0.6986338098546062, "grad_norm": 1.9017640352249146, "learning_rate": 1.5862091084028226e-06, "loss": 0.8134, "step": 57325 }, { "epoch": 0.6986947460787539, "grad_norm": 2.1081292629241943, "learning_rate": 1.5858883899935856e-06, "loss": 0.8154, "step": 57330 }, { "epoch": 0.6987556823029017, "grad_norm": 1.7543987035751343, "learning_rate": 1.585567671584349e-06, "loss": 0.7915, "step": 57335 }, { "epoch": 0.6988166185270496, "grad_norm": 1.882572889328003, "learning_rate": 1.5852469531751125e-06, "loss": 0.8452, "step": 57340 }, { "epoch": 0.6988775547511974, "grad_norm": 1.5984218120574951, "learning_rate": 1.5849262347658757e-06, "loss": 0.8007, "step": 57345 }, { "epoch": 0.6989384909753452, "grad_norm": 1.7077223062515259, "learning_rate": 1.584605516356639e-06, "loss": 0.7901, "step": 57350 }, { "epoch": 0.698999427199493, "grad_norm": 2.0917768478393555, "learning_rate": 1.5842847979474024e-06, "loss": 0.8315, "step": 57355 }, { "epoch": 0.6990603634236409, "grad_norm": 1.8597214221954346, "learning_rate": 1.5839640795381656e-06, "loss": 0.8466, "step": 57360 }, { "epoch": 0.6991212996477886, "grad_norm": 2.0284180641174316, "learning_rate": 1.583643361128929e-06, "loss": 0.7786, "step": 57365 }, { "epoch": 0.6991822358719364, "grad_norm": 2.0213820934295654, "learning_rate": 1.583322642719692e-06, "loss": 0.8432, "step": 57370 }, { "epoch": 0.6992431720960842, "grad_norm": 2.0106050968170166, "learning_rate": 1.5830019243104555e-06, "loss": 0.8596, "step": 57375 }, { "epoch": 0.699304108320232, "grad_norm": 2.104660987854004, "learning_rate": 1.582681205901219e-06, "loss": 0.7547, "step": 57380 }, { "epoch": 0.6993650445443799, "grad_norm": 2.699420690536499, "learning_rate": 1.5823604874919822e-06, "loss": 0.8447, "step": 57385 }, { "epoch": 0.6994259807685277, "grad_norm": 1.8165111541748047, "learning_rate": 1.5820397690827454e-06, "loss": 0.751, "step": 57390 }, { "epoch": 0.6994869169926755, "grad_norm": 1.996073842048645, "learning_rate": 1.5817190506735088e-06, "loss": 0.7545, "step": 57395 }, { "epoch": 0.6995478532168232, "grad_norm": 2.6003682613372803, "learning_rate": 1.581398332264272e-06, "loss": 0.8266, "step": 57400 }, { "epoch": 0.699608789440971, "grad_norm": 2.1780238151550293, "learning_rate": 1.5810776138550355e-06, "loss": 0.8556, "step": 57405 }, { "epoch": 0.6996697256651189, "grad_norm": 1.9353997707366943, "learning_rate": 1.580756895445799e-06, "loss": 0.8268, "step": 57410 }, { "epoch": 0.6997306618892667, "grad_norm": 2.374255895614624, "learning_rate": 1.580436177036562e-06, "loss": 0.8391, "step": 57415 }, { "epoch": 0.6997915981134145, "grad_norm": 2.2075934410095215, "learning_rate": 1.5801154586273254e-06, "loss": 0.816, "step": 57420 }, { "epoch": 0.6998525343375623, "grad_norm": 1.8098032474517822, "learning_rate": 1.5797947402180886e-06, "loss": 0.8021, "step": 57425 }, { "epoch": 0.6999134705617102, "grad_norm": 2.135303020477295, "learning_rate": 1.579474021808852e-06, "loss": 0.9096, "step": 57430 }, { "epoch": 0.6999744067858579, "grad_norm": 1.9914634227752686, "learning_rate": 1.5791533033996153e-06, "loss": 0.8522, "step": 57435 }, { "epoch": 0.7000353430100057, "grad_norm": 2.1199026107788086, "learning_rate": 1.5788325849903785e-06, "loss": 0.7895, "step": 57440 }, { "epoch": 0.7000962792341535, "grad_norm": 1.8774960041046143, "learning_rate": 1.578511866581142e-06, "loss": 0.8712, "step": 57445 }, { "epoch": 0.7001572154583013, "grad_norm": 1.8795216083526611, "learning_rate": 1.5781911481719054e-06, "loss": 0.8416, "step": 57450 }, { "epoch": 0.7002181516824492, "grad_norm": 2.2666125297546387, "learning_rate": 1.5778704297626684e-06, "loss": 0.7549, "step": 57455 }, { "epoch": 0.700279087906597, "grad_norm": 1.8572535514831543, "learning_rate": 1.5775497113534319e-06, "loss": 0.808, "step": 57460 }, { "epoch": 0.7003400241307448, "grad_norm": 1.9053363800048828, "learning_rate": 1.577228992944195e-06, "loss": 0.8304, "step": 57465 }, { "epoch": 0.7004009603548925, "grad_norm": 1.9242302179336548, "learning_rate": 1.5769082745349585e-06, "loss": 0.8868, "step": 57470 }, { "epoch": 0.7004618965790403, "grad_norm": 2.0873818397521973, "learning_rate": 1.5765875561257218e-06, "loss": 0.8531, "step": 57475 }, { "epoch": 0.7005228328031882, "grad_norm": 2.193920612335205, "learning_rate": 1.576266837716485e-06, "loss": 0.8612, "step": 57480 }, { "epoch": 0.700583769027336, "grad_norm": 2.126709461212158, "learning_rate": 1.5759461193072484e-06, "loss": 0.8204, "step": 57485 }, { "epoch": 0.7006447052514838, "grad_norm": 1.8011138439178467, "learning_rate": 1.5756254008980119e-06, "loss": 0.8208, "step": 57490 }, { "epoch": 0.7007056414756316, "grad_norm": 1.6269429922103882, "learning_rate": 1.5753046824887749e-06, "loss": 0.8482, "step": 57495 }, { "epoch": 0.7007665776997795, "grad_norm": 1.9312889575958252, "learning_rate": 1.5749839640795383e-06, "loss": 0.8283, "step": 57500 }, { "epoch": 0.7008275139239272, "grad_norm": 2.1211369037628174, "learning_rate": 1.5746632456703015e-06, "loss": 0.7308, "step": 57505 }, { "epoch": 0.700888450148075, "grad_norm": 1.7041871547698975, "learning_rate": 1.574342527261065e-06, "loss": 0.8461, "step": 57510 }, { "epoch": 0.7009493863722228, "grad_norm": 1.90019690990448, "learning_rate": 1.5740218088518282e-06, "loss": 0.7301, "step": 57515 }, { "epoch": 0.7010103225963706, "grad_norm": 1.7681983709335327, "learning_rate": 1.5737010904425914e-06, "loss": 0.8747, "step": 57520 }, { "epoch": 0.7010712588205185, "grad_norm": 2.394847869873047, "learning_rate": 1.5733803720333549e-06, "loss": 0.8617, "step": 57525 }, { "epoch": 0.7011321950446663, "grad_norm": 1.8282949924468994, "learning_rate": 1.5730596536241183e-06, "loss": 0.7879, "step": 57530 }, { "epoch": 0.7011931312688141, "grad_norm": 2.6593515872955322, "learning_rate": 1.5727389352148813e-06, "loss": 0.7825, "step": 57535 }, { "epoch": 0.7012540674929618, "grad_norm": 2.2633872032165527, "learning_rate": 1.5724182168056448e-06, "loss": 0.8702, "step": 57540 }, { "epoch": 0.7013150037171096, "grad_norm": 1.74734628200531, "learning_rate": 1.572097498396408e-06, "loss": 0.7421, "step": 57545 }, { "epoch": 0.7013759399412575, "grad_norm": 2.1112751960754395, "learning_rate": 1.5717767799871715e-06, "loss": 0.7461, "step": 57550 }, { "epoch": 0.7014368761654053, "grad_norm": 2.052567720413208, "learning_rate": 1.5714560615779349e-06, "loss": 0.8398, "step": 57555 }, { "epoch": 0.7014978123895531, "grad_norm": 2.269534111022949, "learning_rate": 1.571135343168698e-06, "loss": 0.8585, "step": 57560 }, { "epoch": 0.7015587486137009, "grad_norm": 1.9547901153564453, "learning_rate": 1.5708146247594613e-06, "loss": 0.8445, "step": 57565 }, { "epoch": 0.7016196848378488, "grad_norm": 2.6368398666381836, "learning_rate": 1.5704939063502248e-06, "loss": 0.8211, "step": 57570 }, { "epoch": 0.7016806210619965, "grad_norm": 1.7965668439865112, "learning_rate": 1.5701731879409878e-06, "loss": 0.8291, "step": 57575 }, { "epoch": 0.7017415572861443, "grad_norm": 1.7921984195709229, "learning_rate": 1.5698524695317512e-06, "loss": 0.8211, "step": 57580 }, { "epoch": 0.7018024935102921, "grad_norm": 2.2324466705322266, "learning_rate": 1.5695317511225145e-06, "loss": 0.8472, "step": 57585 }, { "epoch": 0.7018634297344399, "grad_norm": 1.6502413749694824, "learning_rate": 1.569211032713278e-06, "loss": 0.8564, "step": 57590 }, { "epoch": 0.7019243659585878, "grad_norm": 1.9389690160751343, "learning_rate": 1.5688903143040414e-06, "loss": 0.8204, "step": 57595 }, { "epoch": 0.7019853021827356, "grad_norm": 1.8545106649398804, "learning_rate": 1.5685695958948044e-06, "loss": 0.8483, "step": 57600 }, { "epoch": 0.7020462384068834, "grad_norm": 1.980559229850769, "learning_rate": 1.5682488774855678e-06, "loss": 0.8584, "step": 57605 }, { "epoch": 0.7021071746310311, "grad_norm": 1.815006971359253, "learning_rate": 1.5679281590763313e-06, "loss": 0.8088, "step": 57610 }, { "epoch": 0.7021681108551789, "grad_norm": 2.0721449851989746, "learning_rate": 1.5676074406670943e-06, "loss": 0.8302, "step": 57615 }, { "epoch": 0.7022290470793268, "grad_norm": 1.9994323253631592, "learning_rate": 1.5672867222578577e-06, "loss": 0.742, "step": 57620 }, { "epoch": 0.7022899833034746, "grad_norm": 1.8137587308883667, "learning_rate": 1.566966003848621e-06, "loss": 0.7489, "step": 57625 }, { "epoch": 0.7023509195276224, "grad_norm": 2.03556752204895, "learning_rate": 1.5666452854393844e-06, "loss": 0.8632, "step": 57630 }, { "epoch": 0.7024118557517702, "grad_norm": 2.0053627490997314, "learning_rate": 1.5663245670301478e-06, "loss": 0.7875, "step": 57635 }, { "epoch": 0.702472791975918, "grad_norm": 2.5469601154327393, "learning_rate": 1.5660038486209108e-06, "loss": 0.8821, "step": 57640 }, { "epoch": 0.7025337282000658, "grad_norm": 1.8485081195831299, "learning_rate": 1.5656831302116743e-06, "loss": 0.8099, "step": 57645 }, { "epoch": 0.7025946644242136, "grad_norm": 2.120204210281372, "learning_rate": 1.5653624118024377e-06, "loss": 0.8299, "step": 57650 }, { "epoch": 0.7026556006483614, "grad_norm": 2.1268417835235596, "learning_rate": 1.5650416933932007e-06, "loss": 0.7637, "step": 57655 }, { "epoch": 0.7027165368725092, "grad_norm": 2.095912218093872, "learning_rate": 1.5647209749839642e-06, "loss": 0.8117, "step": 57660 }, { "epoch": 0.7027774730966571, "grad_norm": 1.8127118349075317, "learning_rate": 1.5644002565747274e-06, "loss": 0.7873, "step": 57665 }, { "epoch": 0.7028384093208049, "grad_norm": 1.9376636743545532, "learning_rate": 1.5640795381654908e-06, "loss": 0.7933, "step": 57670 }, { "epoch": 0.7028993455449527, "grad_norm": 1.9105145931243896, "learning_rate": 1.5637588197562543e-06, "loss": 0.7973, "step": 57675 }, { "epoch": 0.7029602817691004, "grad_norm": 1.9606648683547974, "learning_rate": 1.5634381013470173e-06, "loss": 0.7812, "step": 57680 }, { "epoch": 0.7030212179932482, "grad_norm": 2.3633170127868652, "learning_rate": 1.5631173829377807e-06, "loss": 0.7696, "step": 57685 }, { "epoch": 0.7030821542173961, "grad_norm": 2.0006916522979736, "learning_rate": 1.5627966645285442e-06, "loss": 0.8086, "step": 57690 }, { "epoch": 0.7031430904415439, "grad_norm": 1.890268325805664, "learning_rate": 1.5624759461193074e-06, "loss": 0.8381, "step": 57695 }, { "epoch": 0.7032040266656917, "grad_norm": 1.8796942234039307, "learning_rate": 1.5621552277100706e-06, "loss": 0.7834, "step": 57700 }, { "epoch": 0.7032649628898395, "grad_norm": 1.899777889251709, "learning_rate": 1.561834509300834e-06, "loss": 0.7794, "step": 57705 }, { "epoch": 0.7033258991139874, "grad_norm": 1.7688677310943604, "learning_rate": 1.5615137908915973e-06, "loss": 0.8269, "step": 57710 }, { "epoch": 0.7033868353381351, "grad_norm": 1.6773078441619873, "learning_rate": 1.5611930724823607e-06, "loss": 0.7753, "step": 57715 }, { "epoch": 0.7034477715622829, "grad_norm": 1.968705415725708, "learning_rate": 1.5608723540731238e-06, "loss": 0.7703, "step": 57720 }, { "epoch": 0.7035087077864307, "grad_norm": 2.0084617137908936, "learning_rate": 1.5605516356638872e-06, "loss": 0.8779, "step": 57725 }, { "epoch": 0.7035696440105785, "grad_norm": 1.9800273180007935, "learning_rate": 1.5602309172546506e-06, "loss": 0.848, "step": 57730 }, { "epoch": 0.7036305802347264, "grad_norm": 1.9462039470672607, "learning_rate": 1.5599101988454139e-06, "loss": 0.8303, "step": 57735 }, { "epoch": 0.7036915164588742, "grad_norm": 1.7821170091629028, "learning_rate": 1.559589480436177e-06, "loss": 0.8348, "step": 57740 }, { "epoch": 0.703752452683022, "grad_norm": 2.2175045013427734, "learning_rate": 1.5592687620269405e-06, "loss": 0.8104, "step": 57745 }, { "epoch": 0.7038133889071697, "grad_norm": 1.987677812576294, "learning_rate": 1.5589480436177038e-06, "loss": 0.8126, "step": 57750 }, { "epoch": 0.7038743251313175, "grad_norm": 2.071803569793701, "learning_rate": 1.5586273252084672e-06, "loss": 0.9166, "step": 57755 }, { "epoch": 0.7039352613554654, "grad_norm": 1.9292608499526978, "learning_rate": 1.5583066067992302e-06, "loss": 0.7486, "step": 57760 }, { "epoch": 0.7039961975796132, "grad_norm": 1.782639741897583, "learning_rate": 1.5579858883899937e-06, "loss": 0.8413, "step": 57765 }, { "epoch": 0.704057133803761, "grad_norm": 1.894797921180725, "learning_rate": 1.557665169980757e-06, "loss": 0.7959, "step": 57770 }, { "epoch": 0.7041180700279088, "grad_norm": 1.957372784614563, "learning_rate": 1.5573444515715203e-06, "loss": 0.8468, "step": 57775 }, { "epoch": 0.7041790062520566, "grad_norm": 2.0754916667938232, "learning_rate": 1.5570237331622836e-06, "loss": 0.7756, "step": 57780 }, { "epoch": 0.7042399424762044, "grad_norm": 1.8870123624801636, "learning_rate": 1.556703014753047e-06, "loss": 0.8117, "step": 57785 }, { "epoch": 0.7043008787003522, "grad_norm": 2.3269271850585938, "learning_rate": 1.5563822963438102e-06, "loss": 0.8859, "step": 57790 }, { "epoch": 0.7043618149245, "grad_norm": 1.9556407928466797, "learning_rate": 1.5560615779345737e-06, "loss": 0.8731, "step": 57795 }, { "epoch": 0.7044227511486478, "grad_norm": 1.888946771621704, "learning_rate": 1.5557408595253367e-06, "loss": 0.8231, "step": 57800 }, { "epoch": 0.7044836873727957, "grad_norm": 1.9516384601593018, "learning_rate": 1.5554201411161001e-06, "loss": 0.8274, "step": 57805 }, { "epoch": 0.7045446235969435, "grad_norm": 2.054863929748535, "learning_rate": 1.5550994227068636e-06, "loss": 0.8437, "step": 57810 }, { "epoch": 0.7046055598210913, "grad_norm": 1.794517993927002, "learning_rate": 1.5547787042976268e-06, "loss": 0.8489, "step": 57815 }, { "epoch": 0.704666496045239, "grad_norm": 1.799264669418335, "learning_rate": 1.5544579858883902e-06, "loss": 0.7746, "step": 57820 }, { "epoch": 0.7047274322693868, "grad_norm": 2.0636751651763916, "learning_rate": 1.5541372674791535e-06, "loss": 0.8393, "step": 57825 }, { "epoch": 0.7047883684935347, "grad_norm": 1.6378133296966553, "learning_rate": 1.5538165490699167e-06, "loss": 0.8059, "step": 57830 }, { "epoch": 0.7048493047176825, "grad_norm": 1.7251957654953003, "learning_rate": 1.5534958306606801e-06, "loss": 0.8332, "step": 57835 }, { "epoch": 0.7049102409418303, "grad_norm": 1.7626066207885742, "learning_rate": 1.5531751122514432e-06, "loss": 0.8502, "step": 57840 }, { "epoch": 0.7049711771659781, "grad_norm": 2.059063196182251, "learning_rate": 1.5528543938422066e-06, "loss": 0.7728, "step": 57845 }, { "epoch": 0.705032113390126, "grad_norm": 2.0952324867248535, "learning_rate": 1.55253367543297e-06, "loss": 0.8031, "step": 57850 }, { "epoch": 0.7050930496142737, "grad_norm": 1.9158759117126465, "learning_rate": 1.5522129570237333e-06, "loss": 0.9101, "step": 57855 }, { "epoch": 0.7051539858384215, "grad_norm": 1.9873530864715576, "learning_rate": 1.5518922386144967e-06, "loss": 0.8636, "step": 57860 }, { "epoch": 0.7052149220625693, "grad_norm": 2.075680732727051, "learning_rate": 1.55157152020526e-06, "loss": 0.8375, "step": 57865 }, { "epoch": 0.7052758582867171, "grad_norm": 1.7375123500823975, "learning_rate": 1.5512508017960232e-06, "loss": 0.8564, "step": 57870 }, { "epoch": 0.705336794510865, "grad_norm": 2.0705769062042236, "learning_rate": 1.5509300833867866e-06, "loss": 0.7223, "step": 57875 }, { "epoch": 0.7053977307350128, "grad_norm": 1.9202399253845215, "learning_rate": 1.5506093649775496e-06, "loss": 0.7689, "step": 57880 }, { "epoch": 0.7054586669591606, "grad_norm": 1.7899779081344604, "learning_rate": 1.550288646568313e-06, "loss": 0.837, "step": 57885 }, { "epoch": 0.7055196031833083, "grad_norm": 2.1470305919647217, "learning_rate": 1.5499679281590765e-06, "loss": 0.821, "step": 57890 }, { "epoch": 0.7055805394074561, "grad_norm": 2.0233845710754395, "learning_rate": 1.5496472097498397e-06, "loss": 0.8217, "step": 57895 }, { "epoch": 0.705641475631604, "grad_norm": 1.983508586883545, "learning_rate": 1.5493264913406032e-06, "loss": 0.8334, "step": 57900 }, { "epoch": 0.7057024118557518, "grad_norm": 2.273155450820923, "learning_rate": 1.5490057729313666e-06, "loss": 0.7621, "step": 57905 }, { "epoch": 0.7057633480798996, "grad_norm": 1.9303661584854126, "learning_rate": 1.5486850545221296e-06, "loss": 0.8015, "step": 57910 }, { "epoch": 0.7058242843040474, "grad_norm": 1.759917140007019, "learning_rate": 1.548364336112893e-06, "loss": 0.8038, "step": 57915 }, { "epoch": 0.7058852205281952, "grad_norm": 1.8534244298934937, "learning_rate": 1.5480436177036563e-06, "loss": 0.8303, "step": 57920 }, { "epoch": 0.705946156752343, "grad_norm": 2.0494296550750732, "learning_rate": 1.5477228992944195e-06, "loss": 0.8568, "step": 57925 }, { "epoch": 0.7060070929764908, "grad_norm": 1.8529808521270752, "learning_rate": 1.547402180885183e-06, "loss": 0.7643, "step": 57930 }, { "epoch": 0.7060680292006386, "grad_norm": 2.463203191757202, "learning_rate": 1.5470814624759462e-06, "loss": 0.797, "step": 57935 }, { "epoch": 0.7061289654247864, "grad_norm": 2.009319305419922, "learning_rate": 1.5467607440667096e-06, "loss": 0.8493, "step": 57940 }, { "epoch": 0.7061899016489342, "grad_norm": 2.272653579711914, "learning_rate": 1.546440025657473e-06, "loss": 0.8804, "step": 57945 }, { "epoch": 0.7062508378730821, "grad_norm": 1.9297164678573608, "learning_rate": 1.546119307248236e-06, "loss": 0.7683, "step": 57950 }, { "epoch": 0.7063117740972299, "grad_norm": 2.0790188312530518, "learning_rate": 1.5457985888389995e-06, "loss": 0.8821, "step": 57955 }, { "epoch": 0.7063727103213776, "grad_norm": 2.165005922317505, "learning_rate": 1.5454778704297628e-06, "loss": 0.8425, "step": 57960 }, { "epoch": 0.7064336465455254, "grad_norm": 1.974434733390808, "learning_rate": 1.545157152020526e-06, "loss": 0.8555, "step": 57965 }, { "epoch": 0.7064945827696733, "grad_norm": 1.962428092956543, "learning_rate": 1.5448364336112894e-06, "loss": 0.8617, "step": 57970 }, { "epoch": 0.7065555189938211, "grad_norm": 1.7106014490127563, "learning_rate": 1.5445157152020527e-06, "loss": 0.7622, "step": 57975 }, { "epoch": 0.7066164552179689, "grad_norm": 1.8883132934570312, "learning_rate": 1.544194996792816e-06, "loss": 0.8122, "step": 57980 }, { "epoch": 0.7066773914421167, "grad_norm": 1.7500579357147217, "learning_rate": 1.5438742783835795e-06, "loss": 0.7756, "step": 57985 }, { "epoch": 0.7067383276662645, "grad_norm": 1.8024955987930298, "learning_rate": 1.5435535599743425e-06, "loss": 0.8261, "step": 57990 }, { "epoch": 0.7067992638904123, "grad_norm": 2.0057969093322754, "learning_rate": 1.543232841565106e-06, "loss": 0.9721, "step": 57995 }, { "epoch": 0.7068602001145601, "grad_norm": 1.8651124238967896, "learning_rate": 1.5429121231558692e-06, "loss": 0.7625, "step": 58000 }, { "epoch": 0.7069211363387079, "grad_norm": 1.9334523677825928, "learning_rate": 1.5425914047466324e-06, "loss": 0.819, "step": 58005 }, { "epoch": 0.7069820725628557, "grad_norm": 1.9187995195388794, "learning_rate": 1.5422706863373959e-06, "loss": 0.8753, "step": 58010 }, { "epoch": 0.7070430087870035, "grad_norm": 1.8121477365493774, "learning_rate": 1.5419499679281591e-06, "loss": 0.761, "step": 58015 }, { "epoch": 0.7071039450111514, "grad_norm": 1.9297188520431519, "learning_rate": 1.5416292495189226e-06, "loss": 0.8543, "step": 58020 }, { "epoch": 0.7071648812352991, "grad_norm": 2.0204670429229736, "learning_rate": 1.541308531109686e-06, "loss": 0.8599, "step": 58025 }, { "epoch": 0.7072258174594469, "grad_norm": 1.9245940446853638, "learning_rate": 1.540987812700449e-06, "loss": 0.8871, "step": 58030 }, { "epoch": 0.7072867536835947, "grad_norm": 1.8553087711334229, "learning_rate": 1.5406670942912125e-06, "loss": 0.837, "step": 58035 }, { "epoch": 0.7073476899077425, "grad_norm": 1.986572504043579, "learning_rate": 1.5403463758819759e-06, "loss": 0.8251, "step": 58040 }, { "epoch": 0.7074086261318904, "grad_norm": 2.3259739875793457, "learning_rate": 1.5400256574727391e-06, "loss": 0.6797, "step": 58045 }, { "epoch": 0.7074695623560382, "grad_norm": 1.8800182342529297, "learning_rate": 1.5397049390635024e-06, "loss": 0.8412, "step": 58050 }, { "epoch": 0.707530498580186, "grad_norm": 2.02382755279541, "learning_rate": 1.5393842206542656e-06, "loss": 0.8694, "step": 58055 }, { "epoch": 0.7075914348043337, "grad_norm": 2.2405054569244385, "learning_rate": 1.539063502245029e-06, "loss": 0.7966, "step": 58060 }, { "epoch": 0.7076523710284816, "grad_norm": 1.8170678615570068, "learning_rate": 1.5387427838357925e-06, "loss": 0.7961, "step": 58065 }, { "epoch": 0.7077133072526294, "grad_norm": 1.7028732299804688, "learning_rate": 1.5384220654265555e-06, "loss": 0.8819, "step": 58070 }, { "epoch": 0.7077742434767772, "grad_norm": 1.8061890602111816, "learning_rate": 1.538101347017319e-06, "loss": 0.8143, "step": 58075 }, { "epoch": 0.707835179700925, "grad_norm": 1.8929566144943237, "learning_rate": 1.5377806286080824e-06, "loss": 0.8327, "step": 58080 }, { "epoch": 0.7078961159250728, "grad_norm": 2.2033987045288086, "learning_rate": 1.5374599101988456e-06, "loss": 0.8361, "step": 58085 }, { "epoch": 0.7079570521492207, "grad_norm": 2.0486037731170654, "learning_rate": 1.5371391917896088e-06, "loss": 0.7134, "step": 58090 }, { "epoch": 0.7080179883733684, "grad_norm": 2.1435563564300537, "learning_rate": 1.536818473380372e-06, "loss": 0.8992, "step": 58095 }, { "epoch": 0.7080789245975162, "grad_norm": 2.3419740200042725, "learning_rate": 1.5364977549711355e-06, "loss": 0.7923, "step": 58100 }, { "epoch": 0.708139860821664, "grad_norm": 2.4002997875213623, "learning_rate": 1.536177036561899e-06, "loss": 0.8426, "step": 58105 }, { "epoch": 0.7082007970458118, "grad_norm": 1.9367183446884155, "learning_rate": 1.535856318152662e-06, "loss": 0.7408, "step": 58110 }, { "epoch": 0.7082617332699597, "grad_norm": 1.8680980205535889, "learning_rate": 1.5355355997434254e-06, "loss": 0.7568, "step": 58115 }, { "epoch": 0.7083226694941075, "grad_norm": 1.6608134508132935, "learning_rate": 1.5352148813341888e-06, "loss": 0.7973, "step": 58120 }, { "epoch": 0.7083836057182553, "grad_norm": 1.8200736045837402, "learning_rate": 1.534894162924952e-06, "loss": 0.8265, "step": 58125 }, { "epoch": 0.708444541942403, "grad_norm": 1.8588299751281738, "learning_rate": 1.5345734445157153e-06, "loss": 0.7334, "step": 58130 }, { "epoch": 0.7085054781665509, "grad_norm": 2.2856333255767822, "learning_rate": 1.5342527261064785e-06, "loss": 0.7959, "step": 58135 }, { "epoch": 0.7085664143906987, "grad_norm": 1.9506566524505615, "learning_rate": 1.533932007697242e-06, "loss": 0.8, "step": 58140 }, { "epoch": 0.7086273506148465, "grad_norm": 1.910021424293518, "learning_rate": 1.5336112892880054e-06, "loss": 0.8164, "step": 58145 }, { "epoch": 0.7086882868389943, "grad_norm": 2.012831687927246, "learning_rate": 1.5332905708787684e-06, "loss": 0.8507, "step": 58150 }, { "epoch": 0.7087492230631421, "grad_norm": 2.3624911308288574, "learning_rate": 1.5329698524695318e-06, "loss": 0.8465, "step": 58155 }, { "epoch": 0.70881015928729, "grad_norm": 2.532360792160034, "learning_rate": 1.5326491340602953e-06, "loss": 0.7797, "step": 58160 }, { "epoch": 0.7088710955114377, "grad_norm": 2.2532880306243896, "learning_rate": 1.5323284156510585e-06, "loss": 0.8353, "step": 58165 }, { "epoch": 0.7089320317355855, "grad_norm": 1.8169605731964111, "learning_rate": 1.532007697241822e-06, "loss": 0.8408, "step": 58170 }, { "epoch": 0.7089929679597333, "grad_norm": 2.1482937335968018, "learning_rate": 1.531686978832585e-06, "loss": 0.7858, "step": 58175 }, { "epoch": 0.7090539041838811, "grad_norm": 1.755723476409912, "learning_rate": 1.5313662604233484e-06, "loss": 0.8067, "step": 58180 }, { "epoch": 0.709114840408029, "grad_norm": 1.9205143451690674, "learning_rate": 1.5310455420141119e-06, "loss": 0.7653, "step": 58185 }, { "epoch": 0.7091757766321768, "grad_norm": 1.7012908458709717, "learning_rate": 1.5307248236048749e-06, "loss": 0.835, "step": 58190 }, { "epoch": 0.7092367128563246, "grad_norm": 2.0240864753723145, "learning_rate": 1.5304041051956383e-06, "loss": 0.8284, "step": 58195 }, { "epoch": 0.7092976490804723, "grad_norm": 1.8143353462219238, "learning_rate": 1.5300833867864017e-06, "loss": 0.8274, "step": 58200 }, { "epoch": 0.7093585853046201, "grad_norm": 1.770471453666687, "learning_rate": 1.529762668377165e-06, "loss": 0.7933, "step": 58205 }, { "epoch": 0.709419521528768, "grad_norm": 1.8853775262832642, "learning_rate": 1.5294419499679284e-06, "loss": 0.8532, "step": 58210 }, { "epoch": 0.7094804577529158, "grad_norm": 1.9412291049957275, "learning_rate": 1.5291212315586914e-06, "loss": 0.8663, "step": 58215 }, { "epoch": 0.7095413939770636, "grad_norm": 1.8965049982070923, "learning_rate": 1.5288005131494549e-06, "loss": 0.8479, "step": 58220 }, { "epoch": 0.7096023302012114, "grad_norm": 1.7915691137313843, "learning_rate": 1.5284797947402183e-06, "loss": 0.7693, "step": 58225 }, { "epoch": 0.7096632664253593, "grad_norm": 1.9817372560501099, "learning_rate": 1.5281590763309813e-06, "loss": 0.8324, "step": 58230 }, { "epoch": 0.709724202649507, "grad_norm": 2.023423671722412, "learning_rate": 1.5278383579217448e-06, "loss": 0.8049, "step": 58235 }, { "epoch": 0.7097851388736548, "grad_norm": 1.77305269241333, "learning_rate": 1.5275176395125082e-06, "loss": 0.7828, "step": 58240 }, { "epoch": 0.7098460750978026, "grad_norm": 1.90569007396698, "learning_rate": 1.5271969211032714e-06, "loss": 0.8492, "step": 58245 }, { "epoch": 0.7099070113219504, "grad_norm": 2.1420443058013916, "learning_rate": 1.5268762026940349e-06, "loss": 0.7292, "step": 58250 }, { "epoch": 0.7099679475460983, "grad_norm": 1.8830622434616089, "learning_rate": 1.526555484284798e-06, "loss": 0.7984, "step": 58255 }, { "epoch": 0.7100288837702461, "grad_norm": 1.9269436597824097, "learning_rate": 1.5262347658755613e-06, "loss": 0.7852, "step": 58260 }, { "epoch": 0.7100898199943939, "grad_norm": 2.1199562549591064, "learning_rate": 1.5259140474663248e-06, "loss": 0.8419, "step": 58265 }, { "epoch": 0.7101507562185416, "grad_norm": 2.18422269821167, "learning_rate": 1.525593329057088e-06, "loss": 0.8451, "step": 58270 }, { "epoch": 0.7102116924426894, "grad_norm": 1.9863938093185425, "learning_rate": 1.5252726106478512e-06, "loss": 0.8398, "step": 58275 }, { "epoch": 0.7102726286668373, "grad_norm": 2.1403419971466064, "learning_rate": 1.5249518922386147e-06, "loss": 0.8898, "step": 58280 }, { "epoch": 0.7103335648909851, "grad_norm": 1.7422115802764893, "learning_rate": 1.524631173829378e-06, "loss": 0.8064, "step": 58285 }, { "epoch": 0.7103945011151329, "grad_norm": 1.9402642250061035, "learning_rate": 1.5243104554201413e-06, "loss": 0.8553, "step": 58290 }, { "epoch": 0.7104554373392807, "grad_norm": 2.4168784618377686, "learning_rate": 1.5239897370109044e-06, "loss": 0.8787, "step": 58295 }, { "epoch": 0.7105163735634286, "grad_norm": 2.007368326187134, "learning_rate": 1.5236690186016678e-06, "loss": 0.7744, "step": 58300 }, { "epoch": 0.7105773097875763, "grad_norm": 2.1034634113311768, "learning_rate": 1.5233483001924312e-06, "loss": 0.7688, "step": 58305 }, { "epoch": 0.7106382460117241, "grad_norm": 1.9445722103118896, "learning_rate": 1.5230275817831945e-06, "loss": 0.8479, "step": 58310 }, { "epoch": 0.7106991822358719, "grad_norm": 2.2402729988098145, "learning_rate": 1.5227068633739577e-06, "loss": 0.8901, "step": 58315 }, { "epoch": 0.7107601184600197, "grad_norm": 2.316359043121338, "learning_rate": 1.5223861449647211e-06, "loss": 0.8489, "step": 58320 }, { "epoch": 0.7108210546841676, "grad_norm": 2.038912057876587, "learning_rate": 1.5220654265554844e-06, "loss": 0.8612, "step": 58325 }, { "epoch": 0.7108819909083154, "grad_norm": 1.7721548080444336, "learning_rate": 1.5217447081462478e-06, "loss": 0.8988, "step": 58330 }, { "epoch": 0.7109429271324632, "grad_norm": 1.9163821935653687, "learning_rate": 1.5214239897370112e-06, "loss": 0.8552, "step": 58335 }, { "epoch": 0.7110038633566109, "grad_norm": 1.9972399473190308, "learning_rate": 1.5211032713277743e-06, "loss": 0.8807, "step": 58340 }, { "epoch": 0.7110647995807587, "grad_norm": 1.8182520866394043, "learning_rate": 1.5207825529185377e-06, "loss": 0.7937, "step": 58345 }, { "epoch": 0.7111257358049066, "grad_norm": 1.8366655111312866, "learning_rate": 1.520461834509301e-06, "loss": 0.7657, "step": 58350 }, { "epoch": 0.7111866720290544, "grad_norm": 1.76362144947052, "learning_rate": 1.5201411161000642e-06, "loss": 0.8104, "step": 58355 }, { "epoch": 0.7112476082532022, "grad_norm": 1.9591972827911377, "learning_rate": 1.5198203976908276e-06, "loss": 0.822, "step": 58360 }, { "epoch": 0.71130854447735, "grad_norm": 1.979477047920227, "learning_rate": 1.5194996792815908e-06, "loss": 0.7971, "step": 58365 }, { "epoch": 0.7113694807014979, "grad_norm": 1.7222939729690552, "learning_rate": 1.5191789608723543e-06, "loss": 0.898, "step": 58370 }, { "epoch": 0.7114304169256456, "grad_norm": 2.0584890842437744, "learning_rate": 1.5188582424631177e-06, "loss": 0.7794, "step": 58375 }, { "epoch": 0.7114913531497934, "grad_norm": 1.9287121295928955, "learning_rate": 1.5185375240538807e-06, "loss": 0.8461, "step": 58380 }, { "epoch": 0.7115522893739412, "grad_norm": 1.9623982906341553, "learning_rate": 1.5182168056446442e-06, "loss": 0.8002, "step": 58385 }, { "epoch": 0.711613225598089, "grad_norm": 2.1084537506103516, "learning_rate": 1.5178960872354074e-06, "loss": 0.8784, "step": 58390 }, { "epoch": 0.7116741618222369, "grad_norm": 1.814376950263977, "learning_rate": 1.5175753688261708e-06, "loss": 0.7857, "step": 58395 }, { "epoch": 0.7117350980463847, "grad_norm": 2.2541515827178955, "learning_rate": 1.517254650416934e-06, "loss": 0.793, "step": 58400 }, { "epoch": 0.7117960342705325, "grad_norm": 1.9958816766738892, "learning_rate": 1.5169339320076973e-06, "loss": 0.7831, "step": 58405 }, { "epoch": 0.7118569704946802, "grad_norm": 1.9959791898727417, "learning_rate": 1.5166132135984607e-06, "loss": 0.7456, "step": 58410 }, { "epoch": 0.711917906718828, "grad_norm": 2.1611602306365967, "learning_rate": 1.5162924951892242e-06, "loss": 0.9215, "step": 58415 }, { "epoch": 0.7119788429429759, "grad_norm": 2.829868793487549, "learning_rate": 1.5159717767799872e-06, "loss": 0.9286, "step": 58420 }, { "epoch": 0.7120397791671237, "grad_norm": 1.9920941591262817, "learning_rate": 1.5156510583707506e-06, "loss": 0.7802, "step": 58425 }, { "epoch": 0.7121007153912715, "grad_norm": 1.9048234224319458, "learning_rate": 1.5153303399615139e-06, "loss": 0.8494, "step": 58430 }, { "epoch": 0.7121616516154193, "grad_norm": 2.3536083698272705, "learning_rate": 1.5150096215522773e-06, "loss": 0.8211, "step": 58435 }, { "epoch": 0.7122225878395672, "grad_norm": 1.8784501552581787, "learning_rate": 1.5146889031430405e-06, "loss": 0.8771, "step": 58440 }, { "epoch": 0.7122835240637149, "grad_norm": 1.9432368278503418, "learning_rate": 1.5143681847338038e-06, "loss": 0.8235, "step": 58445 }, { "epoch": 0.7123444602878627, "grad_norm": 1.921638011932373, "learning_rate": 1.5140474663245672e-06, "loss": 0.8587, "step": 58450 }, { "epoch": 0.7124053965120105, "grad_norm": 1.8858816623687744, "learning_rate": 1.5137267479153306e-06, "loss": 0.8223, "step": 58455 }, { "epoch": 0.7124663327361583, "grad_norm": 1.7083544731140137, "learning_rate": 1.5134060295060937e-06, "loss": 0.77, "step": 58460 }, { "epoch": 0.7125272689603062, "grad_norm": 2.11828875541687, "learning_rate": 1.513085311096857e-06, "loss": 0.8873, "step": 58465 }, { "epoch": 0.712588205184454, "grad_norm": 2.5933656692504883, "learning_rate": 1.5127645926876203e-06, "loss": 0.8034, "step": 58470 }, { "epoch": 0.7126491414086018, "grad_norm": 1.9237655401229858, "learning_rate": 1.5124438742783838e-06, "loss": 0.7822, "step": 58475 }, { "epoch": 0.7127100776327495, "grad_norm": 1.8962843418121338, "learning_rate": 1.512123155869147e-06, "loss": 0.8595, "step": 58480 }, { "epoch": 0.7127710138568973, "grad_norm": 1.7764822244644165, "learning_rate": 1.5118024374599102e-06, "loss": 0.7946, "step": 58485 }, { "epoch": 0.7128319500810452, "grad_norm": 1.9860666990280151, "learning_rate": 1.5114817190506737e-06, "loss": 0.8861, "step": 58490 }, { "epoch": 0.712892886305193, "grad_norm": 2.104313850402832, "learning_rate": 1.511161000641437e-06, "loss": 0.8358, "step": 58495 }, { "epoch": 0.7129538225293408, "grad_norm": 1.895514726638794, "learning_rate": 1.5108402822322001e-06, "loss": 0.8209, "step": 58500 }, { "epoch": 0.7130147587534886, "grad_norm": 1.9223660230636597, "learning_rate": 1.5105195638229636e-06, "loss": 0.8144, "step": 58505 }, { "epoch": 0.7130756949776365, "grad_norm": 1.798058032989502, "learning_rate": 1.5101988454137268e-06, "loss": 0.7502, "step": 58510 }, { "epoch": 0.7131366312017842, "grad_norm": 2.0157501697540283, "learning_rate": 1.5098781270044902e-06, "loss": 0.8567, "step": 58515 }, { "epoch": 0.713197567425932, "grad_norm": 1.6933356523513794, "learning_rate": 1.5095574085952537e-06, "loss": 0.8376, "step": 58520 }, { "epoch": 0.7132585036500798, "grad_norm": 2.165433168411255, "learning_rate": 1.5092366901860167e-06, "loss": 0.8508, "step": 58525 }, { "epoch": 0.7133194398742276, "grad_norm": 1.8708593845367432, "learning_rate": 1.5089159717767801e-06, "loss": 0.7707, "step": 58530 }, { "epoch": 0.7133803760983755, "grad_norm": 1.73488450050354, "learning_rate": 1.5085952533675436e-06, "loss": 0.733, "step": 58535 }, { "epoch": 0.7134413123225233, "grad_norm": 2.004551887512207, "learning_rate": 1.5082745349583066e-06, "loss": 0.834, "step": 58540 }, { "epoch": 0.7135022485466711, "grad_norm": 1.8612662553787231, "learning_rate": 1.50795381654907e-06, "loss": 0.9044, "step": 58545 }, { "epoch": 0.7135631847708188, "grad_norm": 2.314119338989258, "learning_rate": 1.5076330981398333e-06, "loss": 0.7774, "step": 58550 }, { "epoch": 0.7136241209949666, "grad_norm": 1.8298876285552979, "learning_rate": 1.5073123797305967e-06, "loss": 0.8931, "step": 58555 }, { "epoch": 0.7136850572191145, "grad_norm": 1.8317668437957764, "learning_rate": 1.5069916613213601e-06, "loss": 0.8759, "step": 58560 }, { "epoch": 0.7137459934432623, "grad_norm": 1.8467363119125366, "learning_rate": 1.5066709429121231e-06, "loss": 0.8523, "step": 58565 }, { "epoch": 0.7138069296674101, "grad_norm": 1.715232014656067, "learning_rate": 1.5063502245028866e-06, "loss": 0.7893, "step": 58570 }, { "epoch": 0.7138678658915579, "grad_norm": 1.9821261167526245, "learning_rate": 1.50602950609365e-06, "loss": 0.8627, "step": 58575 }, { "epoch": 0.7139288021157058, "grad_norm": 2.00927472114563, "learning_rate": 1.505708787684413e-06, "loss": 0.7892, "step": 58580 }, { "epoch": 0.7139897383398535, "grad_norm": 2.1074626445770264, "learning_rate": 1.5053880692751765e-06, "loss": 0.762, "step": 58585 }, { "epoch": 0.7140506745640013, "grad_norm": 2.019625186920166, "learning_rate": 1.5050673508659397e-06, "loss": 0.7707, "step": 58590 }, { "epoch": 0.7141116107881491, "grad_norm": 1.8483446836471558, "learning_rate": 1.5047466324567032e-06, "loss": 0.8189, "step": 58595 }, { "epoch": 0.7141725470122969, "grad_norm": 2.057616710662842, "learning_rate": 1.5044259140474666e-06, "loss": 0.8618, "step": 58600 }, { "epoch": 0.7142334832364448, "grad_norm": 2.1141839027404785, "learning_rate": 1.5041051956382296e-06, "loss": 0.7666, "step": 58605 }, { "epoch": 0.7142944194605926, "grad_norm": 2.0148942470550537, "learning_rate": 1.503784477228993e-06, "loss": 0.8009, "step": 58610 }, { "epoch": 0.7143553556847404, "grad_norm": 2.001718521118164, "learning_rate": 1.5034637588197565e-06, "loss": 0.7773, "step": 58615 }, { "epoch": 0.7144162919088881, "grad_norm": 2.063282012939453, "learning_rate": 1.5031430404105197e-06, "loss": 0.9041, "step": 58620 }, { "epoch": 0.7144772281330359, "grad_norm": 2.36087703704834, "learning_rate": 1.502822322001283e-06, "loss": 0.8877, "step": 58625 }, { "epoch": 0.7145381643571838, "grad_norm": 1.917944312095642, "learning_rate": 1.5025016035920464e-06, "loss": 0.7359, "step": 58630 }, { "epoch": 0.7145991005813316, "grad_norm": 2.3099775314331055, "learning_rate": 1.5021808851828096e-06, "loss": 0.8167, "step": 58635 }, { "epoch": 0.7146600368054794, "grad_norm": 2.0927250385284424, "learning_rate": 1.501860166773573e-06, "loss": 0.7934, "step": 58640 }, { "epoch": 0.7147209730296272, "grad_norm": 1.9902000427246094, "learning_rate": 1.501539448364336e-06, "loss": 0.8578, "step": 58645 }, { "epoch": 0.714781909253775, "grad_norm": 1.709976315498352, "learning_rate": 1.5012187299550995e-06, "loss": 0.805, "step": 58650 }, { "epoch": 0.7148428454779228, "grad_norm": 2.3303449153900146, "learning_rate": 1.500898011545863e-06, "loss": 0.8123, "step": 58655 }, { "epoch": 0.7149037817020706, "grad_norm": 1.9959375858306885, "learning_rate": 1.5005772931366262e-06, "loss": 0.8359, "step": 58660 }, { "epoch": 0.7149647179262184, "grad_norm": 2.017832040786743, "learning_rate": 1.5002565747273894e-06, "loss": 0.8271, "step": 58665 }, { "epoch": 0.7150256541503662, "grad_norm": 1.5966262817382812, "learning_rate": 1.4999358563181529e-06, "loss": 0.7643, "step": 58670 }, { "epoch": 0.715086590374514, "grad_norm": 1.8964495658874512, "learning_rate": 1.499615137908916e-06, "loss": 0.7815, "step": 58675 }, { "epoch": 0.7151475265986619, "grad_norm": 2.217296838760376, "learning_rate": 1.4992944194996795e-06, "loss": 0.8241, "step": 58680 }, { "epoch": 0.7152084628228097, "grad_norm": 1.7565797567367554, "learning_rate": 1.4989737010904425e-06, "loss": 0.7955, "step": 58685 }, { "epoch": 0.7152693990469574, "grad_norm": 1.7880967855453491, "learning_rate": 1.498652982681206e-06, "loss": 0.8326, "step": 58690 }, { "epoch": 0.7153303352711052, "grad_norm": 2.725052833557129, "learning_rate": 1.4983322642719694e-06, "loss": 0.8106, "step": 58695 }, { "epoch": 0.7153912714952531, "grad_norm": 1.827135682106018, "learning_rate": 1.4980115458627326e-06, "loss": 0.9094, "step": 58700 }, { "epoch": 0.7154522077194009, "grad_norm": 2.056648015975952, "learning_rate": 1.4976908274534959e-06, "loss": 0.8086, "step": 58705 }, { "epoch": 0.7155131439435487, "grad_norm": 1.5864461660385132, "learning_rate": 1.4973701090442593e-06, "loss": 0.7857, "step": 58710 }, { "epoch": 0.7155740801676965, "grad_norm": 1.9118237495422363, "learning_rate": 1.4970493906350225e-06, "loss": 0.752, "step": 58715 }, { "epoch": 0.7156350163918443, "grad_norm": 1.878096580505371, "learning_rate": 1.496728672225786e-06, "loss": 0.8219, "step": 58720 }, { "epoch": 0.7156959526159921, "grad_norm": 2.3499879837036133, "learning_rate": 1.496407953816549e-06, "loss": 0.8479, "step": 58725 }, { "epoch": 0.7157568888401399, "grad_norm": 1.868427038192749, "learning_rate": 1.4960872354073124e-06, "loss": 0.868, "step": 58730 }, { "epoch": 0.7158178250642877, "grad_norm": 1.871855616569519, "learning_rate": 1.4957665169980759e-06, "loss": 0.8521, "step": 58735 }, { "epoch": 0.7158787612884355, "grad_norm": 2.1404366493225098, "learning_rate": 1.4954457985888391e-06, "loss": 0.8717, "step": 58740 }, { "epoch": 0.7159396975125834, "grad_norm": 1.9223774671554565, "learning_rate": 1.4951250801796026e-06, "loss": 0.8379, "step": 58745 }, { "epoch": 0.7160006337367312, "grad_norm": 2.1635143756866455, "learning_rate": 1.4948043617703658e-06, "loss": 0.8424, "step": 58750 }, { "epoch": 0.716061569960879, "grad_norm": 2.0499398708343506, "learning_rate": 1.494483643361129e-06, "loss": 0.8117, "step": 58755 }, { "epoch": 0.7161225061850267, "grad_norm": 1.8930929899215698, "learning_rate": 1.4941629249518924e-06, "loss": 0.863, "step": 58760 }, { "epoch": 0.7161834424091745, "grad_norm": 1.824416995048523, "learning_rate": 1.4938422065426555e-06, "loss": 0.8031, "step": 58765 }, { "epoch": 0.7162443786333224, "grad_norm": 1.816909909248352, "learning_rate": 1.493521488133419e-06, "loss": 0.8309, "step": 58770 }, { "epoch": 0.7163053148574702, "grad_norm": 2.009984016418457, "learning_rate": 1.4932007697241823e-06, "loss": 0.76, "step": 58775 }, { "epoch": 0.716366251081618, "grad_norm": 1.81658136844635, "learning_rate": 1.4928800513149456e-06, "loss": 0.877, "step": 58780 }, { "epoch": 0.7164271873057658, "grad_norm": 2.996952533721924, "learning_rate": 1.492559332905709e-06, "loss": 0.8445, "step": 58785 }, { "epoch": 0.7164881235299136, "grad_norm": 1.9427213668823242, "learning_rate": 1.4922386144964722e-06, "loss": 0.852, "step": 58790 }, { "epoch": 0.7165490597540614, "grad_norm": 1.9628331661224365, "learning_rate": 1.4919178960872355e-06, "loss": 0.8101, "step": 58795 }, { "epoch": 0.7166099959782092, "grad_norm": 2.103351593017578, "learning_rate": 1.491597177677999e-06, "loss": 0.8796, "step": 58800 }, { "epoch": 0.716670932202357, "grad_norm": 1.9951419830322266, "learning_rate": 1.491276459268762e-06, "loss": 0.7671, "step": 58805 }, { "epoch": 0.7167318684265048, "grad_norm": 1.8040852546691895, "learning_rate": 1.4909557408595254e-06, "loss": 0.8089, "step": 58810 }, { "epoch": 0.7167928046506526, "grad_norm": 2.002925395965576, "learning_rate": 1.4906350224502888e-06, "loss": 0.8194, "step": 58815 }, { "epoch": 0.7168537408748005, "grad_norm": 1.850508213043213, "learning_rate": 1.490314304041052e-06, "loss": 0.8663, "step": 58820 }, { "epoch": 0.7169146770989483, "grad_norm": 1.8281158208847046, "learning_rate": 1.4899935856318155e-06, "loss": 0.7773, "step": 58825 }, { "epoch": 0.716975613323096, "grad_norm": 2.1937692165374756, "learning_rate": 1.4896728672225787e-06, "loss": 0.8375, "step": 58830 }, { "epoch": 0.7170365495472438, "grad_norm": 1.8599436283111572, "learning_rate": 1.489352148813342e-06, "loss": 0.8187, "step": 58835 }, { "epoch": 0.7170974857713917, "grad_norm": 2.0791118144989014, "learning_rate": 1.4890314304041054e-06, "loss": 0.8443, "step": 58840 }, { "epoch": 0.7171584219955395, "grad_norm": 1.7939807176589966, "learning_rate": 1.4887107119948684e-06, "loss": 0.7933, "step": 58845 }, { "epoch": 0.7172193582196873, "grad_norm": 2.126157760620117, "learning_rate": 1.4883899935856318e-06, "loss": 0.7506, "step": 58850 }, { "epoch": 0.7172802944438351, "grad_norm": 1.9822977781295776, "learning_rate": 1.4880692751763953e-06, "loss": 0.8763, "step": 58855 }, { "epoch": 0.7173412306679829, "grad_norm": 1.9847526550292969, "learning_rate": 1.4877485567671585e-06, "loss": 0.7208, "step": 58860 }, { "epoch": 0.7174021668921307, "grad_norm": 2.207725763320923, "learning_rate": 1.487427838357922e-06, "loss": 0.887, "step": 58865 }, { "epoch": 0.7174631031162785, "grad_norm": 2.2090835571289062, "learning_rate": 1.4871071199486854e-06, "loss": 0.8293, "step": 58870 }, { "epoch": 0.7175240393404263, "grad_norm": 2.024928092956543, "learning_rate": 1.4867864015394484e-06, "loss": 0.8018, "step": 58875 }, { "epoch": 0.7175849755645741, "grad_norm": 2.017871379852295, "learning_rate": 1.4864656831302118e-06, "loss": 0.8356, "step": 58880 }, { "epoch": 0.717645911788722, "grad_norm": 2.0482401847839355, "learning_rate": 1.486144964720975e-06, "loss": 0.8292, "step": 58885 }, { "epoch": 0.7177068480128698, "grad_norm": 1.9428590536117554, "learning_rate": 1.4858242463117383e-06, "loss": 0.7818, "step": 58890 }, { "epoch": 0.7177677842370176, "grad_norm": 1.9798847436904907, "learning_rate": 1.4855035279025017e-06, "loss": 0.739, "step": 58895 }, { "epoch": 0.7178287204611653, "grad_norm": 2.1061179637908936, "learning_rate": 1.485182809493265e-06, "loss": 0.7946, "step": 58900 }, { "epoch": 0.7178896566853131, "grad_norm": 1.9034982919692993, "learning_rate": 1.4848620910840284e-06, "loss": 0.8334, "step": 58905 }, { "epoch": 0.717950592909461, "grad_norm": 1.9332174062728882, "learning_rate": 1.4845413726747918e-06, "loss": 0.8328, "step": 58910 }, { "epoch": 0.7180115291336088, "grad_norm": 2.0554282665252686, "learning_rate": 1.4842206542655549e-06, "loss": 0.7148, "step": 58915 }, { "epoch": 0.7180724653577566, "grad_norm": 1.7722082138061523, "learning_rate": 1.4838999358563183e-06, "loss": 0.7421, "step": 58920 }, { "epoch": 0.7181334015819044, "grad_norm": 1.867804765701294, "learning_rate": 1.4835792174470817e-06, "loss": 0.7532, "step": 58925 }, { "epoch": 0.7181943378060522, "grad_norm": 2.2531344890594482, "learning_rate": 1.4832584990378448e-06, "loss": 0.8602, "step": 58930 }, { "epoch": 0.7182552740302, "grad_norm": 1.9846774339675903, "learning_rate": 1.4829377806286082e-06, "loss": 0.8707, "step": 58935 }, { "epoch": 0.7183162102543478, "grad_norm": 2.2402749061584473, "learning_rate": 1.4826170622193714e-06, "loss": 0.769, "step": 58940 }, { "epoch": 0.7183771464784956, "grad_norm": 1.970083236694336, "learning_rate": 1.4822963438101349e-06, "loss": 0.8732, "step": 58945 }, { "epoch": 0.7184380827026434, "grad_norm": 1.9751026630401611, "learning_rate": 1.4819756254008983e-06, "loss": 0.818, "step": 58950 }, { "epoch": 0.7184990189267912, "grad_norm": 2.308927297592163, "learning_rate": 1.4816549069916613e-06, "loss": 0.831, "step": 58955 }, { "epoch": 0.7185599551509391, "grad_norm": 2.019812822341919, "learning_rate": 1.4813341885824248e-06, "loss": 0.7732, "step": 58960 }, { "epoch": 0.7186208913750868, "grad_norm": 1.8106846809387207, "learning_rate": 1.4810134701731882e-06, "loss": 0.7845, "step": 58965 }, { "epoch": 0.7186818275992346, "grad_norm": 2.2689459323883057, "learning_rate": 1.4806927517639514e-06, "loss": 0.776, "step": 58970 }, { "epoch": 0.7187427638233824, "grad_norm": 2.1128389835357666, "learning_rate": 1.4803720333547147e-06, "loss": 0.7951, "step": 58975 }, { "epoch": 0.7188037000475302, "grad_norm": 2.2594618797302246, "learning_rate": 1.4800513149454779e-06, "loss": 0.8641, "step": 58980 }, { "epoch": 0.7188646362716781, "grad_norm": 1.6581138372421265, "learning_rate": 1.4797305965362413e-06, "loss": 0.8462, "step": 58985 }, { "epoch": 0.7189255724958259, "grad_norm": 1.9474809169769287, "learning_rate": 1.4794098781270048e-06, "loss": 0.8332, "step": 58990 }, { "epoch": 0.7189865087199737, "grad_norm": 2.1689398288726807, "learning_rate": 1.4790891597177678e-06, "loss": 0.7885, "step": 58995 }, { "epoch": 0.7190474449441214, "grad_norm": 1.84114408493042, "learning_rate": 1.4787684413085312e-06, "loss": 0.8495, "step": 59000 }, { "epoch": 0.7191083811682693, "grad_norm": 1.9421241283416748, "learning_rate": 1.4784477228992947e-06, "loss": 0.8314, "step": 59005 }, { "epoch": 0.7191693173924171, "grad_norm": 1.9128552675247192, "learning_rate": 1.478127004490058e-06, "loss": 0.7763, "step": 59010 }, { "epoch": 0.7192302536165649, "grad_norm": 1.8281134366989136, "learning_rate": 1.4778062860808211e-06, "loss": 0.8095, "step": 59015 }, { "epoch": 0.7192911898407127, "grad_norm": 2.0466837882995605, "learning_rate": 1.4774855676715844e-06, "loss": 0.7983, "step": 59020 }, { "epoch": 0.7193521260648605, "grad_norm": 2.041598320007324, "learning_rate": 1.4771648492623478e-06, "loss": 0.8121, "step": 59025 }, { "epoch": 0.7194130622890084, "grad_norm": 2.0709660053253174, "learning_rate": 1.4768441308531112e-06, "loss": 0.7882, "step": 59030 }, { "epoch": 0.7194739985131561, "grad_norm": 1.858997106552124, "learning_rate": 1.4765234124438743e-06, "loss": 0.8364, "step": 59035 }, { "epoch": 0.7195349347373039, "grad_norm": 1.7470088005065918, "learning_rate": 1.4762026940346377e-06, "loss": 0.7705, "step": 59040 }, { "epoch": 0.7195958709614517, "grad_norm": 2.166752815246582, "learning_rate": 1.4758819756254011e-06, "loss": 0.833, "step": 59045 }, { "epoch": 0.7196568071855995, "grad_norm": 2.164886713027954, "learning_rate": 1.4755612572161644e-06, "loss": 0.9027, "step": 59050 }, { "epoch": 0.7197177434097474, "grad_norm": 1.9373211860656738, "learning_rate": 1.4752405388069276e-06, "loss": 0.8762, "step": 59055 }, { "epoch": 0.7197786796338952, "grad_norm": 2.0215604305267334, "learning_rate": 1.4749198203976908e-06, "loss": 0.8819, "step": 59060 }, { "epoch": 0.719839615858043, "grad_norm": 1.9527709484100342, "learning_rate": 1.4745991019884543e-06, "loss": 0.8047, "step": 59065 }, { "epoch": 0.7199005520821907, "grad_norm": 1.936309814453125, "learning_rate": 1.4742783835792177e-06, "loss": 0.8297, "step": 59070 }, { "epoch": 0.7199614883063385, "grad_norm": 2.0042431354522705, "learning_rate": 1.4739576651699807e-06, "loss": 0.8094, "step": 59075 }, { "epoch": 0.7200224245304864, "grad_norm": 1.8872267007827759, "learning_rate": 1.4736369467607442e-06, "loss": 0.8264, "step": 59080 }, { "epoch": 0.7200833607546342, "grad_norm": 2.274251699447632, "learning_rate": 1.4733162283515076e-06, "loss": 0.796, "step": 59085 }, { "epoch": 0.720144296978782, "grad_norm": 2.0160067081451416, "learning_rate": 1.4729955099422708e-06, "loss": 0.8196, "step": 59090 }, { "epoch": 0.7202052332029298, "grad_norm": 2.3194797039031982, "learning_rate": 1.4726747915330343e-06, "loss": 0.8613, "step": 59095 }, { "epoch": 0.7202661694270777, "grad_norm": 1.8818941116333008, "learning_rate": 1.4723540731237973e-06, "loss": 0.8176, "step": 59100 }, { "epoch": 0.7203271056512254, "grad_norm": 2.081040382385254, "learning_rate": 1.4720333547145607e-06, "loss": 0.8433, "step": 59105 }, { "epoch": 0.7203880418753732, "grad_norm": 1.5997484922409058, "learning_rate": 1.4717126363053242e-06, "loss": 0.8719, "step": 59110 }, { "epoch": 0.720448978099521, "grad_norm": 1.9029940366744995, "learning_rate": 1.4713919178960872e-06, "loss": 0.7468, "step": 59115 }, { "epoch": 0.7205099143236688, "grad_norm": 1.8007621765136719, "learning_rate": 1.4710711994868506e-06, "loss": 0.7577, "step": 59120 }, { "epoch": 0.7205708505478167, "grad_norm": 1.9810293912887573, "learning_rate": 1.470750481077614e-06, "loss": 0.8033, "step": 59125 }, { "epoch": 0.7206317867719645, "grad_norm": 2.1155946254730225, "learning_rate": 1.4704297626683773e-06, "loss": 0.7666, "step": 59130 }, { "epoch": 0.7206927229961123, "grad_norm": 2.065988302230835, "learning_rate": 1.4701090442591407e-06, "loss": 0.83, "step": 59135 }, { "epoch": 0.72075365922026, "grad_norm": 1.903160572052002, "learning_rate": 1.4697883258499037e-06, "loss": 0.8434, "step": 59140 }, { "epoch": 0.7208145954444078, "grad_norm": 2.0978355407714844, "learning_rate": 1.4694676074406672e-06, "loss": 0.8052, "step": 59145 }, { "epoch": 0.7208755316685557, "grad_norm": 2.077488899230957, "learning_rate": 1.4691468890314306e-06, "loss": 0.8461, "step": 59150 }, { "epoch": 0.7209364678927035, "grad_norm": 2.0293118953704834, "learning_rate": 1.4688261706221936e-06, "loss": 0.8208, "step": 59155 }, { "epoch": 0.7209974041168513, "grad_norm": 1.8660616874694824, "learning_rate": 1.468505452212957e-06, "loss": 0.8512, "step": 59160 }, { "epoch": 0.7210583403409991, "grad_norm": 1.970017671585083, "learning_rate": 1.4681847338037205e-06, "loss": 0.823, "step": 59165 }, { "epoch": 0.721119276565147, "grad_norm": 2.3558428287506104, "learning_rate": 1.4678640153944838e-06, "loss": 0.8439, "step": 59170 }, { "epoch": 0.7211802127892947, "grad_norm": 2.1830742359161377, "learning_rate": 1.4675432969852472e-06, "loss": 0.8468, "step": 59175 }, { "epoch": 0.7212411490134425, "grad_norm": 1.9113199710845947, "learning_rate": 1.4672225785760102e-06, "loss": 0.8225, "step": 59180 }, { "epoch": 0.7213020852375903, "grad_norm": 2.003713607788086, "learning_rate": 1.4669018601667737e-06, "loss": 0.8431, "step": 59185 }, { "epoch": 0.7213630214617381, "grad_norm": 1.9788825511932373, "learning_rate": 1.466581141757537e-06, "loss": 0.7697, "step": 59190 }, { "epoch": 0.721423957685886, "grad_norm": 1.933224081993103, "learning_rate": 1.4662604233483001e-06, "loss": 0.8239, "step": 59195 }, { "epoch": 0.7214848939100338, "grad_norm": 2.3613476753234863, "learning_rate": 1.4659397049390635e-06, "loss": 0.793, "step": 59200 }, { "epoch": 0.7215458301341816, "grad_norm": 1.8247531652450562, "learning_rate": 1.465618986529827e-06, "loss": 0.8079, "step": 59205 }, { "epoch": 0.7216067663583293, "grad_norm": 1.7514843940734863, "learning_rate": 1.4652982681205902e-06, "loss": 0.7641, "step": 59210 }, { "epoch": 0.7216677025824771, "grad_norm": 1.7473253011703491, "learning_rate": 1.4649775497113537e-06, "loss": 0.8033, "step": 59215 }, { "epoch": 0.721728638806625, "grad_norm": 2.005549669265747, "learning_rate": 1.464656831302117e-06, "loss": 0.8001, "step": 59220 }, { "epoch": 0.7217895750307728, "grad_norm": 1.8754408359527588, "learning_rate": 1.4643361128928801e-06, "loss": 0.8347, "step": 59225 }, { "epoch": 0.7218505112549206, "grad_norm": 2.0698208808898926, "learning_rate": 1.4640153944836436e-06, "loss": 0.8016, "step": 59230 }, { "epoch": 0.7219114474790684, "grad_norm": 2.529090166091919, "learning_rate": 1.4636946760744068e-06, "loss": 0.7986, "step": 59235 }, { "epoch": 0.7219723837032163, "grad_norm": 1.790834903717041, "learning_rate": 1.46337395766517e-06, "loss": 0.848, "step": 59240 }, { "epoch": 0.722033319927364, "grad_norm": 2.155909538269043, "learning_rate": 1.4630532392559335e-06, "loss": 0.7963, "step": 59245 }, { "epoch": 0.7220942561515118, "grad_norm": 1.7820854187011719, "learning_rate": 1.4627325208466967e-06, "loss": 0.7944, "step": 59250 }, { "epoch": 0.7221551923756596, "grad_norm": 2.120242118835449, "learning_rate": 1.4624118024374601e-06, "loss": 0.8517, "step": 59255 }, { "epoch": 0.7222161285998074, "grad_norm": 1.955504298210144, "learning_rate": 1.4620910840282236e-06, "loss": 0.8044, "step": 59260 }, { "epoch": 0.7222770648239553, "grad_norm": 1.852545142173767, "learning_rate": 1.4617703656189866e-06, "loss": 0.7958, "step": 59265 }, { "epoch": 0.7223380010481031, "grad_norm": 1.8728549480438232, "learning_rate": 1.46144964720975e-06, "loss": 0.7655, "step": 59270 }, { "epoch": 0.7223989372722509, "grad_norm": 2.7980310916900635, "learning_rate": 1.4611289288005132e-06, "loss": 0.7685, "step": 59275 }, { "epoch": 0.7224598734963986, "grad_norm": 1.7830249071121216, "learning_rate": 1.4608082103912765e-06, "loss": 0.8762, "step": 59280 }, { "epoch": 0.7225208097205464, "grad_norm": 1.78606379032135, "learning_rate": 1.46048749198204e-06, "loss": 0.8151, "step": 59285 }, { "epoch": 0.7225817459446943, "grad_norm": 1.99983549118042, "learning_rate": 1.4601667735728031e-06, "loss": 0.8053, "step": 59290 }, { "epoch": 0.7226426821688421, "grad_norm": 2.321726083755493, "learning_rate": 1.4598460551635666e-06, "loss": 0.8134, "step": 59295 }, { "epoch": 0.7227036183929899, "grad_norm": 1.9719215631484985, "learning_rate": 1.45952533675433e-06, "loss": 0.804, "step": 59300 }, { "epoch": 0.7227645546171377, "grad_norm": 2.4085264205932617, "learning_rate": 1.459204618345093e-06, "loss": 0.8955, "step": 59305 }, { "epoch": 0.7228254908412856, "grad_norm": 2.0728132724761963, "learning_rate": 1.4588838999358565e-06, "loss": 0.8262, "step": 59310 }, { "epoch": 0.7228864270654333, "grad_norm": 1.91020667552948, "learning_rate": 1.4585631815266197e-06, "loss": 0.8299, "step": 59315 }, { "epoch": 0.7229473632895811, "grad_norm": 1.5345369577407837, "learning_rate": 1.4582424631173832e-06, "loss": 0.8005, "step": 59320 }, { "epoch": 0.7230082995137289, "grad_norm": 1.9202542304992676, "learning_rate": 1.4579217447081464e-06, "loss": 0.8204, "step": 59325 }, { "epoch": 0.7230692357378767, "grad_norm": 1.8712900876998901, "learning_rate": 1.4576010262989096e-06, "loss": 0.7803, "step": 59330 }, { "epoch": 0.7231301719620246, "grad_norm": 1.9866645336151123, "learning_rate": 1.457280307889673e-06, "loss": 0.8127, "step": 59335 }, { "epoch": 0.7231911081861724, "grad_norm": 1.8639171123504639, "learning_rate": 1.4569595894804365e-06, "loss": 0.8177, "step": 59340 }, { "epoch": 0.7232520444103202, "grad_norm": 1.8766835927963257, "learning_rate": 1.4566388710711995e-06, "loss": 0.8624, "step": 59345 }, { "epoch": 0.7233129806344679, "grad_norm": 1.7222938537597656, "learning_rate": 1.456318152661963e-06, "loss": 0.7631, "step": 59350 }, { "epoch": 0.7233739168586157, "grad_norm": 1.9887821674346924, "learning_rate": 1.4559974342527262e-06, "loss": 0.8941, "step": 59355 }, { "epoch": 0.7234348530827636, "grad_norm": 2.2464962005615234, "learning_rate": 1.4556767158434896e-06, "loss": 0.771, "step": 59360 }, { "epoch": 0.7234957893069114, "grad_norm": 1.9313026666641235, "learning_rate": 1.4553559974342528e-06, "loss": 0.8643, "step": 59365 }, { "epoch": 0.7235567255310592, "grad_norm": 2.0568606853485107, "learning_rate": 1.455035279025016e-06, "loss": 0.8493, "step": 59370 }, { "epoch": 0.723617661755207, "grad_norm": 1.8036669492721558, "learning_rate": 1.4547145606157795e-06, "loss": 0.8494, "step": 59375 }, { "epoch": 0.7236785979793549, "grad_norm": 2.3746166229248047, "learning_rate": 1.454393842206543e-06, "loss": 0.8719, "step": 59380 }, { "epoch": 0.7237395342035026, "grad_norm": 1.7817647457122803, "learning_rate": 1.454073123797306e-06, "loss": 0.7728, "step": 59385 }, { "epoch": 0.7238004704276504, "grad_norm": 2.246886730194092, "learning_rate": 1.4537524053880694e-06, "loss": 0.7563, "step": 59390 }, { "epoch": 0.7238614066517982, "grad_norm": 1.782477855682373, "learning_rate": 1.4534316869788326e-06, "loss": 0.8458, "step": 59395 }, { "epoch": 0.723922342875946, "grad_norm": 1.9759811162948608, "learning_rate": 1.453110968569596e-06, "loss": 0.9015, "step": 59400 }, { "epoch": 0.7239832791000939, "grad_norm": 1.8554115295410156, "learning_rate": 1.4527902501603593e-06, "loss": 0.8167, "step": 59405 }, { "epoch": 0.7240442153242417, "grad_norm": 1.6850073337554932, "learning_rate": 1.4524695317511225e-06, "loss": 0.8013, "step": 59410 }, { "epoch": 0.7241051515483895, "grad_norm": 1.8232141733169556, "learning_rate": 1.452148813341886e-06, "loss": 0.7798, "step": 59415 }, { "epoch": 0.7241660877725372, "grad_norm": 2.0094826221466064, "learning_rate": 1.4518280949326494e-06, "loss": 0.723, "step": 59420 }, { "epoch": 0.724227023996685, "grad_norm": 2.02284836769104, "learning_rate": 1.4515073765234124e-06, "loss": 0.7946, "step": 59425 }, { "epoch": 0.7242879602208329, "grad_norm": 2.20515775680542, "learning_rate": 1.4511866581141759e-06, "loss": 0.8275, "step": 59430 }, { "epoch": 0.7243488964449807, "grad_norm": 1.6649972200393677, "learning_rate": 1.450865939704939e-06, "loss": 0.7834, "step": 59435 }, { "epoch": 0.7244098326691285, "grad_norm": 1.9424946308135986, "learning_rate": 1.4505452212957025e-06, "loss": 0.8492, "step": 59440 }, { "epoch": 0.7244707688932763, "grad_norm": 1.9904826879501343, "learning_rate": 1.450224502886466e-06, "loss": 0.8536, "step": 59445 }, { "epoch": 0.7245317051174242, "grad_norm": 1.8846246004104614, "learning_rate": 1.449903784477229e-06, "loss": 0.8548, "step": 59450 }, { "epoch": 0.7245926413415719, "grad_norm": 2.2531511783599854, "learning_rate": 1.4495830660679924e-06, "loss": 0.8168, "step": 59455 }, { "epoch": 0.7246535775657197, "grad_norm": 2.471942901611328, "learning_rate": 1.4492623476587559e-06, "loss": 0.8282, "step": 59460 }, { "epoch": 0.7247145137898675, "grad_norm": 1.741669774055481, "learning_rate": 1.448941629249519e-06, "loss": 0.8653, "step": 59465 }, { "epoch": 0.7247754500140153, "grad_norm": 1.8228293657302856, "learning_rate": 1.4486209108402823e-06, "loss": 0.7522, "step": 59470 }, { "epoch": 0.7248363862381632, "grad_norm": 1.8293156623840332, "learning_rate": 1.4483001924310456e-06, "loss": 0.7917, "step": 59475 }, { "epoch": 0.724897322462311, "grad_norm": 2.4848735332489014, "learning_rate": 1.447979474021809e-06, "loss": 0.8547, "step": 59480 }, { "epoch": 0.7249582586864588, "grad_norm": 1.963159203529358, "learning_rate": 1.4476587556125724e-06, "loss": 0.7639, "step": 59485 }, { "epoch": 0.7250191949106065, "grad_norm": 2.058626413345337, "learning_rate": 1.4473380372033355e-06, "loss": 0.8484, "step": 59490 }, { "epoch": 0.7250801311347543, "grad_norm": 1.7903066873550415, "learning_rate": 1.447017318794099e-06, "loss": 0.875, "step": 59495 }, { "epoch": 0.7251410673589022, "grad_norm": 1.8661420345306396, "learning_rate": 1.4466966003848623e-06, "loss": 0.7911, "step": 59500 }, { "epoch": 0.72520200358305, "grad_norm": 2.173542022705078, "learning_rate": 1.4463758819756254e-06, "loss": 0.8632, "step": 59505 }, { "epoch": 0.7252629398071978, "grad_norm": 1.896871566772461, "learning_rate": 1.4460551635663888e-06, "loss": 0.8305, "step": 59510 }, { "epoch": 0.7253238760313456, "grad_norm": 2.0828442573547363, "learning_rate": 1.4457344451571522e-06, "loss": 0.8866, "step": 59515 }, { "epoch": 0.7253848122554934, "grad_norm": 2.0102832317352295, "learning_rate": 1.4454137267479155e-06, "loss": 0.8402, "step": 59520 }, { "epoch": 0.7254457484796412, "grad_norm": 1.9127920866012573, "learning_rate": 1.445093008338679e-06, "loss": 0.8376, "step": 59525 }, { "epoch": 0.725506684703789, "grad_norm": 1.8820363283157349, "learning_rate": 1.444772289929442e-06, "loss": 0.8404, "step": 59530 }, { "epoch": 0.7255676209279368, "grad_norm": 2.136394739151001, "learning_rate": 1.4444515715202054e-06, "loss": 0.8367, "step": 59535 }, { "epoch": 0.7256285571520846, "grad_norm": 1.8412748575210571, "learning_rate": 1.4441308531109688e-06, "loss": 0.795, "step": 59540 }, { "epoch": 0.7256894933762325, "grad_norm": 1.8240361213684082, "learning_rate": 1.4438101347017318e-06, "loss": 0.8726, "step": 59545 }, { "epoch": 0.7257504296003803, "grad_norm": 1.939745545387268, "learning_rate": 1.4434894162924953e-06, "loss": 0.8284, "step": 59550 }, { "epoch": 0.7258113658245281, "grad_norm": 2.0327024459838867, "learning_rate": 1.4431686978832587e-06, "loss": 0.8158, "step": 59555 }, { "epoch": 0.7258723020486758, "grad_norm": 2.154383897781372, "learning_rate": 1.442847979474022e-06, "loss": 0.8643, "step": 59560 }, { "epoch": 0.7259332382728236, "grad_norm": 1.676557183265686, "learning_rate": 1.4425272610647854e-06, "loss": 0.8591, "step": 59565 }, { "epoch": 0.7259941744969715, "grad_norm": 1.7594308853149414, "learning_rate": 1.4422065426555484e-06, "loss": 0.7885, "step": 59570 }, { "epoch": 0.7260551107211193, "grad_norm": 2.1408586502075195, "learning_rate": 1.4418858242463118e-06, "loss": 0.8154, "step": 59575 }, { "epoch": 0.7261160469452671, "grad_norm": 1.8375025987625122, "learning_rate": 1.4415651058370753e-06, "loss": 0.8121, "step": 59580 }, { "epoch": 0.7261769831694149, "grad_norm": 1.8316962718963623, "learning_rate": 1.4412443874278385e-06, "loss": 0.8264, "step": 59585 }, { "epoch": 0.7262379193935627, "grad_norm": 2.1700563430786133, "learning_rate": 1.4409236690186017e-06, "loss": 0.8466, "step": 59590 }, { "epoch": 0.7262988556177105, "grad_norm": 1.79660964012146, "learning_rate": 1.4406029506093652e-06, "loss": 0.8435, "step": 59595 }, { "epoch": 0.7263597918418583, "grad_norm": 1.9023422002792358, "learning_rate": 1.4402822322001284e-06, "loss": 0.8116, "step": 59600 }, { "epoch": 0.7264207280660061, "grad_norm": 2.635218858718872, "learning_rate": 1.4399615137908918e-06, "loss": 0.8357, "step": 59605 }, { "epoch": 0.7264816642901539, "grad_norm": 1.7024776935577393, "learning_rate": 1.4396407953816549e-06, "loss": 0.8224, "step": 59610 }, { "epoch": 0.7265426005143018, "grad_norm": 2.0961971282958984, "learning_rate": 1.4393200769724183e-06, "loss": 0.7522, "step": 59615 }, { "epoch": 0.7266035367384496, "grad_norm": 1.8943281173706055, "learning_rate": 1.4389993585631817e-06, "loss": 0.7963, "step": 59620 }, { "epoch": 0.7266644729625974, "grad_norm": 2.046818256378174, "learning_rate": 1.438678640153945e-06, "loss": 0.7784, "step": 59625 }, { "epoch": 0.7267254091867451, "grad_norm": 2.122563123703003, "learning_rate": 1.4383579217447082e-06, "loss": 0.8136, "step": 59630 }, { "epoch": 0.7267863454108929, "grad_norm": 1.9963127374649048, "learning_rate": 1.4380372033354716e-06, "loss": 0.7695, "step": 59635 }, { "epoch": 0.7268472816350408, "grad_norm": 1.9608416557312012, "learning_rate": 1.4377164849262349e-06, "loss": 0.7954, "step": 59640 }, { "epoch": 0.7269082178591886, "grad_norm": 1.9205214977264404, "learning_rate": 1.4373957665169983e-06, "loss": 0.7986, "step": 59645 }, { "epoch": 0.7269691540833364, "grad_norm": 1.9659596681594849, "learning_rate": 1.4370750481077613e-06, "loss": 0.8118, "step": 59650 }, { "epoch": 0.7270300903074842, "grad_norm": 1.8496190309524536, "learning_rate": 1.4367543296985248e-06, "loss": 0.8125, "step": 59655 }, { "epoch": 0.727091026531632, "grad_norm": 1.6624407768249512, "learning_rate": 1.4364336112892882e-06, "loss": 0.8244, "step": 59660 }, { "epoch": 0.7271519627557798, "grad_norm": 2.4695422649383545, "learning_rate": 1.4361128928800514e-06, "loss": 0.8308, "step": 59665 }, { "epoch": 0.7272128989799276, "grad_norm": 1.8333656787872314, "learning_rate": 1.4357921744708147e-06, "loss": 0.7959, "step": 59670 }, { "epoch": 0.7272738352040754, "grad_norm": 1.7671750783920288, "learning_rate": 1.435471456061578e-06, "loss": 0.8047, "step": 59675 }, { "epoch": 0.7273347714282232, "grad_norm": 2.041048526763916, "learning_rate": 1.4351507376523413e-06, "loss": 0.806, "step": 59680 }, { "epoch": 0.727395707652371, "grad_norm": 3.168989896774292, "learning_rate": 1.4348300192431048e-06, "loss": 0.8691, "step": 59685 }, { "epoch": 0.7274566438765189, "grad_norm": 1.9908390045166016, "learning_rate": 1.4345093008338678e-06, "loss": 0.8526, "step": 59690 }, { "epoch": 0.7275175801006667, "grad_norm": 2.1931943893432617, "learning_rate": 1.4341885824246312e-06, "loss": 0.7823, "step": 59695 }, { "epoch": 0.7275785163248144, "grad_norm": 2.029318332672119, "learning_rate": 1.4338678640153947e-06, "loss": 0.8025, "step": 59700 }, { "epoch": 0.7276394525489622, "grad_norm": 1.836298942565918, "learning_rate": 1.4335471456061579e-06, "loss": 0.8106, "step": 59705 }, { "epoch": 0.72770038877311, "grad_norm": 1.8379782438278198, "learning_rate": 1.4332264271969213e-06, "loss": 0.7872, "step": 59710 }, { "epoch": 0.7277613249972579, "grad_norm": 1.799187183380127, "learning_rate": 1.4329057087876846e-06, "loss": 0.7723, "step": 59715 }, { "epoch": 0.7278222612214057, "grad_norm": 2.0905566215515137, "learning_rate": 1.4325849903784478e-06, "loss": 0.7794, "step": 59720 }, { "epoch": 0.7278831974455535, "grad_norm": 1.975927472114563, "learning_rate": 1.4322642719692112e-06, "loss": 0.7909, "step": 59725 }, { "epoch": 0.7279441336697013, "grad_norm": 1.935433030128479, "learning_rate": 1.4319435535599742e-06, "loss": 0.794, "step": 59730 }, { "epoch": 0.728005069893849, "grad_norm": 1.642286777496338, "learning_rate": 1.4316228351507377e-06, "loss": 0.79, "step": 59735 }, { "epoch": 0.7280660061179969, "grad_norm": 2.0150468349456787, "learning_rate": 1.4313021167415011e-06, "loss": 0.8371, "step": 59740 }, { "epoch": 0.7281269423421447, "grad_norm": 2.0405938625335693, "learning_rate": 1.4309813983322644e-06, "loss": 0.7614, "step": 59745 }, { "epoch": 0.7281878785662925, "grad_norm": 1.8465039730072021, "learning_rate": 1.4306606799230278e-06, "loss": 0.7437, "step": 59750 }, { "epoch": 0.7282488147904403, "grad_norm": 1.7006468772888184, "learning_rate": 1.430339961513791e-06, "loss": 0.8279, "step": 59755 }, { "epoch": 0.7283097510145882, "grad_norm": 1.8396393060684204, "learning_rate": 1.4300192431045542e-06, "loss": 0.7846, "step": 59760 }, { "epoch": 0.728370687238736, "grad_norm": 1.8866676092147827, "learning_rate": 1.4296985246953177e-06, "loss": 0.8431, "step": 59765 }, { "epoch": 0.7284316234628837, "grad_norm": 1.9124575853347778, "learning_rate": 1.4293778062860807e-06, "loss": 0.8498, "step": 59770 }, { "epoch": 0.7284925596870315, "grad_norm": 1.9824838638305664, "learning_rate": 1.4290570878768441e-06, "loss": 0.8856, "step": 59775 }, { "epoch": 0.7285534959111793, "grad_norm": 1.833173155784607, "learning_rate": 1.4287363694676076e-06, "loss": 0.8659, "step": 59780 }, { "epoch": 0.7286144321353272, "grad_norm": 2.3506808280944824, "learning_rate": 1.4284156510583708e-06, "loss": 0.783, "step": 59785 }, { "epoch": 0.728675368359475, "grad_norm": 2.184565305709839, "learning_rate": 1.4280949326491343e-06, "loss": 0.8477, "step": 59790 }, { "epoch": 0.7287363045836228, "grad_norm": 2.0817360877990723, "learning_rate": 1.4277742142398977e-06, "loss": 0.8212, "step": 59795 }, { "epoch": 0.7287972408077706, "grad_norm": 2.3319666385650635, "learning_rate": 1.4274534958306607e-06, "loss": 0.8138, "step": 59800 }, { "epoch": 0.7288581770319184, "grad_norm": 1.7808914184570312, "learning_rate": 1.4271327774214242e-06, "loss": 0.8673, "step": 59805 }, { "epoch": 0.7289191132560662, "grad_norm": 1.7087076902389526, "learning_rate": 1.4268120590121874e-06, "loss": 0.7796, "step": 59810 }, { "epoch": 0.728980049480214, "grad_norm": 1.9632083177566528, "learning_rate": 1.4264913406029506e-06, "loss": 0.8369, "step": 59815 }, { "epoch": 0.7290409857043618, "grad_norm": 2.3376920223236084, "learning_rate": 1.426170622193714e-06, "loss": 0.7897, "step": 59820 }, { "epoch": 0.7291019219285096, "grad_norm": 1.7745786905288696, "learning_rate": 1.4258499037844773e-06, "loss": 0.8452, "step": 59825 }, { "epoch": 0.7291628581526575, "grad_norm": 2.40535044670105, "learning_rate": 1.4255291853752407e-06, "loss": 0.8425, "step": 59830 }, { "epoch": 0.7292237943768053, "grad_norm": 1.9247782230377197, "learning_rate": 1.4252084669660042e-06, "loss": 0.8713, "step": 59835 }, { "epoch": 0.729284730600953, "grad_norm": 1.9662235975265503, "learning_rate": 1.4248877485567672e-06, "loss": 0.8118, "step": 59840 }, { "epoch": 0.7293456668251008, "grad_norm": 1.8228431940078735, "learning_rate": 1.4245670301475306e-06, "loss": 0.8148, "step": 59845 }, { "epoch": 0.7294066030492486, "grad_norm": 2.021702289581299, "learning_rate": 1.424246311738294e-06, "loss": 0.8438, "step": 59850 }, { "epoch": 0.7294675392733965, "grad_norm": 1.949718713760376, "learning_rate": 1.423925593329057e-06, "loss": 0.7798, "step": 59855 }, { "epoch": 0.7295284754975443, "grad_norm": 2.088412046432495, "learning_rate": 1.4236048749198205e-06, "loss": 0.8811, "step": 59860 }, { "epoch": 0.7295894117216921, "grad_norm": 1.8825626373291016, "learning_rate": 1.4232841565105837e-06, "loss": 0.8969, "step": 59865 }, { "epoch": 0.7296503479458399, "grad_norm": 1.813862919807434, "learning_rate": 1.4229634381013472e-06, "loss": 0.9069, "step": 59870 }, { "epoch": 0.7297112841699877, "grad_norm": 1.719172716140747, "learning_rate": 1.4226427196921106e-06, "loss": 0.9058, "step": 59875 }, { "epoch": 0.7297722203941355, "grad_norm": 1.8184137344360352, "learning_rate": 1.4223220012828736e-06, "loss": 0.7721, "step": 59880 }, { "epoch": 0.7298331566182833, "grad_norm": 1.931000828742981, "learning_rate": 1.422001282873637e-06, "loss": 0.7936, "step": 59885 }, { "epoch": 0.7298940928424311, "grad_norm": 2.0183205604553223, "learning_rate": 1.4216805644644005e-06, "loss": 0.8538, "step": 59890 }, { "epoch": 0.7299550290665789, "grad_norm": 1.975215196609497, "learning_rate": 1.4213598460551635e-06, "loss": 0.8361, "step": 59895 }, { "epoch": 0.7300159652907268, "grad_norm": 1.8755377531051636, "learning_rate": 1.421039127645927e-06, "loss": 0.7954, "step": 59900 }, { "epoch": 0.7300769015148746, "grad_norm": 1.8903270959854126, "learning_rate": 1.4207184092366902e-06, "loss": 0.7989, "step": 59905 }, { "epoch": 0.7301378377390223, "grad_norm": 1.7620303630828857, "learning_rate": 1.4203976908274536e-06, "loss": 0.8038, "step": 59910 }, { "epoch": 0.7301987739631701, "grad_norm": 2.185513496398926, "learning_rate": 1.420076972418217e-06, "loss": 0.8162, "step": 59915 }, { "epoch": 0.730259710187318, "grad_norm": 1.742042899131775, "learning_rate": 1.41975625400898e-06, "loss": 0.7985, "step": 59920 }, { "epoch": 0.7303206464114658, "grad_norm": 1.9457637071609497, "learning_rate": 1.4194355355997435e-06, "loss": 0.8174, "step": 59925 }, { "epoch": 0.7303815826356136, "grad_norm": 1.9402523040771484, "learning_rate": 1.419114817190507e-06, "loss": 0.9077, "step": 59930 }, { "epoch": 0.7304425188597614, "grad_norm": 1.9037525653839111, "learning_rate": 1.4187940987812702e-06, "loss": 0.8211, "step": 59935 }, { "epoch": 0.7305034550839091, "grad_norm": 2.2023088932037354, "learning_rate": 1.4184733803720334e-06, "loss": 0.8042, "step": 59940 }, { "epoch": 0.730564391308057, "grad_norm": 2.083503484725952, "learning_rate": 1.4181526619627967e-06, "loss": 0.7943, "step": 59945 }, { "epoch": 0.7306253275322048, "grad_norm": 1.9921777248382568, "learning_rate": 1.4178319435535601e-06, "loss": 0.8254, "step": 59950 }, { "epoch": 0.7306862637563526, "grad_norm": 1.8862102031707764, "learning_rate": 1.4175112251443236e-06, "loss": 0.7782, "step": 59955 }, { "epoch": 0.7307471999805004, "grad_norm": 1.6842721700668335, "learning_rate": 1.4171905067350866e-06, "loss": 0.8215, "step": 59960 }, { "epoch": 0.7308081362046482, "grad_norm": 2.163421869277954, "learning_rate": 1.41686978832585e-06, "loss": 0.8372, "step": 59965 }, { "epoch": 0.7308690724287961, "grad_norm": 2.252777576446533, "learning_rate": 1.4165490699166134e-06, "loss": 0.8931, "step": 59970 }, { "epoch": 0.7309300086529438, "grad_norm": 1.971985101699829, "learning_rate": 1.4162283515073767e-06, "loss": 0.7947, "step": 59975 }, { "epoch": 0.7309909448770916, "grad_norm": 1.8599399328231812, "learning_rate": 1.41590763309814e-06, "loss": 0.8758, "step": 59980 }, { "epoch": 0.7310518811012394, "grad_norm": 2.169597864151001, "learning_rate": 1.4155869146889031e-06, "loss": 0.8218, "step": 59985 }, { "epoch": 0.7311128173253872, "grad_norm": 1.893736481666565, "learning_rate": 1.4152661962796666e-06, "loss": 0.8842, "step": 59990 }, { "epoch": 0.7311737535495351, "grad_norm": 2.4712324142456055, "learning_rate": 1.41494547787043e-06, "loss": 0.8432, "step": 59995 }, { "epoch": 0.7312346897736829, "grad_norm": 2.000084400177002, "learning_rate": 1.414624759461193e-06, "loss": 0.8417, "step": 60000 }, { "epoch": 0.7312956259978307, "grad_norm": 1.9712305068969727, "learning_rate": 1.4143040410519565e-06, "loss": 0.8318, "step": 60005 }, { "epoch": 0.7313565622219784, "grad_norm": 1.7615785598754883, "learning_rate": 1.41398332264272e-06, "loss": 0.8376, "step": 60010 }, { "epoch": 0.7314174984461262, "grad_norm": 2.070666551589966, "learning_rate": 1.4136626042334831e-06, "loss": 0.8327, "step": 60015 }, { "epoch": 0.7314784346702741, "grad_norm": 1.8597913980484009, "learning_rate": 1.4133418858242464e-06, "loss": 0.7917, "step": 60020 }, { "epoch": 0.7315393708944219, "grad_norm": 2.151432514190674, "learning_rate": 1.4130211674150096e-06, "loss": 0.7567, "step": 60025 }, { "epoch": 0.7316003071185697, "grad_norm": 2.1313185691833496, "learning_rate": 1.412700449005773e-06, "loss": 0.8719, "step": 60030 }, { "epoch": 0.7316612433427175, "grad_norm": 2.635141611099243, "learning_rate": 1.4123797305965365e-06, "loss": 0.8711, "step": 60035 }, { "epoch": 0.7317221795668654, "grad_norm": 1.9202868938446045, "learning_rate": 1.4120590121872995e-06, "loss": 0.7942, "step": 60040 }, { "epoch": 0.7317831157910131, "grad_norm": 1.798466682434082, "learning_rate": 1.411738293778063e-06, "loss": 0.7544, "step": 60045 }, { "epoch": 0.7318440520151609, "grad_norm": 1.837325930595398, "learning_rate": 1.4114175753688264e-06, "loss": 0.8368, "step": 60050 }, { "epoch": 0.7319049882393087, "grad_norm": 2.0546131134033203, "learning_rate": 1.4110968569595896e-06, "loss": 0.8246, "step": 60055 }, { "epoch": 0.7319659244634565, "grad_norm": 2.0328493118286133, "learning_rate": 1.410776138550353e-06, "loss": 0.8443, "step": 60060 }, { "epoch": 0.7320268606876044, "grad_norm": 1.792251706123352, "learning_rate": 1.410455420141116e-06, "loss": 0.8431, "step": 60065 }, { "epoch": 0.7320877969117522, "grad_norm": 1.937503695487976, "learning_rate": 1.4101347017318795e-06, "loss": 0.7842, "step": 60070 }, { "epoch": 0.7321487331359, "grad_norm": 1.875333547592163, "learning_rate": 1.409813983322643e-06, "loss": 0.737, "step": 60075 }, { "epoch": 0.7322096693600477, "grad_norm": 2.0131924152374268, "learning_rate": 1.409493264913406e-06, "loss": 0.7765, "step": 60080 }, { "epoch": 0.7322706055841955, "grad_norm": 2.12199068069458, "learning_rate": 1.4091725465041694e-06, "loss": 0.7373, "step": 60085 }, { "epoch": 0.7323315418083434, "grad_norm": 2.4243783950805664, "learning_rate": 1.4088518280949328e-06, "loss": 0.8297, "step": 60090 }, { "epoch": 0.7323924780324912, "grad_norm": 1.858393907546997, "learning_rate": 1.408531109685696e-06, "loss": 0.8536, "step": 60095 }, { "epoch": 0.732453414256639, "grad_norm": 2.0320520401000977, "learning_rate": 1.4082103912764595e-06, "loss": 0.7974, "step": 60100 }, { "epoch": 0.7325143504807868, "grad_norm": 2.0566813945770264, "learning_rate": 1.4078896728672225e-06, "loss": 0.8367, "step": 60105 }, { "epoch": 0.7325752867049347, "grad_norm": 1.945123553276062, "learning_rate": 1.407568954457986e-06, "loss": 0.8064, "step": 60110 }, { "epoch": 0.7326362229290824, "grad_norm": 1.9292240142822266, "learning_rate": 1.4072482360487494e-06, "loss": 0.7574, "step": 60115 }, { "epoch": 0.7326971591532302, "grad_norm": 1.9583137035369873, "learning_rate": 1.4069275176395124e-06, "loss": 0.7548, "step": 60120 }, { "epoch": 0.732758095377378, "grad_norm": 1.9471328258514404, "learning_rate": 1.4066067992302759e-06, "loss": 0.7571, "step": 60125 }, { "epoch": 0.7328190316015258, "grad_norm": 2.1944098472595215, "learning_rate": 1.4062860808210393e-06, "loss": 0.7323, "step": 60130 }, { "epoch": 0.7328799678256737, "grad_norm": 1.777182936668396, "learning_rate": 1.4059653624118025e-06, "loss": 0.8041, "step": 60135 }, { "epoch": 0.7329409040498215, "grad_norm": 2.2956533432006836, "learning_rate": 1.405644644002566e-06, "loss": 0.8721, "step": 60140 }, { "epoch": 0.7330018402739693, "grad_norm": 1.9084786176681519, "learning_rate": 1.4053239255933294e-06, "loss": 0.788, "step": 60145 }, { "epoch": 0.733062776498117, "grad_norm": 1.996117115020752, "learning_rate": 1.4050032071840924e-06, "loss": 0.8512, "step": 60150 }, { "epoch": 0.7331237127222648, "grad_norm": 1.9651516675949097, "learning_rate": 1.4046824887748559e-06, "loss": 0.8487, "step": 60155 }, { "epoch": 0.7331846489464127, "grad_norm": 1.9609308242797852, "learning_rate": 1.404361770365619e-06, "loss": 0.7994, "step": 60160 }, { "epoch": 0.7332455851705605, "grad_norm": 2.5488743782043457, "learning_rate": 1.4040410519563823e-06, "loss": 0.7831, "step": 60165 }, { "epoch": 0.7333065213947083, "grad_norm": 1.9246968030929565, "learning_rate": 1.4037203335471458e-06, "loss": 0.8991, "step": 60170 }, { "epoch": 0.7333674576188561, "grad_norm": 1.7661125659942627, "learning_rate": 1.403399615137909e-06, "loss": 0.838, "step": 60175 }, { "epoch": 0.733428393843004, "grad_norm": 2.1684296131134033, "learning_rate": 1.4030788967286724e-06, "loss": 0.9701, "step": 60180 }, { "epoch": 0.7334893300671517, "grad_norm": 1.9793485403060913, "learning_rate": 1.4027581783194359e-06, "loss": 0.8472, "step": 60185 }, { "epoch": 0.7335502662912995, "grad_norm": 1.8180277347564697, "learning_rate": 1.4024374599101989e-06, "loss": 0.837, "step": 60190 }, { "epoch": 0.7336112025154473, "grad_norm": 1.7405383586883545, "learning_rate": 1.4021167415009623e-06, "loss": 0.7476, "step": 60195 }, { "epoch": 0.7336721387395951, "grad_norm": 2.0116958618164062, "learning_rate": 1.4017960230917256e-06, "loss": 0.8318, "step": 60200 }, { "epoch": 0.733733074963743, "grad_norm": 2.077918529510498, "learning_rate": 1.4014753046824888e-06, "loss": 0.8516, "step": 60205 }, { "epoch": 0.7337940111878908, "grad_norm": 2.2797017097473145, "learning_rate": 1.4011545862732522e-06, "loss": 0.774, "step": 60210 }, { "epoch": 0.7338549474120386, "grad_norm": 2.13273024559021, "learning_rate": 1.4008338678640155e-06, "loss": 0.7993, "step": 60215 }, { "epoch": 0.7339158836361863, "grad_norm": 2.005120038986206, "learning_rate": 1.400513149454779e-06, "loss": 0.8002, "step": 60220 }, { "epoch": 0.7339768198603341, "grad_norm": 1.8635783195495605, "learning_rate": 1.4001924310455423e-06, "loss": 0.8367, "step": 60225 }, { "epoch": 0.734037756084482, "grad_norm": 2.030118227005005, "learning_rate": 1.3998717126363054e-06, "loss": 0.8257, "step": 60230 }, { "epoch": 0.7340986923086298, "grad_norm": 1.6903098821640015, "learning_rate": 1.3995509942270688e-06, "loss": 0.8042, "step": 60235 }, { "epoch": 0.7341596285327776, "grad_norm": 1.9562112092971802, "learning_rate": 1.399230275817832e-06, "loss": 0.8682, "step": 60240 }, { "epoch": 0.7342205647569254, "grad_norm": 2.096735954284668, "learning_rate": 1.3989095574085953e-06, "loss": 0.8234, "step": 60245 }, { "epoch": 0.7342815009810733, "grad_norm": 1.8354700803756714, "learning_rate": 1.3985888389993587e-06, "loss": 0.7907, "step": 60250 }, { "epoch": 0.734342437205221, "grad_norm": 1.9414511919021606, "learning_rate": 1.398268120590122e-06, "loss": 0.714, "step": 60255 }, { "epoch": 0.7344033734293688, "grad_norm": 1.8381882905960083, "learning_rate": 1.3979474021808854e-06, "loss": 0.8114, "step": 60260 }, { "epoch": 0.7344643096535166, "grad_norm": 1.8999909162521362, "learning_rate": 1.3976266837716488e-06, "loss": 0.8346, "step": 60265 }, { "epoch": 0.7345252458776644, "grad_norm": 2.0079569816589355, "learning_rate": 1.3973059653624118e-06, "loss": 0.7787, "step": 60270 }, { "epoch": 0.7345861821018123, "grad_norm": 1.809327244758606, "learning_rate": 1.3969852469531753e-06, "loss": 0.8376, "step": 60275 }, { "epoch": 0.7346471183259601, "grad_norm": 2.085193157196045, "learning_rate": 1.3966645285439385e-06, "loss": 0.8166, "step": 60280 }, { "epoch": 0.7347080545501079, "grad_norm": 1.9921834468841553, "learning_rate": 1.396343810134702e-06, "loss": 0.8562, "step": 60285 }, { "epoch": 0.7347689907742556, "grad_norm": 2.0936801433563232, "learning_rate": 1.3960230917254652e-06, "loss": 0.8338, "step": 60290 }, { "epoch": 0.7348299269984034, "grad_norm": 2.1602025032043457, "learning_rate": 1.3957023733162284e-06, "loss": 0.7526, "step": 60295 }, { "epoch": 0.7348908632225513, "grad_norm": 2.2827415466308594, "learning_rate": 1.3953816549069918e-06, "loss": 0.7901, "step": 60300 }, { "epoch": 0.7349517994466991, "grad_norm": 2.0536062717437744, "learning_rate": 1.3950609364977553e-06, "loss": 0.8579, "step": 60305 }, { "epoch": 0.7350127356708469, "grad_norm": 1.9052605628967285, "learning_rate": 1.3947402180885183e-06, "loss": 0.8006, "step": 60310 }, { "epoch": 0.7350736718949947, "grad_norm": 1.9175013303756714, "learning_rate": 1.3944194996792817e-06, "loss": 0.7757, "step": 60315 }, { "epoch": 0.7351346081191426, "grad_norm": 2.23494029045105, "learning_rate": 1.394098781270045e-06, "loss": 0.8578, "step": 60320 }, { "epoch": 0.7351955443432903, "grad_norm": 2.055300712585449, "learning_rate": 1.3937780628608084e-06, "loss": 0.8575, "step": 60325 }, { "epoch": 0.7352564805674381, "grad_norm": 2.6076412200927734, "learning_rate": 1.3934573444515716e-06, "loss": 0.8679, "step": 60330 }, { "epoch": 0.7353174167915859, "grad_norm": 1.8157260417938232, "learning_rate": 1.3931366260423348e-06, "loss": 0.8046, "step": 60335 }, { "epoch": 0.7353783530157337, "grad_norm": 2.012058734893799, "learning_rate": 1.3928159076330983e-06, "loss": 0.859, "step": 60340 }, { "epoch": 0.7354392892398816, "grad_norm": 1.9828581809997559, "learning_rate": 1.3924951892238617e-06, "loss": 0.7864, "step": 60345 }, { "epoch": 0.7355002254640294, "grad_norm": 1.6445215940475464, "learning_rate": 1.3921744708146247e-06, "loss": 0.7439, "step": 60350 }, { "epoch": 0.7355611616881772, "grad_norm": 1.67390775680542, "learning_rate": 1.3918537524053882e-06, "loss": 0.8231, "step": 60355 }, { "epoch": 0.7356220979123249, "grad_norm": 2.333819627761841, "learning_rate": 1.3915330339961514e-06, "loss": 0.8559, "step": 60360 }, { "epoch": 0.7356830341364727, "grad_norm": 1.7201427221298218, "learning_rate": 1.3912123155869149e-06, "loss": 0.8538, "step": 60365 }, { "epoch": 0.7357439703606206, "grad_norm": 1.801392674446106, "learning_rate": 1.390891597177678e-06, "loss": 0.7604, "step": 60370 }, { "epoch": 0.7358049065847684, "grad_norm": 1.976709246635437, "learning_rate": 1.3905708787684413e-06, "loss": 0.8376, "step": 60375 }, { "epoch": 0.7358658428089162, "grad_norm": 2.2984697818756104, "learning_rate": 1.3902501603592048e-06, "loss": 0.8387, "step": 60380 }, { "epoch": 0.735926779033064, "grad_norm": 1.6006603240966797, "learning_rate": 1.3899294419499682e-06, "loss": 0.812, "step": 60385 }, { "epoch": 0.7359877152572118, "grad_norm": 1.9053846597671509, "learning_rate": 1.3896087235407312e-06, "loss": 0.8199, "step": 60390 }, { "epoch": 0.7360486514813596, "grad_norm": 2.225848913192749, "learning_rate": 1.3892880051314946e-06, "loss": 0.8332, "step": 60395 }, { "epoch": 0.7361095877055074, "grad_norm": 1.8491384983062744, "learning_rate": 1.3889672867222579e-06, "loss": 0.7833, "step": 60400 }, { "epoch": 0.7361705239296552, "grad_norm": 2.0013046264648438, "learning_rate": 1.3886465683130213e-06, "loss": 0.8067, "step": 60405 }, { "epoch": 0.736231460153803, "grad_norm": 1.7212986946105957, "learning_rate": 1.3883258499037848e-06, "loss": 0.779, "step": 60410 }, { "epoch": 0.7362923963779509, "grad_norm": 1.9132276773452759, "learning_rate": 1.3880051314945478e-06, "loss": 0.7492, "step": 60415 }, { "epoch": 0.7363533326020987, "grad_norm": 2.3540432453155518, "learning_rate": 1.3876844130853112e-06, "loss": 0.8197, "step": 60420 }, { "epoch": 0.7364142688262465, "grad_norm": 1.9642716646194458, "learning_rate": 1.3873636946760747e-06, "loss": 0.8292, "step": 60425 }, { "epoch": 0.7364752050503942, "grad_norm": 2.3408710956573486, "learning_rate": 1.3870429762668377e-06, "loss": 0.809, "step": 60430 }, { "epoch": 0.736536141274542, "grad_norm": 1.9747179746627808, "learning_rate": 1.3867222578576011e-06, "loss": 0.8267, "step": 60435 }, { "epoch": 0.7365970774986899, "grad_norm": 2.297227144241333, "learning_rate": 1.3864015394483646e-06, "loss": 0.7998, "step": 60440 }, { "epoch": 0.7366580137228377, "grad_norm": 2.1571381092071533, "learning_rate": 1.3860808210391278e-06, "loss": 0.8548, "step": 60445 }, { "epoch": 0.7367189499469855, "grad_norm": 1.7579984664916992, "learning_rate": 1.3857601026298912e-06, "loss": 0.8027, "step": 60450 }, { "epoch": 0.7367798861711333, "grad_norm": 2.0658323764801025, "learning_rate": 1.3854393842206542e-06, "loss": 0.8795, "step": 60455 }, { "epoch": 0.7368408223952811, "grad_norm": 1.9721221923828125, "learning_rate": 1.3851186658114177e-06, "loss": 0.8483, "step": 60460 }, { "epoch": 0.7369017586194289, "grad_norm": 2.145517587661743, "learning_rate": 1.3847979474021811e-06, "loss": 0.8095, "step": 60465 }, { "epoch": 0.7369626948435767, "grad_norm": 1.8846720457077026, "learning_rate": 1.3844772289929441e-06, "loss": 0.7948, "step": 60470 }, { "epoch": 0.7370236310677245, "grad_norm": 2.2629852294921875, "learning_rate": 1.3841565105837076e-06, "loss": 0.8398, "step": 60475 }, { "epoch": 0.7370845672918723, "grad_norm": 2.0392520427703857, "learning_rate": 1.383835792174471e-06, "loss": 0.8875, "step": 60480 }, { "epoch": 0.7371455035160202, "grad_norm": 1.8591742515563965, "learning_rate": 1.3835150737652342e-06, "loss": 0.7398, "step": 60485 }, { "epoch": 0.737206439740168, "grad_norm": 2.245176076889038, "learning_rate": 1.3831943553559977e-06, "loss": 0.8287, "step": 60490 }, { "epoch": 0.7372673759643158, "grad_norm": 1.8644376993179321, "learning_rate": 1.3828736369467607e-06, "loss": 0.8714, "step": 60495 }, { "epoch": 0.7373283121884635, "grad_norm": 1.7579377889633179, "learning_rate": 1.3825529185375241e-06, "loss": 0.8185, "step": 60500 }, { "epoch": 0.7373892484126113, "grad_norm": 1.983998417854309, "learning_rate": 1.3822322001282876e-06, "loss": 0.7727, "step": 60505 }, { "epoch": 0.7374501846367592, "grad_norm": 1.8698534965515137, "learning_rate": 1.3819114817190508e-06, "loss": 0.7508, "step": 60510 }, { "epoch": 0.737511120860907, "grad_norm": 1.971655011177063, "learning_rate": 1.381590763309814e-06, "loss": 0.8804, "step": 60515 }, { "epoch": 0.7375720570850548, "grad_norm": 2.0222055912017822, "learning_rate": 1.3812700449005775e-06, "loss": 0.7819, "step": 60520 }, { "epoch": 0.7376329933092026, "grad_norm": 1.6767842769622803, "learning_rate": 1.3809493264913407e-06, "loss": 0.8441, "step": 60525 }, { "epoch": 0.7376939295333504, "grad_norm": 1.8415707349777222, "learning_rate": 1.3806286080821041e-06, "loss": 0.7678, "step": 60530 }, { "epoch": 0.7377548657574982, "grad_norm": 1.7673083543777466, "learning_rate": 1.3803078896728672e-06, "loss": 0.8655, "step": 60535 }, { "epoch": 0.737815801981646, "grad_norm": 1.7529919147491455, "learning_rate": 1.3799871712636306e-06, "loss": 0.7887, "step": 60540 }, { "epoch": 0.7378767382057938, "grad_norm": 1.8854247331619263, "learning_rate": 1.379666452854394e-06, "loss": 0.7973, "step": 60545 }, { "epoch": 0.7379376744299416, "grad_norm": 1.9450913667678833, "learning_rate": 1.3793457344451573e-06, "loss": 0.7969, "step": 60550 }, { "epoch": 0.7379986106540894, "grad_norm": 1.6576616764068604, "learning_rate": 1.3790250160359205e-06, "loss": 0.796, "step": 60555 }, { "epoch": 0.7380595468782373, "grad_norm": 1.9489482641220093, "learning_rate": 1.378704297626684e-06, "loss": 0.8174, "step": 60560 }, { "epoch": 0.7381204831023851, "grad_norm": 2.1455495357513428, "learning_rate": 1.3783835792174472e-06, "loss": 0.7721, "step": 60565 }, { "epoch": 0.7381814193265328, "grad_norm": 2.539090394973755, "learning_rate": 1.3780628608082106e-06, "loss": 0.7935, "step": 60570 }, { "epoch": 0.7382423555506806, "grad_norm": 1.9526325464248657, "learning_rate": 1.3777421423989736e-06, "loss": 0.8311, "step": 60575 }, { "epoch": 0.7383032917748285, "grad_norm": 1.9577966928482056, "learning_rate": 1.377421423989737e-06, "loss": 0.8216, "step": 60580 }, { "epoch": 0.7383642279989763, "grad_norm": 2.430577039718628, "learning_rate": 1.3771007055805005e-06, "loss": 0.7505, "step": 60585 }, { "epoch": 0.7384251642231241, "grad_norm": 1.9930434226989746, "learning_rate": 1.3767799871712637e-06, "loss": 0.829, "step": 60590 }, { "epoch": 0.7384861004472719, "grad_norm": 1.768865704536438, "learning_rate": 1.376459268762027e-06, "loss": 0.8467, "step": 60595 }, { "epoch": 0.7385470366714197, "grad_norm": 1.9643281698226929, "learning_rate": 1.3761385503527904e-06, "loss": 0.8027, "step": 60600 }, { "epoch": 0.7386079728955675, "grad_norm": 2.0894744396209717, "learning_rate": 1.3758178319435536e-06, "loss": 0.8075, "step": 60605 }, { "epoch": 0.7386689091197153, "grad_norm": 2.5575826168060303, "learning_rate": 1.375497113534317e-06, "loss": 0.858, "step": 60610 }, { "epoch": 0.7387298453438631, "grad_norm": 2.105126142501831, "learning_rate": 1.37517639512508e-06, "loss": 0.8384, "step": 60615 }, { "epoch": 0.7387907815680109, "grad_norm": 1.919845461845398, "learning_rate": 1.3748556767158435e-06, "loss": 0.7638, "step": 60620 }, { "epoch": 0.7388517177921587, "grad_norm": 2.130410671234131, "learning_rate": 1.374534958306607e-06, "loss": 0.833, "step": 60625 }, { "epoch": 0.7389126540163066, "grad_norm": 1.705581784248352, "learning_rate": 1.3742142398973702e-06, "loss": 0.7446, "step": 60630 }, { "epoch": 0.7389735902404544, "grad_norm": 1.9936753511428833, "learning_rate": 1.3738935214881336e-06, "loss": 0.8595, "step": 60635 }, { "epoch": 0.7390345264646021, "grad_norm": 2.0093963146209717, "learning_rate": 1.3735728030788969e-06, "loss": 0.7682, "step": 60640 }, { "epoch": 0.7390954626887499, "grad_norm": 1.8026337623596191, "learning_rate": 1.37325208466966e-06, "loss": 0.8111, "step": 60645 }, { "epoch": 0.7391563989128977, "grad_norm": 2.016523838043213, "learning_rate": 1.3729313662604235e-06, "loss": 0.8471, "step": 60650 }, { "epoch": 0.7392173351370456, "grad_norm": 1.833520531654358, "learning_rate": 1.3726106478511866e-06, "loss": 0.8629, "step": 60655 }, { "epoch": 0.7392782713611934, "grad_norm": 2.813390016555786, "learning_rate": 1.37228992944195e-06, "loss": 0.8271, "step": 60660 }, { "epoch": 0.7393392075853412, "grad_norm": 1.9911103248596191, "learning_rate": 1.3719692110327134e-06, "loss": 0.8609, "step": 60665 }, { "epoch": 0.739400143809489, "grad_norm": 2.4362802505493164, "learning_rate": 1.3716484926234767e-06, "loss": 0.8336, "step": 60670 }, { "epoch": 0.7394610800336368, "grad_norm": 1.737987756729126, "learning_rate": 1.3713277742142401e-06, "loss": 0.8125, "step": 60675 }, { "epoch": 0.7395220162577846, "grad_norm": 2.109809637069702, "learning_rate": 1.3710070558050033e-06, "loss": 0.8465, "step": 60680 }, { "epoch": 0.7395829524819324, "grad_norm": 1.7864899635314941, "learning_rate": 1.3706863373957666e-06, "loss": 0.7995, "step": 60685 }, { "epoch": 0.7396438887060802, "grad_norm": 2.1656787395477295, "learning_rate": 1.37036561898653e-06, "loss": 0.8421, "step": 60690 }, { "epoch": 0.739704824930228, "grad_norm": 2.107588529586792, "learning_rate": 1.370044900577293e-06, "loss": 0.7549, "step": 60695 }, { "epoch": 0.7397657611543759, "grad_norm": 2.0179152488708496, "learning_rate": 1.3697241821680565e-06, "loss": 0.7706, "step": 60700 }, { "epoch": 0.7398266973785237, "grad_norm": 2.041161298751831, "learning_rate": 1.36940346375882e-06, "loss": 0.801, "step": 60705 }, { "epoch": 0.7398876336026714, "grad_norm": 2.135524272918701, "learning_rate": 1.3690827453495831e-06, "loss": 0.8886, "step": 60710 }, { "epoch": 0.7399485698268192, "grad_norm": 1.8992500305175781, "learning_rate": 1.3687620269403466e-06, "loss": 0.7584, "step": 60715 }, { "epoch": 0.740009506050967, "grad_norm": 1.9523221254348755, "learning_rate": 1.3684413085311098e-06, "loss": 0.8726, "step": 60720 }, { "epoch": 0.7400704422751149, "grad_norm": 1.9547569751739502, "learning_rate": 1.368120590121873e-06, "loss": 0.7922, "step": 60725 }, { "epoch": 0.7401313784992627, "grad_norm": 1.9644969701766968, "learning_rate": 1.3677998717126365e-06, "loss": 0.8473, "step": 60730 }, { "epoch": 0.7401923147234105, "grad_norm": 1.8620240688323975, "learning_rate": 1.3674791533034e-06, "loss": 0.8317, "step": 60735 }, { "epoch": 0.7402532509475583, "grad_norm": 2.443526029586792, "learning_rate": 1.367158434894163e-06, "loss": 0.8014, "step": 60740 }, { "epoch": 0.740314187171706, "grad_norm": 1.9833283424377441, "learning_rate": 1.3668377164849264e-06, "loss": 0.893, "step": 60745 }, { "epoch": 0.7403751233958539, "grad_norm": 2.2233729362487793, "learning_rate": 1.3665169980756896e-06, "loss": 0.8056, "step": 60750 }, { "epoch": 0.7404360596200017, "grad_norm": 2.1046814918518066, "learning_rate": 1.366196279666453e-06, "loss": 0.8283, "step": 60755 }, { "epoch": 0.7404969958441495, "grad_norm": 1.6850045919418335, "learning_rate": 1.3658755612572165e-06, "loss": 0.8092, "step": 60760 }, { "epoch": 0.7405579320682973, "grad_norm": 1.8980672359466553, "learning_rate": 1.3655548428479795e-06, "loss": 0.8216, "step": 60765 }, { "epoch": 0.7406188682924452, "grad_norm": 1.8895032405853271, "learning_rate": 1.365234124438743e-06, "loss": 0.8371, "step": 60770 }, { "epoch": 0.740679804516593, "grad_norm": 1.6959397792816162, "learning_rate": 1.3649134060295064e-06, "loss": 0.8599, "step": 60775 }, { "epoch": 0.7407407407407407, "grad_norm": 2.470405340194702, "learning_rate": 1.3645926876202694e-06, "loss": 0.8407, "step": 60780 }, { "epoch": 0.7408016769648885, "grad_norm": 2.040710687637329, "learning_rate": 1.3642719692110328e-06, "loss": 0.7719, "step": 60785 }, { "epoch": 0.7408626131890363, "grad_norm": 1.7455005645751953, "learning_rate": 1.363951250801796e-06, "loss": 0.8366, "step": 60790 }, { "epoch": 0.7409235494131842, "grad_norm": 1.8455792665481567, "learning_rate": 1.3636305323925595e-06, "loss": 0.9048, "step": 60795 }, { "epoch": 0.740984485637332, "grad_norm": 2.1415581703186035, "learning_rate": 1.363309813983323e-06, "loss": 0.8551, "step": 60800 }, { "epoch": 0.7410454218614798, "grad_norm": 2.012826681137085, "learning_rate": 1.362989095574086e-06, "loss": 0.8212, "step": 60805 }, { "epoch": 0.7411063580856276, "grad_norm": 2.8568942546844482, "learning_rate": 1.3626683771648494e-06, "loss": 0.8109, "step": 60810 }, { "epoch": 0.7411672943097753, "grad_norm": 2.0614547729492188, "learning_rate": 1.3623476587556128e-06, "loss": 0.8649, "step": 60815 }, { "epoch": 0.7412282305339232, "grad_norm": 1.8882228136062622, "learning_rate": 1.3620269403463759e-06, "loss": 0.7899, "step": 60820 }, { "epoch": 0.741289166758071, "grad_norm": 2.054356336593628, "learning_rate": 1.3617062219371393e-06, "loss": 0.7669, "step": 60825 }, { "epoch": 0.7413501029822188, "grad_norm": 2.062664031982422, "learning_rate": 1.3613855035279025e-06, "loss": 0.8161, "step": 60830 }, { "epoch": 0.7414110392063666, "grad_norm": 2.1299185752868652, "learning_rate": 1.361064785118666e-06, "loss": 0.8432, "step": 60835 }, { "epoch": 0.7414719754305145, "grad_norm": 2.1633005142211914, "learning_rate": 1.3607440667094294e-06, "loss": 0.853, "step": 60840 }, { "epoch": 0.7415329116546623, "grad_norm": 2.0298705101013184, "learning_rate": 1.3604233483001924e-06, "loss": 0.844, "step": 60845 }, { "epoch": 0.74159384787881, "grad_norm": 2.012115001678467, "learning_rate": 1.3601026298909559e-06, "loss": 0.8114, "step": 60850 }, { "epoch": 0.7416547841029578, "grad_norm": 1.934411883354187, "learning_rate": 1.3597819114817193e-06, "loss": 0.7998, "step": 60855 }, { "epoch": 0.7417157203271056, "grad_norm": 1.8955986499786377, "learning_rate": 1.3594611930724825e-06, "loss": 0.9059, "step": 60860 }, { "epoch": 0.7417766565512535, "grad_norm": 2.0811736583709717, "learning_rate": 1.3591404746632458e-06, "loss": 0.7691, "step": 60865 }, { "epoch": 0.7418375927754013, "grad_norm": 1.827860713005066, "learning_rate": 1.358819756254009e-06, "loss": 0.8308, "step": 60870 }, { "epoch": 0.7418985289995491, "grad_norm": 1.8403292894363403, "learning_rate": 1.3584990378447724e-06, "loss": 0.7797, "step": 60875 }, { "epoch": 0.7419594652236968, "grad_norm": 2.114271402359009, "learning_rate": 1.3581783194355359e-06, "loss": 0.8391, "step": 60880 }, { "epoch": 0.7420204014478446, "grad_norm": 2.0065577030181885, "learning_rate": 1.3578576010262989e-06, "loss": 0.8369, "step": 60885 }, { "epoch": 0.7420813376719925, "grad_norm": 2.597243547439575, "learning_rate": 1.3575368826170623e-06, "loss": 0.8068, "step": 60890 }, { "epoch": 0.7421422738961403, "grad_norm": 2.105574369430542, "learning_rate": 1.3572161642078258e-06, "loss": 0.8352, "step": 60895 }, { "epoch": 0.7422032101202881, "grad_norm": 2.144430160522461, "learning_rate": 1.356895445798589e-06, "loss": 0.831, "step": 60900 }, { "epoch": 0.7422641463444359, "grad_norm": 1.9527326822280884, "learning_rate": 1.3565747273893522e-06, "loss": 0.7505, "step": 60905 }, { "epoch": 0.7423250825685838, "grad_norm": 2.0007431507110596, "learning_rate": 1.3562540089801154e-06, "loss": 0.7921, "step": 60910 }, { "epoch": 0.7423860187927315, "grad_norm": 2.1085898876190186, "learning_rate": 1.3559332905708789e-06, "loss": 0.7861, "step": 60915 }, { "epoch": 0.7424469550168793, "grad_norm": 1.8963459730148315, "learning_rate": 1.3556125721616423e-06, "loss": 0.8432, "step": 60920 }, { "epoch": 0.7425078912410271, "grad_norm": 1.8810158967971802, "learning_rate": 1.3552918537524053e-06, "loss": 0.8181, "step": 60925 }, { "epoch": 0.7425688274651749, "grad_norm": 1.9975790977478027, "learning_rate": 1.3549711353431688e-06, "loss": 0.7891, "step": 60930 }, { "epoch": 0.7426297636893228, "grad_norm": 1.8962376117706299, "learning_rate": 1.3546504169339322e-06, "loss": 0.8164, "step": 60935 }, { "epoch": 0.7426906999134706, "grad_norm": 1.761742353439331, "learning_rate": 1.3543296985246955e-06, "loss": 0.8876, "step": 60940 }, { "epoch": 0.7427516361376184, "grad_norm": 1.7646363973617554, "learning_rate": 1.3540089801154587e-06, "loss": 0.829, "step": 60945 }, { "epoch": 0.7428125723617661, "grad_norm": 1.8330081701278687, "learning_rate": 1.353688261706222e-06, "loss": 0.8252, "step": 60950 }, { "epoch": 0.7428735085859139, "grad_norm": 2.039348840713501, "learning_rate": 1.3533675432969854e-06, "loss": 0.8748, "step": 60955 }, { "epoch": 0.7429344448100618, "grad_norm": 1.8668421506881714, "learning_rate": 1.3530468248877488e-06, "loss": 0.7798, "step": 60960 }, { "epoch": 0.7429953810342096, "grad_norm": 1.9877960681915283, "learning_rate": 1.3527261064785118e-06, "loss": 0.827, "step": 60965 }, { "epoch": 0.7430563172583574, "grad_norm": 1.8866342306137085, "learning_rate": 1.3524053880692752e-06, "loss": 0.8185, "step": 60970 }, { "epoch": 0.7431172534825052, "grad_norm": 1.8894882202148438, "learning_rate": 1.3520846696600387e-06, "loss": 0.8113, "step": 60975 }, { "epoch": 0.7431781897066531, "grad_norm": 1.9786702394485474, "learning_rate": 1.351763951250802e-06, "loss": 0.8246, "step": 60980 }, { "epoch": 0.7432391259308008, "grad_norm": 1.8188159465789795, "learning_rate": 1.3514432328415654e-06, "loss": 0.8465, "step": 60985 }, { "epoch": 0.7433000621549486, "grad_norm": 1.8451719284057617, "learning_rate": 1.3511225144323284e-06, "loss": 0.8174, "step": 60990 }, { "epoch": 0.7433609983790964, "grad_norm": 2.1543004512786865, "learning_rate": 1.3508017960230918e-06, "loss": 0.8589, "step": 60995 }, { "epoch": 0.7434219346032442, "grad_norm": 1.904349684715271, "learning_rate": 1.3504810776138553e-06, "loss": 0.8317, "step": 61000 }, { "epoch": 0.7434828708273921, "grad_norm": 1.9629251956939697, "learning_rate": 1.3501603592046183e-06, "loss": 0.7893, "step": 61005 }, { "epoch": 0.7435438070515399, "grad_norm": 1.8348370790481567, "learning_rate": 1.3498396407953817e-06, "loss": 0.796, "step": 61010 }, { "epoch": 0.7436047432756877, "grad_norm": 2.073949098587036, "learning_rate": 1.3495189223861452e-06, "loss": 0.788, "step": 61015 }, { "epoch": 0.7436656794998354, "grad_norm": 1.9293484687805176, "learning_rate": 1.3491982039769084e-06, "loss": 0.8108, "step": 61020 }, { "epoch": 0.7437266157239832, "grad_norm": 1.909228801727295, "learning_rate": 1.3488774855676718e-06, "loss": 0.9256, "step": 61025 }, { "epoch": 0.7437875519481311, "grad_norm": 1.8694173097610474, "learning_rate": 1.348556767158435e-06, "loss": 0.7413, "step": 61030 }, { "epoch": 0.7438484881722789, "grad_norm": 1.8281831741333008, "learning_rate": 1.3482360487491983e-06, "loss": 0.7925, "step": 61035 }, { "epoch": 0.7439094243964267, "grad_norm": 1.817215919494629, "learning_rate": 1.3479153303399617e-06, "loss": 0.8325, "step": 61040 }, { "epoch": 0.7439703606205745, "grad_norm": 2.7383227348327637, "learning_rate": 1.3475946119307247e-06, "loss": 0.8602, "step": 61045 }, { "epoch": 0.7440312968447224, "grad_norm": 2.25779128074646, "learning_rate": 1.3472738935214882e-06, "loss": 0.8311, "step": 61050 }, { "epoch": 0.7440922330688701, "grad_norm": 2.4378795623779297, "learning_rate": 1.3469531751122516e-06, "loss": 0.8589, "step": 61055 }, { "epoch": 0.7441531692930179, "grad_norm": 1.8585712909698486, "learning_rate": 1.3466324567030148e-06, "loss": 0.7957, "step": 61060 }, { "epoch": 0.7442141055171657, "grad_norm": 2.13417387008667, "learning_rate": 1.3463117382937783e-06, "loss": 0.8011, "step": 61065 }, { "epoch": 0.7442750417413135, "grad_norm": 2.096310615539551, "learning_rate": 1.3459910198845415e-06, "loss": 0.8662, "step": 61070 }, { "epoch": 0.7443359779654614, "grad_norm": 2.1184780597686768, "learning_rate": 1.3456703014753047e-06, "loss": 0.8531, "step": 61075 }, { "epoch": 0.7443969141896092, "grad_norm": 1.9974315166473389, "learning_rate": 1.3453495830660682e-06, "loss": 0.799, "step": 61080 }, { "epoch": 0.744457850413757, "grad_norm": 1.9531230926513672, "learning_rate": 1.3450288646568312e-06, "loss": 0.7687, "step": 61085 }, { "epoch": 0.7445187866379047, "grad_norm": 1.8124408721923828, "learning_rate": 1.3447081462475946e-06, "loss": 0.841, "step": 61090 }, { "epoch": 0.7445797228620525, "grad_norm": 2.1759235858917236, "learning_rate": 1.344387427838358e-06, "loss": 0.7577, "step": 61095 }, { "epoch": 0.7446406590862004, "grad_norm": 1.9772672653198242, "learning_rate": 1.3440667094291213e-06, "loss": 0.7842, "step": 61100 }, { "epoch": 0.7447015953103482, "grad_norm": 2.283365249633789, "learning_rate": 1.3437459910198847e-06, "loss": 0.8276, "step": 61105 }, { "epoch": 0.744762531534496, "grad_norm": 2.044981002807617, "learning_rate": 1.3434252726106482e-06, "loss": 0.8776, "step": 61110 }, { "epoch": 0.7448234677586438, "grad_norm": 1.7948867082595825, "learning_rate": 1.3431045542014112e-06, "loss": 0.8749, "step": 61115 }, { "epoch": 0.7448844039827917, "grad_norm": 2.2129812240600586, "learning_rate": 1.3427838357921746e-06, "loss": 0.8492, "step": 61120 }, { "epoch": 0.7449453402069394, "grad_norm": 2.5662994384765625, "learning_rate": 1.3424631173829379e-06, "loss": 0.8026, "step": 61125 }, { "epoch": 0.7450062764310872, "grad_norm": 1.9817297458648682, "learning_rate": 1.342142398973701e-06, "loss": 0.8403, "step": 61130 }, { "epoch": 0.745067212655235, "grad_norm": 1.9633901119232178, "learning_rate": 1.3418216805644645e-06, "loss": 0.7591, "step": 61135 }, { "epoch": 0.7451281488793828, "grad_norm": 2.4737484455108643, "learning_rate": 1.3415009621552278e-06, "loss": 0.8271, "step": 61140 }, { "epoch": 0.7451890851035307, "grad_norm": 2.018709659576416, "learning_rate": 1.3411802437459912e-06, "loss": 0.8858, "step": 61145 }, { "epoch": 0.7452500213276785, "grad_norm": 2.062263250350952, "learning_rate": 1.3408595253367547e-06, "loss": 0.767, "step": 61150 }, { "epoch": 0.7453109575518263, "grad_norm": 1.702458381652832, "learning_rate": 1.3405388069275177e-06, "loss": 0.8293, "step": 61155 }, { "epoch": 0.745371893775974, "grad_norm": 2.099341869354248, "learning_rate": 1.3402180885182811e-06, "loss": 0.8686, "step": 61160 }, { "epoch": 0.7454328300001218, "grad_norm": 1.7271332740783691, "learning_rate": 1.3398973701090443e-06, "loss": 0.8321, "step": 61165 }, { "epoch": 0.7454937662242697, "grad_norm": 2.3097729682922363, "learning_rate": 1.3395766516998076e-06, "loss": 0.7802, "step": 61170 }, { "epoch": 0.7455547024484175, "grad_norm": 1.8845350742340088, "learning_rate": 1.339255933290571e-06, "loss": 0.7604, "step": 61175 }, { "epoch": 0.7456156386725653, "grad_norm": 1.725962519645691, "learning_rate": 1.3389352148813342e-06, "loss": 0.8301, "step": 61180 }, { "epoch": 0.7456765748967131, "grad_norm": 1.8924453258514404, "learning_rate": 1.3386144964720977e-06, "loss": 0.8395, "step": 61185 }, { "epoch": 0.745737511120861, "grad_norm": 1.7892481088638306, "learning_rate": 1.3382937780628611e-06, "loss": 0.7657, "step": 61190 }, { "epoch": 0.7457984473450087, "grad_norm": 1.7804113626480103, "learning_rate": 1.3379730596536241e-06, "loss": 0.8709, "step": 61195 }, { "epoch": 0.7458593835691565, "grad_norm": 1.7188875675201416, "learning_rate": 1.3376523412443876e-06, "loss": 0.777, "step": 61200 }, { "epoch": 0.7459203197933043, "grad_norm": 2.1224262714385986, "learning_rate": 1.3373316228351508e-06, "loss": 0.8134, "step": 61205 }, { "epoch": 0.7459812560174521, "grad_norm": 1.829349398612976, "learning_rate": 1.3370109044259142e-06, "loss": 0.8125, "step": 61210 }, { "epoch": 0.7460421922416, "grad_norm": 1.5828182697296143, "learning_rate": 1.3366901860166775e-06, "loss": 0.7868, "step": 61215 }, { "epoch": 0.7461031284657478, "grad_norm": 1.9310311079025269, "learning_rate": 1.3363694676074407e-06, "loss": 0.85, "step": 61220 }, { "epoch": 0.7461640646898956, "grad_norm": 1.8250046968460083, "learning_rate": 1.3360487491982041e-06, "loss": 0.7446, "step": 61225 }, { "epoch": 0.7462250009140433, "grad_norm": 2.1802268028259277, "learning_rate": 1.3357280307889676e-06, "loss": 0.852, "step": 61230 }, { "epoch": 0.7462859371381911, "grad_norm": 2.5652589797973633, "learning_rate": 1.3354073123797306e-06, "loss": 0.7944, "step": 61235 }, { "epoch": 0.746346873362339, "grad_norm": 2.215764045715332, "learning_rate": 1.335086593970494e-06, "loss": 0.8411, "step": 61240 }, { "epoch": 0.7464078095864868, "grad_norm": 1.8084431886672974, "learning_rate": 1.3347658755612573e-06, "loss": 0.8198, "step": 61245 }, { "epoch": 0.7464687458106346, "grad_norm": 1.9641104936599731, "learning_rate": 1.3344451571520207e-06, "loss": 0.844, "step": 61250 }, { "epoch": 0.7465296820347824, "grad_norm": 1.8805845975875854, "learning_rate": 1.334124438742784e-06, "loss": 0.7949, "step": 61255 }, { "epoch": 0.7465906182589302, "grad_norm": 1.9003480672836304, "learning_rate": 1.3338037203335472e-06, "loss": 0.8311, "step": 61260 }, { "epoch": 0.746651554483078, "grad_norm": 1.9594676494598389, "learning_rate": 1.3334830019243106e-06, "loss": 0.8016, "step": 61265 }, { "epoch": 0.7467124907072258, "grad_norm": 1.985957384109497, "learning_rate": 1.333162283515074e-06, "loss": 0.7686, "step": 61270 }, { "epoch": 0.7467734269313736, "grad_norm": 2.0636789798736572, "learning_rate": 1.332841565105837e-06, "loss": 0.8565, "step": 61275 }, { "epoch": 0.7468343631555214, "grad_norm": 1.8421406745910645, "learning_rate": 1.3325208466966005e-06, "loss": 0.7734, "step": 61280 }, { "epoch": 0.7468952993796693, "grad_norm": 1.7350572347640991, "learning_rate": 1.3322001282873637e-06, "loss": 0.7882, "step": 61285 }, { "epoch": 0.7469562356038171, "grad_norm": 2.023096799850464, "learning_rate": 1.3318794098781272e-06, "loss": 0.7618, "step": 61290 }, { "epoch": 0.7470171718279649, "grad_norm": 1.862914800643921, "learning_rate": 1.3315586914688904e-06, "loss": 0.7324, "step": 61295 }, { "epoch": 0.7470781080521126, "grad_norm": 2.282148838043213, "learning_rate": 1.3312379730596536e-06, "loss": 0.8103, "step": 61300 }, { "epoch": 0.7471390442762604, "grad_norm": 1.9339714050292969, "learning_rate": 1.330917254650417e-06, "loss": 0.7713, "step": 61305 }, { "epoch": 0.7471999805004083, "grad_norm": 2.0311193466186523, "learning_rate": 1.3305965362411805e-06, "loss": 0.7805, "step": 61310 }, { "epoch": 0.7472609167245561, "grad_norm": 1.6817060708999634, "learning_rate": 1.3302758178319435e-06, "loss": 0.8209, "step": 61315 }, { "epoch": 0.7473218529487039, "grad_norm": 1.8558704853057861, "learning_rate": 1.329955099422707e-06, "loss": 0.8873, "step": 61320 }, { "epoch": 0.7473827891728517, "grad_norm": 1.8544706106185913, "learning_rate": 1.3296343810134702e-06, "loss": 0.7646, "step": 61325 }, { "epoch": 0.7474437253969995, "grad_norm": 1.9525189399719238, "learning_rate": 1.3293136626042336e-06, "loss": 0.8363, "step": 61330 }, { "epoch": 0.7475046616211473, "grad_norm": 2.163430690765381, "learning_rate": 1.328992944194997e-06, "loss": 0.8884, "step": 61335 }, { "epoch": 0.7475655978452951, "grad_norm": 2.0512983798980713, "learning_rate": 1.32867222578576e-06, "loss": 0.8402, "step": 61340 }, { "epoch": 0.7476265340694429, "grad_norm": 2.0636980533599854, "learning_rate": 1.3283515073765235e-06, "loss": 0.8004, "step": 61345 }, { "epoch": 0.7476874702935907, "grad_norm": 1.9499433040618896, "learning_rate": 1.328030788967287e-06, "loss": 0.7891, "step": 61350 }, { "epoch": 0.7477484065177386, "grad_norm": 1.9086620807647705, "learning_rate": 1.32771007055805e-06, "loss": 0.7888, "step": 61355 }, { "epoch": 0.7478093427418864, "grad_norm": 2.051703453063965, "learning_rate": 1.3273893521488134e-06, "loss": 0.8054, "step": 61360 }, { "epoch": 0.7478702789660342, "grad_norm": 1.84699285030365, "learning_rate": 1.3270686337395769e-06, "loss": 0.759, "step": 61365 }, { "epoch": 0.7479312151901819, "grad_norm": 1.9856497049331665, "learning_rate": 1.32674791533034e-06, "loss": 0.7687, "step": 61370 }, { "epoch": 0.7479921514143297, "grad_norm": 1.718469262123108, "learning_rate": 1.3264271969211035e-06, "loss": 0.8373, "step": 61375 }, { "epoch": 0.7480530876384776, "grad_norm": 2.1478617191314697, "learning_rate": 1.3261064785118666e-06, "loss": 0.8362, "step": 61380 }, { "epoch": 0.7481140238626254, "grad_norm": 2.135751247406006, "learning_rate": 1.32578576010263e-06, "loss": 0.7831, "step": 61385 }, { "epoch": 0.7481749600867732, "grad_norm": 2.438603162765503, "learning_rate": 1.3254650416933934e-06, "loss": 0.8275, "step": 61390 }, { "epoch": 0.748235896310921, "grad_norm": 2.0453200340270996, "learning_rate": 1.3251443232841565e-06, "loss": 0.7843, "step": 61395 }, { "epoch": 0.7482968325350688, "grad_norm": 2.095100164413452, "learning_rate": 1.3248236048749199e-06, "loss": 0.8419, "step": 61400 }, { "epoch": 0.7483577687592166, "grad_norm": 1.9056307077407837, "learning_rate": 1.3245028864656833e-06, "loss": 0.8034, "step": 61405 }, { "epoch": 0.7484187049833644, "grad_norm": 2.335771322250366, "learning_rate": 1.3241821680564466e-06, "loss": 0.8412, "step": 61410 }, { "epoch": 0.7484796412075122, "grad_norm": 1.9193525314331055, "learning_rate": 1.32386144964721e-06, "loss": 0.7353, "step": 61415 }, { "epoch": 0.74854057743166, "grad_norm": 2.0497682094573975, "learning_rate": 1.323540731237973e-06, "loss": 0.8595, "step": 61420 }, { "epoch": 0.7486015136558078, "grad_norm": 1.8785921335220337, "learning_rate": 1.3232200128287365e-06, "loss": 0.7879, "step": 61425 }, { "epoch": 0.7486624498799557, "grad_norm": 2.0467052459716797, "learning_rate": 1.3228992944195e-06, "loss": 0.7813, "step": 61430 }, { "epoch": 0.7487233861041035, "grad_norm": 1.7138558626174927, "learning_rate": 1.322578576010263e-06, "loss": 0.8605, "step": 61435 }, { "epoch": 0.7487843223282512, "grad_norm": 2.0808022022247314, "learning_rate": 1.3222578576010264e-06, "loss": 0.8408, "step": 61440 }, { "epoch": 0.748845258552399, "grad_norm": 1.7397369146347046, "learning_rate": 1.3219371391917898e-06, "loss": 0.8106, "step": 61445 }, { "epoch": 0.7489061947765469, "grad_norm": 2.1446990966796875, "learning_rate": 1.321616420782553e-06, "loss": 0.793, "step": 61450 }, { "epoch": 0.7489671310006947, "grad_norm": 1.7726422548294067, "learning_rate": 1.3212957023733165e-06, "loss": 0.8773, "step": 61455 }, { "epoch": 0.7490280672248425, "grad_norm": 2.1535632610321045, "learning_rate": 1.3209749839640795e-06, "loss": 0.8167, "step": 61460 }, { "epoch": 0.7490890034489903, "grad_norm": 1.7784223556518555, "learning_rate": 1.320654265554843e-06, "loss": 0.821, "step": 61465 }, { "epoch": 0.7491499396731381, "grad_norm": 1.9290376901626587, "learning_rate": 1.3203335471456064e-06, "loss": 0.8491, "step": 61470 }, { "epoch": 0.7492108758972859, "grad_norm": 1.9891620874404907, "learning_rate": 1.3200128287363696e-06, "loss": 0.8246, "step": 61475 }, { "epoch": 0.7492718121214337, "grad_norm": 1.8983778953552246, "learning_rate": 1.3196921103271328e-06, "loss": 0.8139, "step": 61480 }, { "epoch": 0.7493327483455815, "grad_norm": 2.073460102081299, "learning_rate": 1.3193713919178963e-06, "loss": 0.8052, "step": 61485 }, { "epoch": 0.7493936845697293, "grad_norm": 1.730021357536316, "learning_rate": 1.3190506735086595e-06, "loss": 0.7509, "step": 61490 }, { "epoch": 0.7494546207938771, "grad_norm": 1.8085284233093262, "learning_rate": 1.318729955099423e-06, "loss": 0.7238, "step": 61495 }, { "epoch": 0.749515557018025, "grad_norm": 1.8163583278656006, "learning_rate": 1.318409236690186e-06, "loss": 0.8353, "step": 61500 }, { "epoch": 0.7495764932421728, "grad_norm": 2.025028705596924, "learning_rate": 1.3180885182809494e-06, "loss": 0.7679, "step": 61505 }, { "epoch": 0.7496374294663205, "grad_norm": 1.700360894203186, "learning_rate": 1.3177677998717128e-06, "loss": 0.8323, "step": 61510 }, { "epoch": 0.7496983656904683, "grad_norm": 2.040163040161133, "learning_rate": 1.317447081462476e-06, "loss": 0.8531, "step": 61515 }, { "epoch": 0.7497593019146161, "grad_norm": 1.8696662187576294, "learning_rate": 1.3171263630532393e-06, "loss": 0.7919, "step": 61520 }, { "epoch": 0.749820238138764, "grad_norm": 1.9555715322494507, "learning_rate": 1.3168056446440027e-06, "loss": 0.8377, "step": 61525 }, { "epoch": 0.7498811743629118, "grad_norm": 1.9426108598709106, "learning_rate": 1.316484926234766e-06, "loss": 0.7674, "step": 61530 }, { "epoch": 0.7499421105870596, "grad_norm": 2.028425693511963, "learning_rate": 1.3161642078255294e-06, "loss": 0.8287, "step": 61535 }, { "epoch": 0.7500030468112074, "grad_norm": 2.1270980834960938, "learning_rate": 1.3158434894162924e-06, "loss": 0.7808, "step": 61540 }, { "epoch": 0.7500639830353552, "grad_norm": 1.7748266458511353, "learning_rate": 1.3155227710070558e-06, "loss": 0.7835, "step": 61545 }, { "epoch": 0.750124919259503, "grad_norm": 2.2000513076782227, "learning_rate": 1.3152020525978193e-06, "loss": 0.8087, "step": 61550 }, { "epoch": 0.7501858554836508, "grad_norm": 2.1091601848602295, "learning_rate": 1.3148813341885825e-06, "loss": 0.7698, "step": 61555 }, { "epoch": 0.7502467917077986, "grad_norm": 2.146864891052246, "learning_rate": 1.3145606157793457e-06, "loss": 0.8259, "step": 61560 }, { "epoch": 0.7503077279319464, "grad_norm": 1.9652642011642456, "learning_rate": 1.3142398973701092e-06, "loss": 0.7993, "step": 61565 }, { "epoch": 0.7503686641560943, "grad_norm": 2.7630810737609863, "learning_rate": 1.3139191789608724e-06, "loss": 0.8266, "step": 61570 }, { "epoch": 0.7504296003802421, "grad_norm": 1.9713201522827148, "learning_rate": 1.3135984605516359e-06, "loss": 0.7603, "step": 61575 }, { "epoch": 0.7504905366043898, "grad_norm": 1.6687015295028687, "learning_rate": 1.3132777421423989e-06, "loss": 0.8236, "step": 61580 }, { "epoch": 0.7505514728285376, "grad_norm": 1.8250868320465088, "learning_rate": 1.3129570237331623e-06, "loss": 0.8291, "step": 61585 }, { "epoch": 0.7506124090526854, "grad_norm": 2.5105295181274414, "learning_rate": 1.3126363053239258e-06, "loss": 0.844, "step": 61590 }, { "epoch": 0.7506733452768333, "grad_norm": 2.074775218963623, "learning_rate": 1.312315586914689e-06, "loss": 0.8517, "step": 61595 }, { "epoch": 0.7507342815009811, "grad_norm": 2.273516893386841, "learning_rate": 1.3119948685054524e-06, "loss": 0.8527, "step": 61600 }, { "epoch": 0.7507952177251289, "grad_norm": 2.0572125911712646, "learning_rate": 1.3116741500962156e-06, "loss": 0.8073, "step": 61605 }, { "epoch": 0.7508561539492767, "grad_norm": 2.494807481765747, "learning_rate": 1.3113534316869789e-06, "loss": 0.8034, "step": 61610 }, { "epoch": 0.7509170901734245, "grad_norm": 1.8684556484222412, "learning_rate": 1.3110327132777423e-06, "loss": 0.7476, "step": 61615 }, { "epoch": 0.7509780263975723, "grad_norm": 1.9166595935821533, "learning_rate": 1.3107119948685053e-06, "loss": 0.8553, "step": 61620 }, { "epoch": 0.7510389626217201, "grad_norm": 2.255699872970581, "learning_rate": 1.3103912764592688e-06, "loss": 0.7816, "step": 61625 }, { "epoch": 0.7510998988458679, "grad_norm": 1.896119475364685, "learning_rate": 1.3100705580500322e-06, "loss": 0.8111, "step": 61630 }, { "epoch": 0.7511608350700157, "grad_norm": 1.9353889226913452, "learning_rate": 1.3097498396407954e-06, "loss": 0.8524, "step": 61635 }, { "epoch": 0.7512217712941636, "grad_norm": 2.2115583419799805, "learning_rate": 1.3094291212315589e-06, "loss": 0.8618, "step": 61640 }, { "epoch": 0.7512827075183114, "grad_norm": 1.735561490058899, "learning_rate": 1.3091084028223221e-06, "loss": 0.7539, "step": 61645 }, { "epoch": 0.7513436437424591, "grad_norm": 1.9505659341812134, "learning_rate": 1.3087876844130853e-06, "loss": 0.7807, "step": 61650 }, { "epoch": 0.7514045799666069, "grad_norm": 2.4191319942474365, "learning_rate": 1.3084669660038488e-06, "loss": 0.8001, "step": 61655 }, { "epoch": 0.7514655161907547, "grad_norm": 1.9000306129455566, "learning_rate": 1.3081462475946122e-06, "loss": 0.8019, "step": 61660 }, { "epoch": 0.7515264524149026, "grad_norm": 1.6668787002563477, "learning_rate": 1.3078255291853752e-06, "loss": 0.8075, "step": 61665 }, { "epoch": 0.7515873886390504, "grad_norm": 2.191070795059204, "learning_rate": 1.3075048107761387e-06, "loss": 0.9108, "step": 61670 }, { "epoch": 0.7516483248631982, "grad_norm": 2.098048448562622, "learning_rate": 1.307184092366902e-06, "loss": 0.8322, "step": 61675 }, { "epoch": 0.751709261087346, "grad_norm": 2.1836843490600586, "learning_rate": 1.3068633739576653e-06, "loss": 0.7936, "step": 61680 }, { "epoch": 0.7517701973114937, "grad_norm": 1.8541964292526245, "learning_rate": 1.3065426555484288e-06, "loss": 0.8268, "step": 61685 }, { "epoch": 0.7518311335356416, "grad_norm": 1.9815170764923096, "learning_rate": 1.3062219371391918e-06, "loss": 0.8981, "step": 61690 }, { "epoch": 0.7518920697597894, "grad_norm": 2.565713882446289, "learning_rate": 1.3059012187299552e-06, "loss": 0.884, "step": 61695 }, { "epoch": 0.7519530059839372, "grad_norm": 2.2336435317993164, "learning_rate": 1.3055805003207187e-06, "loss": 0.8377, "step": 61700 }, { "epoch": 0.752013942208085, "grad_norm": 2.0015342235565186, "learning_rate": 1.3052597819114817e-06, "loss": 0.8633, "step": 61705 }, { "epoch": 0.7520748784322329, "grad_norm": 2.2536981105804443, "learning_rate": 1.3049390635022451e-06, "loss": 0.9039, "step": 61710 }, { "epoch": 0.7521358146563807, "grad_norm": 2.327463150024414, "learning_rate": 1.3046183450930084e-06, "loss": 0.8254, "step": 61715 }, { "epoch": 0.7521967508805284, "grad_norm": 3.6028733253479004, "learning_rate": 1.3042976266837718e-06, "loss": 0.8267, "step": 61720 }, { "epoch": 0.7522576871046762, "grad_norm": 2.248616933822632, "learning_rate": 1.3039769082745353e-06, "loss": 0.7734, "step": 61725 }, { "epoch": 0.752318623328824, "grad_norm": 2.0788681507110596, "learning_rate": 1.3036561898652983e-06, "loss": 0.8343, "step": 61730 }, { "epoch": 0.7523795595529719, "grad_norm": 2.0049126148223877, "learning_rate": 1.3033354714560617e-06, "loss": 0.8055, "step": 61735 }, { "epoch": 0.7524404957771197, "grad_norm": 1.8402209281921387, "learning_rate": 1.3030147530468251e-06, "loss": 0.7954, "step": 61740 }, { "epoch": 0.7525014320012675, "grad_norm": 1.9347800016403198, "learning_rate": 1.3026940346375882e-06, "loss": 0.8439, "step": 61745 }, { "epoch": 0.7525623682254153, "grad_norm": 1.9426226615905762, "learning_rate": 1.3023733162283516e-06, "loss": 0.8132, "step": 61750 }, { "epoch": 0.752623304449563, "grad_norm": 2.0827558040618896, "learning_rate": 1.3020525978191148e-06, "loss": 0.8624, "step": 61755 }, { "epoch": 0.7526842406737109, "grad_norm": 1.608078956604004, "learning_rate": 1.3017318794098783e-06, "loss": 0.8401, "step": 61760 }, { "epoch": 0.7527451768978587, "grad_norm": 1.7721409797668457, "learning_rate": 1.3014111610006417e-06, "loss": 0.7704, "step": 61765 }, { "epoch": 0.7528061131220065, "grad_norm": 1.931645154953003, "learning_rate": 1.3010904425914047e-06, "loss": 0.7342, "step": 61770 }, { "epoch": 0.7528670493461543, "grad_norm": 2.0242645740509033, "learning_rate": 1.3007697241821682e-06, "loss": 0.7592, "step": 61775 }, { "epoch": 0.7529279855703022, "grad_norm": 2.210076332092285, "learning_rate": 1.3004490057729316e-06, "loss": 0.8194, "step": 61780 }, { "epoch": 0.75298892179445, "grad_norm": 2.0792834758758545, "learning_rate": 1.3001282873636946e-06, "loss": 0.7644, "step": 61785 }, { "epoch": 0.7530498580185977, "grad_norm": 1.915184497833252, "learning_rate": 1.299807568954458e-06, "loss": 0.8293, "step": 61790 }, { "epoch": 0.7531107942427455, "grad_norm": 1.9167201519012451, "learning_rate": 1.2994868505452213e-06, "loss": 0.8162, "step": 61795 }, { "epoch": 0.7531717304668933, "grad_norm": 1.8299189805984497, "learning_rate": 1.2991661321359847e-06, "loss": 0.786, "step": 61800 }, { "epoch": 0.7532326666910412, "grad_norm": 1.6337553262710571, "learning_rate": 1.2988454137267482e-06, "loss": 0.7574, "step": 61805 }, { "epoch": 0.753293602915189, "grad_norm": 2.0166468620300293, "learning_rate": 1.2985246953175112e-06, "loss": 0.7715, "step": 61810 }, { "epoch": 0.7533545391393368, "grad_norm": 2.0910282135009766, "learning_rate": 1.2982039769082746e-06, "loss": 0.8162, "step": 61815 }, { "epoch": 0.7534154753634846, "grad_norm": 2.423811197280884, "learning_rate": 1.297883258499038e-06, "loss": 0.8004, "step": 61820 }, { "epoch": 0.7534764115876323, "grad_norm": 1.9409164190292358, "learning_rate": 1.2975625400898013e-06, "loss": 0.7381, "step": 61825 }, { "epoch": 0.7535373478117802, "grad_norm": 2.654419183731079, "learning_rate": 1.2972418216805645e-06, "loss": 0.824, "step": 61830 }, { "epoch": 0.753598284035928, "grad_norm": 1.8377631902694702, "learning_rate": 1.2969211032713278e-06, "loss": 0.8451, "step": 61835 }, { "epoch": 0.7536592202600758, "grad_norm": 2.0520942211151123, "learning_rate": 1.2966003848620912e-06, "loss": 0.8006, "step": 61840 }, { "epoch": 0.7537201564842236, "grad_norm": 1.9780064821243286, "learning_rate": 1.2962796664528546e-06, "loss": 0.793, "step": 61845 }, { "epoch": 0.7537810927083715, "grad_norm": 1.9385241270065308, "learning_rate": 1.2959589480436177e-06, "loss": 0.743, "step": 61850 }, { "epoch": 0.7538420289325192, "grad_norm": 2.359128952026367, "learning_rate": 1.295638229634381e-06, "loss": 0.8066, "step": 61855 }, { "epoch": 0.753902965156667, "grad_norm": 2.0965797901153564, "learning_rate": 1.2953175112251445e-06, "loss": 0.8168, "step": 61860 }, { "epoch": 0.7539639013808148, "grad_norm": 2.1490461826324463, "learning_rate": 1.2949967928159078e-06, "loss": 0.8332, "step": 61865 }, { "epoch": 0.7540248376049626, "grad_norm": 1.8605870008468628, "learning_rate": 1.294676074406671e-06, "loss": 0.7767, "step": 61870 }, { "epoch": 0.7540857738291105, "grad_norm": 1.8678001165390015, "learning_rate": 1.2943553559974342e-06, "loss": 0.7988, "step": 61875 }, { "epoch": 0.7541467100532583, "grad_norm": 1.9162639379501343, "learning_rate": 1.2940346375881977e-06, "loss": 0.7803, "step": 61880 }, { "epoch": 0.7542076462774061, "grad_norm": 2.1131653785705566, "learning_rate": 1.293713919178961e-06, "loss": 0.7688, "step": 61885 }, { "epoch": 0.7542685825015538, "grad_norm": 1.8026539087295532, "learning_rate": 1.2933932007697241e-06, "loss": 0.8159, "step": 61890 }, { "epoch": 0.7543295187257016, "grad_norm": 1.9310156106948853, "learning_rate": 1.2930724823604876e-06, "loss": 0.7249, "step": 61895 }, { "epoch": 0.7543904549498495, "grad_norm": 2.066310405731201, "learning_rate": 1.292751763951251e-06, "loss": 0.8167, "step": 61900 }, { "epoch": 0.7544513911739973, "grad_norm": 2.1906752586364746, "learning_rate": 1.2924310455420142e-06, "loss": 0.826, "step": 61905 }, { "epoch": 0.7545123273981451, "grad_norm": 1.884495496749878, "learning_rate": 1.2921103271327775e-06, "loss": 0.8139, "step": 61910 }, { "epoch": 0.7545732636222929, "grad_norm": 2.0656187534332275, "learning_rate": 1.2917896087235407e-06, "loss": 0.8035, "step": 61915 }, { "epoch": 0.7546341998464408, "grad_norm": 2.039595127105713, "learning_rate": 1.2914688903143041e-06, "loss": 0.7452, "step": 61920 }, { "epoch": 0.7546951360705885, "grad_norm": 1.815359115600586, "learning_rate": 1.2911481719050676e-06, "loss": 0.7844, "step": 61925 }, { "epoch": 0.7547560722947363, "grad_norm": 1.922608494758606, "learning_rate": 1.2908274534958306e-06, "loss": 0.8499, "step": 61930 }, { "epoch": 0.7548170085188841, "grad_norm": 1.7804611921310425, "learning_rate": 1.290506735086594e-06, "loss": 0.7923, "step": 61935 }, { "epoch": 0.7548779447430319, "grad_norm": 2.291905164718628, "learning_rate": 1.2901860166773575e-06, "loss": 0.855, "step": 61940 }, { "epoch": 0.7549388809671798, "grad_norm": 2.308250665664673, "learning_rate": 1.2898652982681207e-06, "loss": 0.7838, "step": 61945 }, { "epoch": 0.7549998171913276, "grad_norm": 1.9503066539764404, "learning_rate": 1.2895445798588841e-06, "loss": 0.8481, "step": 61950 }, { "epoch": 0.7550607534154754, "grad_norm": 1.8611046075820923, "learning_rate": 1.2892238614496474e-06, "loss": 0.8135, "step": 61955 }, { "epoch": 0.7551216896396231, "grad_norm": 1.9522780179977417, "learning_rate": 1.2889031430404106e-06, "loss": 0.7799, "step": 61960 }, { "epoch": 0.7551826258637709, "grad_norm": 1.9110209941864014, "learning_rate": 1.288582424631174e-06, "loss": 0.7912, "step": 61965 }, { "epoch": 0.7552435620879188, "grad_norm": 2.17289662361145, "learning_rate": 1.288261706221937e-06, "loss": 0.8275, "step": 61970 }, { "epoch": 0.7553044983120666, "grad_norm": 1.5847209692001343, "learning_rate": 1.2879409878127005e-06, "loss": 0.9092, "step": 61975 }, { "epoch": 0.7553654345362144, "grad_norm": 1.880138635635376, "learning_rate": 1.287620269403464e-06, "loss": 0.7808, "step": 61980 }, { "epoch": 0.7554263707603622, "grad_norm": 1.8547005653381348, "learning_rate": 1.2872995509942272e-06, "loss": 0.8396, "step": 61985 }, { "epoch": 0.75548730698451, "grad_norm": 1.723063349723816, "learning_rate": 1.2869788325849906e-06, "loss": 0.7837, "step": 61990 }, { "epoch": 0.7555482432086578, "grad_norm": 2.051038980484009, "learning_rate": 1.2866581141757538e-06, "loss": 0.8535, "step": 61995 }, { "epoch": 0.7556091794328056, "grad_norm": 2.2024145126342773, "learning_rate": 1.286337395766517e-06, "loss": 0.753, "step": 62000 }, { "epoch": 0.7556701156569534, "grad_norm": 2.676100969314575, "learning_rate": 1.2860166773572805e-06, "loss": 0.8378, "step": 62005 }, { "epoch": 0.7557310518811012, "grad_norm": 1.9624096155166626, "learning_rate": 1.2856959589480435e-06, "loss": 0.7722, "step": 62010 }, { "epoch": 0.7557919881052491, "grad_norm": 1.8337554931640625, "learning_rate": 1.285375240538807e-06, "loss": 0.7108, "step": 62015 }, { "epoch": 0.7558529243293969, "grad_norm": 2.8143105506896973, "learning_rate": 1.2850545221295704e-06, "loss": 0.8302, "step": 62020 }, { "epoch": 0.7559138605535447, "grad_norm": 2.0122716426849365, "learning_rate": 1.2847338037203336e-06, "loss": 0.7588, "step": 62025 }, { "epoch": 0.7559747967776924, "grad_norm": 2.018437385559082, "learning_rate": 1.284413085311097e-06, "loss": 0.787, "step": 62030 }, { "epoch": 0.7560357330018402, "grad_norm": 1.9026817083358765, "learning_rate": 1.2840923669018605e-06, "loss": 0.7857, "step": 62035 }, { "epoch": 0.7560966692259881, "grad_norm": 1.714950442314148, "learning_rate": 1.2837716484926235e-06, "loss": 0.8232, "step": 62040 }, { "epoch": 0.7561576054501359, "grad_norm": 1.7960820198059082, "learning_rate": 1.283450930083387e-06, "loss": 0.766, "step": 62045 }, { "epoch": 0.7562185416742837, "grad_norm": 1.9503955841064453, "learning_rate": 1.2831302116741502e-06, "loss": 0.8633, "step": 62050 }, { "epoch": 0.7562794778984315, "grad_norm": 1.8694809675216675, "learning_rate": 1.2828094932649134e-06, "loss": 0.8478, "step": 62055 }, { "epoch": 0.7563404141225794, "grad_norm": 2.0302186012268066, "learning_rate": 1.2824887748556769e-06, "loss": 0.808, "step": 62060 }, { "epoch": 0.7564013503467271, "grad_norm": 1.9426584243774414, "learning_rate": 1.28216805644644e-06, "loss": 0.7996, "step": 62065 }, { "epoch": 0.7564622865708749, "grad_norm": 1.9948132038116455, "learning_rate": 1.2818473380372035e-06, "loss": 0.8254, "step": 62070 }, { "epoch": 0.7565232227950227, "grad_norm": 1.8011384010314941, "learning_rate": 1.281526619627967e-06, "loss": 0.7916, "step": 62075 }, { "epoch": 0.7565841590191705, "grad_norm": 1.8785035610198975, "learning_rate": 1.28120590121873e-06, "loss": 0.8348, "step": 62080 }, { "epoch": 0.7566450952433184, "grad_norm": 1.9336930513381958, "learning_rate": 1.2808851828094934e-06, "loss": 0.7566, "step": 62085 }, { "epoch": 0.7567060314674662, "grad_norm": 1.7890785932540894, "learning_rate": 1.2805644644002567e-06, "loss": 0.8249, "step": 62090 }, { "epoch": 0.756766967691614, "grad_norm": 1.7507963180541992, "learning_rate": 1.2802437459910199e-06, "loss": 0.9286, "step": 62095 }, { "epoch": 0.7568279039157617, "grad_norm": 2.2840359210968018, "learning_rate": 1.2799230275817833e-06, "loss": 0.8318, "step": 62100 }, { "epoch": 0.7568888401399095, "grad_norm": 1.8156874179840088, "learning_rate": 1.2796023091725465e-06, "loss": 0.8426, "step": 62105 }, { "epoch": 0.7569497763640574, "grad_norm": 1.9315674304962158, "learning_rate": 1.27928159076331e-06, "loss": 0.7897, "step": 62110 }, { "epoch": 0.7570107125882052, "grad_norm": 2.0730414390563965, "learning_rate": 1.2789608723540734e-06, "loss": 0.8248, "step": 62115 }, { "epoch": 0.757071648812353, "grad_norm": 2.252737283706665, "learning_rate": 1.2786401539448364e-06, "loss": 0.7633, "step": 62120 }, { "epoch": 0.7571325850365008, "grad_norm": 2.137420177459717, "learning_rate": 1.2783194355355999e-06, "loss": 0.7706, "step": 62125 }, { "epoch": 0.7571935212606486, "grad_norm": 1.989922285079956, "learning_rate": 1.2779987171263631e-06, "loss": 0.9262, "step": 62130 }, { "epoch": 0.7572544574847964, "grad_norm": 2.346036911010742, "learning_rate": 1.2776779987171263e-06, "loss": 0.8157, "step": 62135 }, { "epoch": 0.7573153937089442, "grad_norm": 1.845933198928833, "learning_rate": 1.2773572803078898e-06, "loss": 0.8437, "step": 62140 }, { "epoch": 0.757376329933092, "grad_norm": 1.8836638927459717, "learning_rate": 1.277036561898653e-06, "loss": 0.7832, "step": 62145 }, { "epoch": 0.7574372661572398, "grad_norm": 1.8979418277740479, "learning_rate": 1.2767158434894165e-06, "loss": 0.7965, "step": 62150 }, { "epoch": 0.7574982023813877, "grad_norm": 2.7464122772216797, "learning_rate": 1.2763951250801799e-06, "loss": 0.7935, "step": 62155 }, { "epoch": 0.7575591386055355, "grad_norm": 1.8079115152359009, "learning_rate": 1.276074406670943e-06, "loss": 0.7263, "step": 62160 }, { "epoch": 0.7576200748296833, "grad_norm": 1.8947384357452393, "learning_rate": 1.2757536882617063e-06, "loss": 0.8527, "step": 62165 }, { "epoch": 0.757681011053831, "grad_norm": 2.097909927368164, "learning_rate": 1.2754329698524696e-06, "loss": 0.8511, "step": 62170 }, { "epoch": 0.7577419472779788, "grad_norm": 1.7838833332061768, "learning_rate": 1.275112251443233e-06, "loss": 0.7574, "step": 62175 }, { "epoch": 0.7578028835021267, "grad_norm": 1.8941004276275635, "learning_rate": 1.2747915330339962e-06, "loss": 0.8196, "step": 62180 }, { "epoch": 0.7578638197262745, "grad_norm": 2.0951242446899414, "learning_rate": 1.2744708146247595e-06, "loss": 0.7839, "step": 62185 }, { "epoch": 0.7579247559504223, "grad_norm": 1.853016972541809, "learning_rate": 1.274150096215523e-06, "loss": 0.7545, "step": 62190 }, { "epoch": 0.7579856921745701, "grad_norm": 2.1434082984924316, "learning_rate": 1.2738293778062864e-06, "loss": 0.8546, "step": 62195 }, { "epoch": 0.758046628398718, "grad_norm": 1.8234245777130127, "learning_rate": 1.2735086593970494e-06, "loss": 0.7536, "step": 62200 }, { "epoch": 0.7581075646228657, "grad_norm": 1.8365782499313354, "learning_rate": 1.2731879409878128e-06, "loss": 0.8489, "step": 62205 }, { "epoch": 0.7581685008470135, "grad_norm": 2.2209982872009277, "learning_rate": 1.272867222578576e-06, "loss": 0.8345, "step": 62210 }, { "epoch": 0.7582294370711613, "grad_norm": 2.003178119659424, "learning_rate": 1.2725465041693395e-06, "loss": 0.7744, "step": 62215 }, { "epoch": 0.7582903732953091, "grad_norm": 2.148388624191284, "learning_rate": 1.2722257857601027e-06, "loss": 0.7738, "step": 62220 }, { "epoch": 0.758351309519457, "grad_norm": 2.1897666454315186, "learning_rate": 1.271905067350866e-06, "loss": 0.8509, "step": 62225 }, { "epoch": 0.7584122457436048, "grad_norm": 2.031646490097046, "learning_rate": 1.2715843489416294e-06, "loss": 0.818, "step": 62230 }, { "epoch": 0.7584731819677526, "grad_norm": 2.1292128562927246, "learning_rate": 1.2712636305323928e-06, "loss": 0.8331, "step": 62235 }, { "epoch": 0.7585341181919003, "grad_norm": 1.8797012567520142, "learning_rate": 1.2709429121231558e-06, "loss": 0.7822, "step": 62240 }, { "epoch": 0.7585950544160481, "grad_norm": 1.9691129922866821, "learning_rate": 1.2706221937139193e-06, "loss": 0.7736, "step": 62245 }, { "epoch": 0.758655990640196, "grad_norm": 1.9462001323699951, "learning_rate": 1.2703014753046827e-06, "loss": 0.8243, "step": 62250 }, { "epoch": 0.7587169268643438, "grad_norm": 2.0945963859558105, "learning_rate": 1.269980756895446e-06, "loss": 0.8184, "step": 62255 }, { "epoch": 0.7587778630884916, "grad_norm": 1.7388876676559448, "learning_rate": 1.2696600384862092e-06, "loss": 0.7521, "step": 62260 }, { "epoch": 0.7588387993126394, "grad_norm": 1.7981047630310059, "learning_rate": 1.2693393200769724e-06, "loss": 0.7953, "step": 62265 }, { "epoch": 0.7588997355367872, "grad_norm": 2.3907291889190674, "learning_rate": 1.2690186016677358e-06, "loss": 0.8119, "step": 62270 }, { "epoch": 0.758960671760935, "grad_norm": 2.3795135021209717, "learning_rate": 1.2686978832584993e-06, "loss": 0.7531, "step": 62275 }, { "epoch": 0.7590216079850828, "grad_norm": 1.8759161233901978, "learning_rate": 1.2683771648492623e-06, "loss": 0.8321, "step": 62280 }, { "epoch": 0.7590825442092306, "grad_norm": 2.1150410175323486, "learning_rate": 1.2680564464400257e-06, "loss": 0.7823, "step": 62285 }, { "epoch": 0.7591434804333784, "grad_norm": 2.0739023685455322, "learning_rate": 1.2677357280307892e-06, "loss": 0.7589, "step": 62290 }, { "epoch": 0.7592044166575262, "grad_norm": 1.728783369064331, "learning_rate": 1.2674150096215524e-06, "loss": 0.7548, "step": 62295 }, { "epoch": 0.7592653528816741, "grad_norm": 2.4401462078094482, "learning_rate": 1.2670942912123158e-06, "loss": 0.7526, "step": 62300 }, { "epoch": 0.7593262891058219, "grad_norm": 1.903266191482544, "learning_rate": 1.2667735728030789e-06, "loss": 0.784, "step": 62305 }, { "epoch": 0.7593872253299696, "grad_norm": 1.8325692415237427, "learning_rate": 1.2664528543938423e-06, "loss": 0.8273, "step": 62310 }, { "epoch": 0.7594481615541174, "grad_norm": 2.4971609115600586, "learning_rate": 1.2661321359846057e-06, "loss": 0.8311, "step": 62315 }, { "epoch": 0.7595090977782653, "grad_norm": 1.8933175802230835, "learning_rate": 1.2658114175753688e-06, "loss": 0.8243, "step": 62320 }, { "epoch": 0.7595700340024131, "grad_norm": 2.1339986324310303, "learning_rate": 1.2654906991661322e-06, "loss": 0.8144, "step": 62325 }, { "epoch": 0.7596309702265609, "grad_norm": 1.8948982954025269, "learning_rate": 1.2651699807568956e-06, "loss": 1.0252, "step": 62330 }, { "epoch": 0.7596919064507087, "grad_norm": 2.1870956420898438, "learning_rate": 1.2648492623476589e-06, "loss": 0.8237, "step": 62335 }, { "epoch": 0.7597528426748565, "grad_norm": 2.1592276096343994, "learning_rate": 1.2645285439384223e-06, "loss": 0.8516, "step": 62340 }, { "epoch": 0.7598137788990043, "grad_norm": 1.8449015617370605, "learning_rate": 1.2642078255291853e-06, "loss": 0.7966, "step": 62345 }, { "epoch": 0.7598747151231521, "grad_norm": 1.6768929958343506, "learning_rate": 1.2638871071199488e-06, "loss": 0.8437, "step": 62350 }, { "epoch": 0.7599356513472999, "grad_norm": 1.9345027208328247, "learning_rate": 1.2635663887107122e-06, "loss": 0.8664, "step": 62355 }, { "epoch": 0.7599965875714477, "grad_norm": 1.9342029094696045, "learning_rate": 1.2632456703014752e-06, "loss": 0.8172, "step": 62360 }, { "epoch": 0.7600575237955955, "grad_norm": 1.8906984329223633, "learning_rate": 1.2629249518922387e-06, "loss": 0.8087, "step": 62365 }, { "epoch": 0.7601184600197434, "grad_norm": 2.1891398429870605, "learning_rate": 1.2626042334830021e-06, "loss": 0.8108, "step": 62370 }, { "epoch": 0.7601793962438912, "grad_norm": 1.9378244876861572, "learning_rate": 1.2622835150737653e-06, "loss": 0.8376, "step": 62375 }, { "epoch": 0.7602403324680389, "grad_norm": 1.8413599729537964, "learning_rate": 1.2619627966645288e-06, "loss": 0.8007, "step": 62380 }, { "epoch": 0.7603012686921867, "grad_norm": 2.2292003631591797, "learning_rate": 1.2616420782552918e-06, "loss": 0.8098, "step": 62385 }, { "epoch": 0.7603622049163345, "grad_norm": 2.2053442001342773, "learning_rate": 1.2613213598460552e-06, "loss": 0.8356, "step": 62390 }, { "epoch": 0.7604231411404824, "grad_norm": 2.186248302459717, "learning_rate": 1.2610006414368187e-06, "loss": 0.8347, "step": 62395 }, { "epoch": 0.7604840773646302, "grad_norm": 1.970633625984192, "learning_rate": 1.260679923027582e-06, "loss": 0.8359, "step": 62400 }, { "epoch": 0.760545013588778, "grad_norm": 1.7650020122528076, "learning_rate": 1.2603592046183451e-06, "loss": 0.7419, "step": 62405 }, { "epoch": 0.7606059498129258, "grad_norm": 1.904784917831421, "learning_rate": 1.2600384862091086e-06, "loss": 0.8362, "step": 62410 }, { "epoch": 0.7606668860370736, "grad_norm": 2.0161800384521484, "learning_rate": 1.2597177677998718e-06, "loss": 0.8005, "step": 62415 }, { "epoch": 0.7607278222612214, "grad_norm": 2.001723289489746, "learning_rate": 1.2593970493906352e-06, "loss": 0.7804, "step": 62420 }, { "epoch": 0.7607887584853692, "grad_norm": 1.9215351343154907, "learning_rate": 1.2590763309813983e-06, "loss": 0.7657, "step": 62425 }, { "epoch": 0.760849694709517, "grad_norm": 1.7886520624160767, "learning_rate": 1.2587556125721617e-06, "loss": 0.8858, "step": 62430 }, { "epoch": 0.7609106309336648, "grad_norm": 1.9512672424316406, "learning_rate": 1.2584348941629251e-06, "loss": 0.8146, "step": 62435 }, { "epoch": 0.7609715671578127, "grad_norm": 1.8434056043624878, "learning_rate": 1.2581141757536884e-06, "loss": 0.8413, "step": 62440 }, { "epoch": 0.7610325033819605, "grad_norm": 1.8447036743164062, "learning_rate": 1.2577934573444516e-06, "loss": 0.8192, "step": 62445 }, { "epoch": 0.7610934396061082, "grad_norm": 2.530571937561035, "learning_rate": 1.257472738935215e-06, "loss": 0.8453, "step": 62450 }, { "epoch": 0.761154375830256, "grad_norm": 1.9528898000717163, "learning_rate": 1.2571520205259783e-06, "loss": 0.9095, "step": 62455 }, { "epoch": 0.7612153120544038, "grad_norm": 1.9379608631134033, "learning_rate": 1.2568313021167417e-06, "loss": 0.8475, "step": 62460 }, { "epoch": 0.7612762482785517, "grad_norm": 2.1078100204467773, "learning_rate": 1.2565105837075047e-06, "loss": 0.7887, "step": 62465 }, { "epoch": 0.7613371845026995, "grad_norm": 1.881645679473877, "learning_rate": 1.2561898652982682e-06, "loss": 0.8024, "step": 62470 }, { "epoch": 0.7613981207268473, "grad_norm": 2.358748197555542, "learning_rate": 1.2558691468890316e-06, "loss": 0.8501, "step": 62475 }, { "epoch": 0.7614590569509951, "grad_norm": 2.067152738571167, "learning_rate": 1.2555484284797948e-06, "loss": 0.8187, "step": 62480 }, { "epoch": 0.7615199931751429, "grad_norm": 1.8986876010894775, "learning_rate": 1.255227710070558e-06, "loss": 0.7661, "step": 62485 }, { "epoch": 0.7615809293992907, "grad_norm": 2.2183523178100586, "learning_rate": 1.2549069916613215e-06, "loss": 0.7748, "step": 62490 }, { "epoch": 0.7616418656234385, "grad_norm": 2.0202972888946533, "learning_rate": 1.2545862732520847e-06, "loss": 0.7905, "step": 62495 }, { "epoch": 0.7617028018475863, "grad_norm": 2.0472571849823, "learning_rate": 1.2542655548428482e-06, "loss": 0.827, "step": 62500 }, { "epoch": 0.7617637380717341, "grad_norm": 2.077784776687622, "learning_rate": 1.2539448364336112e-06, "loss": 0.8114, "step": 62505 }, { "epoch": 0.761824674295882, "grad_norm": 2.4626784324645996, "learning_rate": 1.2536241180243746e-06, "loss": 0.8141, "step": 62510 }, { "epoch": 0.7618856105200298, "grad_norm": 2.747485876083374, "learning_rate": 1.253303399615138e-06, "loss": 0.8106, "step": 62515 }, { "epoch": 0.7619465467441775, "grad_norm": 2.280205011367798, "learning_rate": 1.2529826812059013e-06, "loss": 0.7839, "step": 62520 }, { "epoch": 0.7620074829683253, "grad_norm": 1.8505388498306274, "learning_rate": 1.2526619627966647e-06, "loss": 0.827, "step": 62525 }, { "epoch": 0.7620684191924731, "grad_norm": 1.7491992712020874, "learning_rate": 1.252341244387428e-06, "loss": 0.7614, "step": 62530 }, { "epoch": 0.762129355416621, "grad_norm": 1.854085922241211, "learning_rate": 1.2520205259781912e-06, "loss": 0.7997, "step": 62535 }, { "epoch": 0.7621902916407688, "grad_norm": 1.894819974899292, "learning_rate": 1.2516998075689546e-06, "loss": 0.8387, "step": 62540 }, { "epoch": 0.7622512278649166, "grad_norm": 1.8208179473876953, "learning_rate": 1.251379089159718e-06, "loss": 0.7389, "step": 62545 }, { "epoch": 0.7623121640890644, "grad_norm": 1.7714511156082153, "learning_rate": 1.251058370750481e-06, "loss": 0.7925, "step": 62550 }, { "epoch": 0.7623731003132121, "grad_norm": 2.0001392364501953, "learning_rate": 1.2507376523412445e-06, "loss": 0.805, "step": 62555 }, { "epoch": 0.76243403653736, "grad_norm": 2.1753041744232178, "learning_rate": 1.2504169339320078e-06, "loss": 0.8196, "step": 62560 }, { "epoch": 0.7624949727615078, "grad_norm": 1.9489694833755493, "learning_rate": 1.2500962155227712e-06, "loss": 0.7732, "step": 62565 }, { "epoch": 0.7625559089856556, "grad_norm": 2.417292833328247, "learning_rate": 1.2497754971135344e-06, "loss": 0.7951, "step": 62570 }, { "epoch": 0.7626168452098034, "grad_norm": 2.7878334522247314, "learning_rate": 1.2494547787042979e-06, "loss": 0.8281, "step": 62575 }, { "epoch": 0.7626777814339513, "grad_norm": 2.3012936115264893, "learning_rate": 1.249134060295061e-06, "loss": 0.9118, "step": 62580 }, { "epoch": 0.7627387176580991, "grad_norm": 2.0684854984283447, "learning_rate": 1.2488133418858243e-06, "loss": 0.8188, "step": 62585 }, { "epoch": 0.7627996538822468, "grad_norm": 2.01831316947937, "learning_rate": 1.2484926234765876e-06, "loss": 0.872, "step": 62590 }, { "epoch": 0.7628605901063946, "grad_norm": 1.7748299837112427, "learning_rate": 1.248171905067351e-06, "loss": 0.8015, "step": 62595 }, { "epoch": 0.7629215263305424, "grad_norm": 1.6556792259216309, "learning_rate": 1.2478511866581142e-06, "loss": 0.8582, "step": 62600 }, { "epoch": 0.7629824625546903, "grad_norm": 1.9776062965393066, "learning_rate": 1.2475304682488777e-06, "loss": 0.8273, "step": 62605 }, { "epoch": 0.7630433987788381, "grad_norm": 1.8531930446624756, "learning_rate": 1.2472097498396409e-06, "loss": 0.8076, "step": 62610 }, { "epoch": 0.7631043350029859, "grad_norm": 1.7977938652038574, "learning_rate": 1.2468890314304043e-06, "loss": 0.7668, "step": 62615 }, { "epoch": 0.7631652712271337, "grad_norm": 2.1307406425476074, "learning_rate": 1.2465683130211676e-06, "loss": 0.8625, "step": 62620 }, { "epoch": 0.7632262074512814, "grad_norm": 1.6801995038986206, "learning_rate": 1.2462475946119308e-06, "loss": 0.7815, "step": 62625 }, { "epoch": 0.7632871436754293, "grad_norm": 1.9270620346069336, "learning_rate": 1.245926876202694e-06, "loss": 0.824, "step": 62630 }, { "epoch": 0.7633480798995771, "grad_norm": 1.8626497983932495, "learning_rate": 1.2456061577934575e-06, "loss": 0.8036, "step": 62635 }, { "epoch": 0.7634090161237249, "grad_norm": 1.951952576637268, "learning_rate": 1.2452854393842207e-06, "loss": 0.8563, "step": 62640 }, { "epoch": 0.7634699523478727, "grad_norm": 2.386958360671997, "learning_rate": 1.2449647209749841e-06, "loss": 0.803, "step": 62645 }, { "epoch": 0.7635308885720206, "grad_norm": 1.983479619026184, "learning_rate": 1.2446440025657474e-06, "loss": 0.8567, "step": 62650 }, { "epoch": 0.7635918247961684, "grad_norm": 1.7647807598114014, "learning_rate": 1.2443232841565108e-06, "loss": 0.7799, "step": 62655 }, { "epoch": 0.7636527610203161, "grad_norm": 2.1988861560821533, "learning_rate": 1.244002565747274e-06, "loss": 0.8, "step": 62660 }, { "epoch": 0.7637136972444639, "grad_norm": 1.7492828369140625, "learning_rate": 1.2436818473380373e-06, "loss": 0.8397, "step": 62665 }, { "epoch": 0.7637746334686117, "grad_norm": 1.9109883308410645, "learning_rate": 1.2433611289288005e-06, "loss": 0.7702, "step": 62670 }, { "epoch": 0.7638355696927596, "grad_norm": 2.3266961574554443, "learning_rate": 1.243040410519564e-06, "loss": 0.8276, "step": 62675 }, { "epoch": 0.7638965059169074, "grad_norm": 1.9397839307785034, "learning_rate": 1.2427196921103271e-06, "loss": 0.857, "step": 62680 }, { "epoch": 0.7639574421410552, "grad_norm": 1.9993643760681152, "learning_rate": 1.2423989737010906e-06, "loss": 0.834, "step": 62685 }, { "epoch": 0.764018378365203, "grad_norm": 1.7642419338226318, "learning_rate": 1.2420782552918538e-06, "loss": 0.8267, "step": 62690 }, { "epoch": 0.7640793145893507, "grad_norm": 1.9950724840164185, "learning_rate": 1.2417575368826173e-06, "loss": 0.7667, "step": 62695 }, { "epoch": 0.7641402508134986, "grad_norm": 1.8775990009307861, "learning_rate": 1.2414368184733805e-06, "loss": 0.8449, "step": 62700 }, { "epoch": 0.7642011870376464, "grad_norm": 1.8582152128219604, "learning_rate": 1.2411161000641437e-06, "loss": 0.8517, "step": 62705 }, { "epoch": 0.7642621232617942, "grad_norm": 2.381209135055542, "learning_rate": 1.240795381654907e-06, "loss": 0.8433, "step": 62710 }, { "epoch": 0.764323059485942, "grad_norm": 1.9276618957519531, "learning_rate": 1.2404746632456704e-06, "loss": 0.887, "step": 62715 }, { "epoch": 0.7643839957100899, "grad_norm": 2.2780134677886963, "learning_rate": 1.2401539448364338e-06, "loss": 0.8394, "step": 62720 }, { "epoch": 0.7644449319342377, "grad_norm": 2.0971872806549072, "learning_rate": 1.239833226427197e-06, "loss": 0.808, "step": 62725 }, { "epoch": 0.7645058681583854, "grad_norm": 1.9321194887161255, "learning_rate": 1.2395125080179603e-06, "loss": 0.8461, "step": 62730 }, { "epoch": 0.7645668043825332, "grad_norm": 1.9799768924713135, "learning_rate": 1.2391917896087237e-06, "loss": 0.8037, "step": 62735 }, { "epoch": 0.764627740606681, "grad_norm": 2.1867141723632812, "learning_rate": 1.238871071199487e-06, "loss": 0.9125, "step": 62740 }, { "epoch": 0.7646886768308289, "grad_norm": 1.8460359573364258, "learning_rate": 1.2385503527902502e-06, "loss": 0.7964, "step": 62745 }, { "epoch": 0.7647496130549767, "grad_norm": 1.813293218612671, "learning_rate": 1.2382296343810136e-06, "loss": 0.7989, "step": 62750 }, { "epoch": 0.7648105492791245, "grad_norm": 2.3476710319519043, "learning_rate": 1.2379089159717768e-06, "loss": 0.8522, "step": 62755 }, { "epoch": 0.7648714855032723, "grad_norm": 2.070889472961426, "learning_rate": 1.2375881975625403e-06, "loss": 0.7929, "step": 62760 }, { "epoch": 0.76493242172742, "grad_norm": 1.7889831066131592, "learning_rate": 1.2372674791533035e-06, "loss": 0.7788, "step": 62765 }, { "epoch": 0.7649933579515679, "grad_norm": 1.9238673448562622, "learning_rate": 1.2369467607440667e-06, "loss": 0.7794, "step": 62770 }, { "epoch": 0.7650542941757157, "grad_norm": 2.081885576248169, "learning_rate": 1.2366260423348302e-06, "loss": 0.7932, "step": 62775 }, { "epoch": 0.7651152303998635, "grad_norm": 2.103118896484375, "learning_rate": 1.2363053239255934e-06, "loss": 0.8805, "step": 62780 }, { "epoch": 0.7651761666240113, "grad_norm": 2.1259584426879883, "learning_rate": 1.2359846055163566e-06, "loss": 0.7891, "step": 62785 }, { "epoch": 0.7652371028481592, "grad_norm": 1.9504830837249756, "learning_rate": 1.23566388710712e-06, "loss": 0.7904, "step": 62790 }, { "epoch": 0.7652980390723069, "grad_norm": 2.010316848754883, "learning_rate": 1.2353431686978833e-06, "loss": 0.8143, "step": 62795 }, { "epoch": 0.7653589752964547, "grad_norm": 2.0933284759521484, "learning_rate": 1.2350224502886467e-06, "loss": 0.8144, "step": 62800 }, { "epoch": 0.7654199115206025, "grad_norm": 2.498605489730835, "learning_rate": 1.23470173187941e-06, "loss": 0.8309, "step": 62805 }, { "epoch": 0.7654808477447503, "grad_norm": 1.9356403350830078, "learning_rate": 1.2343810134701732e-06, "loss": 0.7833, "step": 62810 }, { "epoch": 0.7655417839688982, "grad_norm": 1.8429423570632935, "learning_rate": 1.2340602950609366e-06, "loss": 0.7664, "step": 62815 }, { "epoch": 0.765602720193046, "grad_norm": 2.0183498859405518, "learning_rate": 1.2337395766516999e-06, "loss": 0.7153, "step": 62820 }, { "epoch": 0.7656636564171938, "grad_norm": 2.1505136489868164, "learning_rate": 1.233418858242463e-06, "loss": 0.7536, "step": 62825 }, { "epoch": 0.7657245926413415, "grad_norm": 1.611081600189209, "learning_rate": 1.2330981398332265e-06, "loss": 0.8726, "step": 62830 }, { "epoch": 0.7657855288654893, "grad_norm": 2.0580291748046875, "learning_rate": 1.2327774214239898e-06, "loss": 0.8143, "step": 62835 }, { "epoch": 0.7658464650896372, "grad_norm": 2.1553452014923096, "learning_rate": 1.2324567030147532e-06, "loss": 0.8013, "step": 62840 }, { "epoch": 0.765907401313785, "grad_norm": 2.077500343322754, "learning_rate": 1.2321359846055164e-06, "loss": 0.8317, "step": 62845 }, { "epoch": 0.7659683375379328, "grad_norm": 1.91743803024292, "learning_rate": 1.2318152661962799e-06, "loss": 0.8307, "step": 62850 }, { "epoch": 0.7660292737620806, "grad_norm": 2.0584940910339355, "learning_rate": 1.2314945477870431e-06, "loss": 0.8042, "step": 62855 }, { "epoch": 0.7660902099862285, "grad_norm": 2.154292106628418, "learning_rate": 1.2311738293778063e-06, "loss": 0.8178, "step": 62860 }, { "epoch": 0.7661511462103762, "grad_norm": 2.0310256481170654, "learning_rate": 1.2308531109685696e-06, "loss": 0.7762, "step": 62865 }, { "epoch": 0.766212082434524, "grad_norm": 1.8684203624725342, "learning_rate": 1.230532392559333e-06, "loss": 0.8314, "step": 62870 }, { "epoch": 0.7662730186586718, "grad_norm": 2.367295503616333, "learning_rate": 1.2302116741500964e-06, "loss": 0.8291, "step": 62875 }, { "epoch": 0.7663339548828196, "grad_norm": 1.6980607509613037, "learning_rate": 1.2298909557408597e-06, "loss": 0.7934, "step": 62880 }, { "epoch": 0.7663948911069675, "grad_norm": 2.339055061340332, "learning_rate": 1.229570237331623e-06, "loss": 0.8653, "step": 62885 }, { "epoch": 0.7664558273311153, "grad_norm": 1.8025503158569336, "learning_rate": 1.2292495189223863e-06, "loss": 0.8664, "step": 62890 }, { "epoch": 0.7665167635552631, "grad_norm": 1.9393428564071655, "learning_rate": 1.2289288005131496e-06, "loss": 0.8018, "step": 62895 }, { "epoch": 0.7665776997794108, "grad_norm": 1.7904282808303833, "learning_rate": 1.2286080821039128e-06, "loss": 0.8382, "step": 62900 }, { "epoch": 0.7666386360035586, "grad_norm": 2.102149248123169, "learning_rate": 1.228287363694676e-06, "loss": 0.7528, "step": 62905 }, { "epoch": 0.7666995722277065, "grad_norm": 2.0230090618133545, "learning_rate": 1.2279666452854395e-06, "loss": 0.7908, "step": 62910 }, { "epoch": 0.7667605084518543, "grad_norm": 2.013017177581787, "learning_rate": 1.227645926876203e-06, "loss": 0.8618, "step": 62915 }, { "epoch": 0.7668214446760021, "grad_norm": 2.0531909465789795, "learning_rate": 1.2273252084669661e-06, "loss": 0.9239, "step": 62920 }, { "epoch": 0.7668823809001499, "grad_norm": 1.8494950532913208, "learning_rate": 1.2270044900577294e-06, "loss": 0.8189, "step": 62925 }, { "epoch": 0.7669433171242978, "grad_norm": 1.6912767887115479, "learning_rate": 1.2266837716484928e-06, "loss": 0.7443, "step": 62930 }, { "epoch": 0.7670042533484455, "grad_norm": 1.8988245725631714, "learning_rate": 1.226363053239256e-06, "loss": 0.8081, "step": 62935 }, { "epoch": 0.7670651895725933, "grad_norm": 2.0464279651641846, "learning_rate": 1.2260423348300193e-06, "loss": 0.8892, "step": 62940 }, { "epoch": 0.7671261257967411, "grad_norm": 1.8923791646957397, "learning_rate": 1.2257216164207827e-06, "loss": 0.8461, "step": 62945 }, { "epoch": 0.7671870620208889, "grad_norm": 1.8576645851135254, "learning_rate": 1.225400898011546e-06, "loss": 0.7853, "step": 62950 }, { "epoch": 0.7672479982450368, "grad_norm": 2.161609411239624, "learning_rate": 1.2250801796023094e-06, "loss": 0.8458, "step": 62955 }, { "epoch": 0.7673089344691846, "grad_norm": 1.8018652200698853, "learning_rate": 1.2247594611930726e-06, "loss": 0.8449, "step": 62960 }, { "epoch": 0.7673698706933324, "grad_norm": 2.1649329662323, "learning_rate": 1.2244387427838358e-06, "loss": 0.8904, "step": 62965 }, { "epoch": 0.7674308069174801, "grad_norm": 2.440145254135132, "learning_rate": 1.2241180243745993e-06, "loss": 0.7603, "step": 62970 }, { "epoch": 0.7674917431416279, "grad_norm": 1.8762234449386597, "learning_rate": 1.2237973059653625e-06, "loss": 0.853, "step": 62975 }, { "epoch": 0.7675526793657758, "grad_norm": 2.0162174701690674, "learning_rate": 1.2234765875561257e-06, "loss": 0.8153, "step": 62980 }, { "epoch": 0.7676136155899236, "grad_norm": 2.0339982509613037, "learning_rate": 1.2231558691468892e-06, "loss": 0.7389, "step": 62985 }, { "epoch": 0.7676745518140714, "grad_norm": 2.075232744216919, "learning_rate": 1.2228351507376524e-06, "loss": 0.765, "step": 62990 }, { "epoch": 0.7677354880382192, "grad_norm": 2.1855661869049072, "learning_rate": 1.2225144323284158e-06, "loss": 0.7921, "step": 62995 }, { "epoch": 0.767796424262367, "grad_norm": 1.8104863166809082, "learning_rate": 1.222193713919179e-06, "loss": 0.7908, "step": 63000 }, { "epoch": 0.7678573604865148, "grad_norm": 1.8392647504806519, "learning_rate": 1.2218729955099423e-06, "loss": 0.7937, "step": 63005 }, { "epoch": 0.7679182967106626, "grad_norm": 2.033491373062134, "learning_rate": 1.2215522771007057e-06, "loss": 0.7549, "step": 63010 }, { "epoch": 0.7679792329348104, "grad_norm": 1.915379524230957, "learning_rate": 1.221231558691469e-06, "loss": 0.8266, "step": 63015 }, { "epoch": 0.7680401691589582, "grad_norm": 2.012279748916626, "learning_rate": 1.2209108402822322e-06, "loss": 0.7825, "step": 63020 }, { "epoch": 0.768101105383106, "grad_norm": 2.9751415252685547, "learning_rate": 1.2205901218729956e-06, "loss": 0.8607, "step": 63025 }, { "epoch": 0.7681620416072539, "grad_norm": 2.0839920043945312, "learning_rate": 1.2202694034637589e-06, "loss": 0.8268, "step": 63030 }, { "epoch": 0.7682229778314017, "grad_norm": 1.9918262958526611, "learning_rate": 1.2199486850545223e-06, "loss": 0.7907, "step": 63035 }, { "epoch": 0.7682839140555494, "grad_norm": 1.826700210571289, "learning_rate": 1.2196279666452855e-06, "loss": 0.7591, "step": 63040 }, { "epoch": 0.7683448502796972, "grad_norm": 1.8974171876907349, "learning_rate": 1.2193072482360488e-06, "loss": 0.8287, "step": 63045 }, { "epoch": 0.7684057865038451, "grad_norm": 1.5477111339569092, "learning_rate": 1.2189865298268122e-06, "loss": 0.7865, "step": 63050 }, { "epoch": 0.7684667227279929, "grad_norm": 1.9565777778625488, "learning_rate": 1.2186658114175754e-06, "loss": 0.7758, "step": 63055 }, { "epoch": 0.7685276589521407, "grad_norm": 1.9444459676742554, "learning_rate": 1.2183450930083387e-06, "loss": 0.7851, "step": 63060 }, { "epoch": 0.7685885951762885, "grad_norm": 2.1199216842651367, "learning_rate": 1.218024374599102e-06, "loss": 0.835, "step": 63065 }, { "epoch": 0.7686495314004363, "grad_norm": 2.183352470397949, "learning_rate": 1.2177036561898655e-06, "loss": 0.8223, "step": 63070 }, { "epoch": 0.7687104676245841, "grad_norm": 2.2661495208740234, "learning_rate": 1.2173829377806288e-06, "loss": 0.7949, "step": 63075 }, { "epoch": 0.7687714038487319, "grad_norm": 2.054393768310547, "learning_rate": 1.217062219371392e-06, "loss": 0.8233, "step": 63080 }, { "epoch": 0.7688323400728797, "grad_norm": 1.947589635848999, "learning_rate": 1.2167415009621552e-06, "loss": 0.7197, "step": 63085 }, { "epoch": 0.7688932762970275, "grad_norm": 2.2390313148498535, "learning_rate": 1.2164207825529187e-06, "loss": 0.8194, "step": 63090 }, { "epoch": 0.7689542125211754, "grad_norm": 2.251286029815674, "learning_rate": 1.2161000641436819e-06, "loss": 0.8582, "step": 63095 }, { "epoch": 0.7690151487453232, "grad_norm": 1.9611629247665405, "learning_rate": 1.2157793457344451e-06, "loss": 0.8142, "step": 63100 }, { "epoch": 0.769076084969471, "grad_norm": 2.0334296226501465, "learning_rate": 1.2154586273252086e-06, "loss": 0.8902, "step": 63105 }, { "epoch": 0.7691370211936187, "grad_norm": 1.8954840898513794, "learning_rate": 1.215137908915972e-06, "loss": 0.8939, "step": 63110 }, { "epoch": 0.7691979574177665, "grad_norm": 2.8595082759857178, "learning_rate": 1.2148171905067352e-06, "loss": 0.7312, "step": 63115 }, { "epoch": 0.7692588936419144, "grad_norm": 1.9615886211395264, "learning_rate": 1.2144964720974985e-06, "loss": 0.8389, "step": 63120 }, { "epoch": 0.7693198298660622, "grad_norm": 2.0529325008392334, "learning_rate": 1.2141757536882617e-06, "loss": 0.7698, "step": 63125 }, { "epoch": 0.76938076609021, "grad_norm": 1.9608726501464844, "learning_rate": 1.2138550352790251e-06, "loss": 0.8168, "step": 63130 }, { "epoch": 0.7694417023143578, "grad_norm": 2.0648391246795654, "learning_rate": 1.2135343168697884e-06, "loss": 0.7711, "step": 63135 }, { "epoch": 0.7695026385385056, "grad_norm": 1.864972710609436, "learning_rate": 1.2132135984605518e-06, "loss": 0.8882, "step": 63140 }, { "epoch": 0.7695635747626534, "grad_norm": 1.9391088485717773, "learning_rate": 1.212892880051315e-06, "loss": 0.7835, "step": 63145 }, { "epoch": 0.7696245109868012, "grad_norm": 2.192641019821167, "learning_rate": 1.2125721616420785e-06, "loss": 0.7647, "step": 63150 }, { "epoch": 0.769685447210949, "grad_norm": 1.8683013916015625, "learning_rate": 1.2122514432328417e-06, "loss": 0.8674, "step": 63155 }, { "epoch": 0.7697463834350968, "grad_norm": 1.9692559242248535, "learning_rate": 1.211930724823605e-06, "loss": 0.8469, "step": 63160 }, { "epoch": 0.7698073196592446, "grad_norm": 1.9248610734939575, "learning_rate": 1.2116100064143684e-06, "loss": 0.8313, "step": 63165 }, { "epoch": 0.7698682558833925, "grad_norm": 2.082028388977051, "learning_rate": 1.2112892880051316e-06, "loss": 0.8219, "step": 63170 }, { "epoch": 0.7699291921075403, "grad_norm": 1.862497091293335, "learning_rate": 1.2109685695958948e-06, "loss": 0.8237, "step": 63175 }, { "epoch": 0.769990128331688, "grad_norm": 1.9845925569534302, "learning_rate": 1.2106478511866583e-06, "loss": 0.7934, "step": 63180 }, { "epoch": 0.7700510645558358, "grad_norm": 1.8687446117401123, "learning_rate": 1.2103271327774215e-06, "loss": 0.7271, "step": 63185 }, { "epoch": 0.7701120007799837, "grad_norm": 2.0549373626708984, "learning_rate": 1.210006414368185e-06, "loss": 0.7234, "step": 63190 }, { "epoch": 0.7701729370041315, "grad_norm": 1.7680639028549194, "learning_rate": 1.2096856959589482e-06, "loss": 0.8095, "step": 63195 }, { "epoch": 0.7702338732282793, "grad_norm": 2.0625081062316895, "learning_rate": 1.2093649775497114e-06, "loss": 0.8666, "step": 63200 }, { "epoch": 0.7702948094524271, "grad_norm": 1.840359091758728, "learning_rate": 1.2090442591404748e-06, "loss": 0.7566, "step": 63205 }, { "epoch": 0.7703557456765749, "grad_norm": 1.775303840637207, "learning_rate": 1.208723540731238e-06, "loss": 0.8159, "step": 63210 }, { "epoch": 0.7704166819007227, "grad_norm": 1.8597183227539062, "learning_rate": 1.2084028223220013e-06, "loss": 0.7803, "step": 63215 }, { "epoch": 0.7704776181248705, "grad_norm": 1.7133985757827759, "learning_rate": 1.2080821039127647e-06, "loss": 0.8583, "step": 63220 }, { "epoch": 0.7705385543490183, "grad_norm": 1.9878010749816895, "learning_rate": 1.2077613855035282e-06, "loss": 0.8038, "step": 63225 }, { "epoch": 0.7705994905731661, "grad_norm": 2.113149881362915, "learning_rate": 1.2074406670942914e-06, "loss": 0.8455, "step": 63230 }, { "epoch": 0.770660426797314, "grad_norm": 1.954186201095581, "learning_rate": 1.2071199486850546e-06, "loss": 0.7697, "step": 63235 }, { "epoch": 0.7707213630214618, "grad_norm": 2.066807985305786, "learning_rate": 1.2067992302758178e-06, "loss": 0.746, "step": 63240 }, { "epoch": 0.7707822992456096, "grad_norm": 2.1919305324554443, "learning_rate": 1.2064785118665813e-06, "loss": 0.8595, "step": 63245 }, { "epoch": 0.7708432354697573, "grad_norm": 2.002659559249878, "learning_rate": 1.2061577934573445e-06, "loss": 0.8187, "step": 63250 }, { "epoch": 0.7709041716939051, "grad_norm": 2.0005781650543213, "learning_rate": 1.2058370750481077e-06, "loss": 0.7503, "step": 63255 }, { "epoch": 0.770965107918053, "grad_norm": 2.0820200443267822, "learning_rate": 1.2055163566388712e-06, "loss": 0.8701, "step": 63260 }, { "epoch": 0.7710260441422008, "grad_norm": 2.4095070362091064, "learning_rate": 1.2051956382296346e-06, "loss": 0.7842, "step": 63265 }, { "epoch": 0.7710869803663486, "grad_norm": 1.9130958318710327, "learning_rate": 1.2048749198203979e-06, "loss": 0.7972, "step": 63270 }, { "epoch": 0.7711479165904964, "grad_norm": 1.9373960494995117, "learning_rate": 1.204554201411161e-06, "loss": 0.7569, "step": 63275 }, { "epoch": 0.7712088528146442, "grad_norm": 2.3740413188934326, "learning_rate": 1.2042334830019243e-06, "loss": 0.7865, "step": 63280 }, { "epoch": 0.771269789038792, "grad_norm": 2.0136611461639404, "learning_rate": 1.2039127645926878e-06, "loss": 0.8283, "step": 63285 }, { "epoch": 0.7713307252629398, "grad_norm": 2.2114317417144775, "learning_rate": 1.203592046183451e-06, "loss": 0.7864, "step": 63290 }, { "epoch": 0.7713916614870876, "grad_norm": 1.9111793041229248, "learning_rate": 1.2032713277742144e-06, "loss": 0.8524, "step": 63295 }, { "epoch": 0.7714525977112354, "grad_norm": 1.7679507732391357, "learning_rate": 1.2029506093649777e-06, "loss": 0.8033, "step": 63300 }, { "epoch": 0.7715135339353832, "grad_norm": 2.0406653881073, "learning_rate": 1.202629890955741e-06, "loss": 0.8458, "step": 63305 }, { "epoch": 0.7715744701595311, "grad_norm": 2.1997809410095215, "learning_rate": 1.2023091725465043e-06, "loss": 0.8112, "step": 63310 }, { "epoch": 0.7716354063836789, "grad_norm": 2.3314664363861084, "learning_rate": 1.2019884541372675e-06, "loss": 0.8563, "step": 63315 }, { "epoch": 0.7716963426078266, "grad_norm": 1.9406956434249878, "learning_rate": 1.2016677357280308e-06, "loss": 0.8478, "step": 63320 }, { "epoch": 0.7717572788319744, "grad_norm": 1.9465258121490479, "learning_rate": 1.2013470173187942e-06, "loss": 0.7926, "step": 63325 }, { "epoch": 0.7718182150561222, "grad_norm": 1.9319688081741333, "learning_rate": 1.2010262989095574e-06, "loss": 0.7835, "step": 63330 }, { "epoch": 0.7718791512802701, "grad_norm": 1.8768655061721802, "learning_rate": 1.2007055805003209e-06, "loss": 0.7847, "step": 63335 }, { "epoch": 0.7719400875044179, "grad_norm": 1.7831557989120483, "learning_rate": 1.2003848620910841e-06, "loss": 0.7447, "step": 63340 }, { "epoch": 0.7720010237285657, "grad_norm": 1.769439458847046, "learning_rate": 1.2000641436818476e-06, "loss": 0.8176, "step": 63345 }, { "epoch": 0.7720619599527135, "grad_norm": 2.0890419483184814, "learning_rate": 1.1997434252726108e-06, "loss": 0.859, "step": 63350 }, { "epoch": 0.7721228961768613, "grad_norm": 2.251889944076538, "learning_rate": 1.199422706863374e-06, "loss": 0.8154, "step": 63355 }, { "epoch": 0.7721838324010091, "grad_norm": 1.999668002128601, "learning_rate": 1.1991019884541372e-06, "loss": 0.8316, "step": 63360 }, { "epoch": 0.7722447686251569, "grad_norm": 1.8326157331466675, "learning_rate": 1.1987812700449007e-06, "loss": 0.7705, "step": 63365 }, { "epoch": 0.7723057048493047, "grad_norm": 1.9411487579345703, "learning_rate": 1.198460551635664e-06, "loss": 0.7623, "step": 63370 }, { "epoch": 0.7723666410734525, "grad_norm": 2.138369560241699, "learning_rate": 1.1981398332264273e-06, "loss": 0.8442, "step": 63375 }, { "epoch": 0.7724275772976004, "grad_norm": 1.9201329946517944, "learning_rate": 1.1978191148171906e-06, "loss": 0.8002, "step": 63380 }, { "epoch": 0.7724885135217482, "grad_norm": 2.1130712032318115, "learning_rate": 1.197498396407954e-06, "loss": 0.7881, "step": 63385 }, { "epoch": 0.7725494497458959, "grad_norm": 2.16257905960083, "learning_rate": 1.1971776779987172e-06, "loss": 0.8572, "step": 63390 }, { "epoch": 0.7726103859700437, "grad_norm": 1.9089957475662231, "learning_rate": 1.1968569595894805e-06, "loss": 0.7999, "step": 63395 }, { "epoch": 0.7726713221941915, "grad_norm": 1.9493192434310913, "learning_rate": 1.1965362411802437e-06, "loss": 0.819, "step": 63400 }, { "epoch": 0.7727322584183394, "grad_norm": 2.0680878162384033, "learning_rate": 1.1962155227710071e-06, "loss": 0.8248, "step": 63405 }, { "epoch": 0.7727931946424872, "grad_norm": 2.025334358215332, "learning_rate": 1.1958948043617704e-06, "loss": 0.8145, "step": 63410 }, { "epoch": 0.772854130866635, "grad_norm": 1.7975856065750122, "learning_rate": 1.1955740859525338e-06, "loss": 0.8704, "step": 63415 }, { "epoch": 0.7729150670907828, "grad_norm": 1.870237946510315, "learning_rate": 1.195253367543297e-06, "loss": 0.826, "step": 63420 }, { "epoch": 0.7729760033149305, "grad_norm": 1.792922854423523, "learning_rate": 1.1949326491340605e-06, "loss": 0.708, "step": 63425 }, { "epoch": 0.7730369395390784, "grad_norm": 2.0404698848724365, "learning_rate": 1.1946119307248237e-06, "loss": 0.8015, "step": 63430 }, { "epoch": 0.7730978757632262, "grad_norm": 2.091895580291748, "learning_rate": 1.194291212315587e-06, "loss": 0.8272, "step": 63435 }, { "epoch": 0.773158811987374, "grad_norm": 2.0817058086395264, "learning_rate": 1.1939704939063502e-06, "loss": 0.817, "step": 63440 }, { "epoch": 0.7732197482115218, "grad_norm": 1.7672474384307861, "learning_rate": 1.1936497754971136e-06, "loss": 0.7669, "step": 63445 }, { "epoch": 0.7732806844356697, "grad_norm": 2.3370494842529297, "learning_rate": 1.1933290570878768e-06, "loss": 0.8676, "step": 63450 }, { "epoch": 0.7733416206598175, "grad_norm": 1.9703770875930786, "learning_rate": 1.1930083386786403e-06, "loss": 0.7982, "step": 63455 }, { "epoch": 0.7734025568839652, "grad_norm": 1.9817211627960205, "learning_rate": 1.1926876202694037e-06, "loss": 0.7872, "step": 63460 }, { "epoch": 0.773463493108113, "grad_norm": 2.4780936241149902, "learning_rate": 1.192366901860167e-06, "loss": 0.7999, "step": 63465 }, { "epoch": 0.7735244293322608, "grad_norm": 2.2872025966644287, "learning_rate": 1.1920461834509302e-06, "loss": 0.791, "step": 63470 }, { "epoch": 0.7735853655564087, "grad_norm": 2.222482204437256, "learning_rate": 1.1917254650416934e-06, "loss": 0.8459, "step": 63475 }, { "epoch": 0.7736463017805565, "grad_norm": 1.880462408065796, "learning_rate": 1.1914047466324568e-06, "loss": 0.8755, "step": 63480 }, { "epoch": 0.7737072380047043, "grad_norm": 1.8776456117630005, "learning_rate": 1.19108402822322e-06, "loss": 0.8106, "step": 63485 }, { "epoch": 0.7737681742288521, "grad_norm": 2.9926860332489014, "learning_rate": 1.1907633098139835e-06, "loss": 0.8224, "step": 63490 }, { "epoch": 0.7738291104529998, "grad_norm": 1.7202082872390747, "learning_rate": 1.1904425914047467e-06, "loss": 0.7731, "step": 63495 }, { "epoch": 0.7738900466771477, "grad_norm": 1.8757500648498535, "learning_rate": 1.1901218729955102e-06, "loss": 0.8357, "step": 63500 }, { "epoch": 0.7739509829012955, "grad_norm": 1.9934091567993164, "learning_rate": 1.1898011545862734e-06, "loss": 0.8421, "step": 63505 }, { "epoch": 0.7740119191254433, "grad_norm": 1.9906842708587646, "learning_rate": 1.1894804361770366e-06, "loss": 0.8338, "step": 63510 }, { "epoch": 0.7740728553495911, "grad_norm": 2.5699665546417236, "learning_rate": 1.1891597177677999e-06, "loss": 0.8478, "step": 63515 }, { "epoch": 0.774133791573739, "grad_norm": 2.0096535682678223, "learning_rate": 1.1888389993585633e-06, "loss": 0.8787, "step": 63520 }, { "epoch": 0.7741947277978868, "grad_norm": 1.9836452007293701, "learning_rate": 1.1885182809493265e-06, "loss": 0.8494, "step": 63525 }, { "epoch": 0.7742556640220345, "grad_norm": 1.7415608167648315, "learning_rate": 1.18819756254009e-06, "loss": 0.7918, "step": 63530 }, { "epoch": 0.7743166002461823, "grad_norm": 2.0467660427093506, "learning_rate": 1.1878768441308532e-06, "loss": 0.7752, "step": 63535 }, { "epoch": 0.7743775364703301, "grad_norm": 2.6041738986968994, "learning_rate": 1.1875561257216166e-06, "loss": 0.8345, "step": 63540 }, { "epoch": 0.774438472694478, "grad_norm": 3.1370859146118164, "learning_rate": 1.1872354073123799e-06, "loss": 0.7093, "step": 63545 }, { "epoch": 0.7744994089186258, "grad_norm": 2.0015077590942383, "learning_rate": 1.186914688903143e-06, "loss": 0.8446, "step": 63550 }, { "epoch": 0.7745603451427736, "grad_norm": 1.7577451467514038, "learning_rate": 1.1865939704939063e-06, "loss": 0.773, "step": 63555 }, { "epoch": 0.7746212813669214, "grad_norm": 1.7972792387008667, "learning_rate": 1.1862732520846698e-06, "loss": 0.7418, "step": 63560 }, { "epoch": 0.7746822175910691, "grad_norm": 2.3283655643463135, "learning_rate": 1.185952533675433e-06, "loss": 0.8177, "step": 63565 }, { "epoch": 0.774743153815217, "grad_norm": 1.8796498775482178, "learning_rate": 1.1856318152661964e-06, "loss": 0.8511, "step": 63570 }, { "epoch": 0.7748040900393648, "grad_norm": 1.7604376077651978, "learning_rate": 1.1853110968569597e-06, "loss": 0.8369, "step": 63575 }, { "epoch": 0.7748650262635126, "grad_norm": 1.9352455139160156, "learning_rate": 1.1849903784477231e-06, "loss": 0.8392, "step": 63580 }, { "epoch": 0.7749259624876604, "grad_norm": 1.9009368419647217, "learning_rate": 1.1846696600384863e-06, "loss": 0.8462, "step": 63585 }, { "epoch": 0.7749868987118083, "grad_norm": 2.0602214336395264, "learning_rate": 1.1843489416292496e-06, "loss": 0.8261, "step": 63590 }, { "epoch": 0.7750478349359561, "grad_norm": 2.0802457332611084, "learning_rate": 1.1840282232200128e-06, "loss": 0.8517, "step": 63595 }, { "epoch": 0.7751087711601038, "grad_norm": 2.016843318939209, "learning_rate": 1.1837075048107762e-06, "loss": 0.8429, "step": 63600 }, { "epoch": 0.7751697073842516, "grad_norm": 1.5742191076278687, "learning_rate": 1.1833867864015395e-06, "loss": 0.8142, "step": 63605 }, { "epoch": 0.7752306436083994, "grad_norm": 1.8268821239471436, "learning_rate": 1.183066067992303e-06, "loss": 0.8218, "step": 63610 }, { "epoch": 0.7752915798325473, "grad_norm": 1.9288195371627808, "learning_rate": 1.1827453495830661e-06, "loss": 0.8571, "step": 63615 }, { "epoch": 0.7753525160566951, "grad_norm": 1.8453449010849, "learning_rate": 1.1824246311738296e-06, "loss": 0.8161, "step": 63620 }, { "epoch": 0.7754134522808429, "grad_norm": 2.0470783710479736, "learning_rate": 1.1821039127645928e-06, "loss": 0.804, "step": 63625 }, { "epoch": 0.7754743885049907, "grad_norm": 2.135525941848755, "learning_rate": 1.181783194355356e-06, "loss": 0.8608, "step": 63630 }, { "epoch": 0.7755353247291384, "grad_norm": 2.0399842262268066, "learning_rate": 1.1814624759461193e-06, "loss": 0.7967, "step": 63635 }, { "epoch": 0.7755962609532863, "grad_norm": 1.8998754024505615, "learning_rate": 1.1811417575368827e-06, "loss": 0.8059, "step": 63640 }, { "epoch": 0.7756571971774341, "grad_norm": 2.217257022857666, "learning_rate": 1.1808210391276461e-06, "loss": 0.7936, "step": 63645 }, { "epoch": 0.7757181334015819, "grad_norm": 2.087864637374878, "learning_rate": 1.1805003207184094e-06, "loss": 0.8282, "step": 63650 }, { "epoch": 0.7757790696257297, "grad_norm": 2.067901134490967, "learning_rate": 1.1801796023091726e-06, "loss": 0.7737, "step": 63655 }, { "epoch": 0.7758400058498776, "grad_norm": 2.169952630996704, "learning_rate": 1.179858883899936e-06, "loss": 0.8359, "step": 63660 }, { "epoch": 0.7759009420740254, "grad_norm": 1.9874087572097778, "learning_rate": 1.1795381654906993e-06, "loss": 0.8311, "step": 63665 }, { "epoch": 0.7759618782981731, "grad_norm": 2.0199005603790283, "learning_rate": 1.1792174470814625e-06, "loss": 0.8559, "step": 63670 }, { "epoch": 0.7760228145223209, "grad_norm": 1.887294054031372, "learning_rate": 1.1788967286722257e-06, "loss": 0.828, "step": 63675 }, { "epoch": 0.7760837507464687, "grad_norm": 2.193141222000122, "learning_rate": 1.1785760102629892e-06, "loss": 0.7779, "step": 63680 }, { "epoch": 0.7761446869706166, "grad_norm": 2.097205877304077, "learning_rate": 1.1782552918537526e-06, "loss": 0.8164, "step": 63685 }, { "epoch": 0.7762056231947644, "grad_norm": 1.8216360807418823, "learning_rate": 1.1779345734445158e-06, "loss": 0.845, "step": 63690 }, { "epoch": 0.7762665594189122, "grad_norm": 2.002842426300049, "learning_rate": 1.177613855035279e-06, "loss": 0.7622, "step": 63695 }, { "epoch": 0.77632749564306, "grad_norm": 2.056058168411255, "learning_rate": 1.1772931366260425e-06, "loss": 0.8531, "step": 63700 }, { "epoch": 0.7763884318672077, "grad_norm": 2.1486525535583496, "learning_rate": 1.1769724182168057e-06, "loss": 0.8564, "step": 63705 }, { "epoch": 0.7764493680913556, "grad_norm": 1.6970889568328857, "learning_rate": 1.176651699807569e-06, "loss": 0.779, "step": 63710 }, { "epoch": 0.7765103043155034, "grad_norm": 1.7658876180648804, "learning_rate": 1.1763309813983324e-06, "loss": 0.9507, "step": 63715 }, { "epoch": 0.7765712405396512, "grad_norm": 2.036979913711548, "learning_rate": 1.1760102629890956e-06, "loss": 0.8672, "step": 63720 }, { "epoch": 0.776632176763799, "grad_norm": 1.9634928703308105, "learning_rate": 1.175689544579859e-06, "loss": 0.7642, "step": 63725 }, { "epoch": 0.7766931129879469, "grad_norm": 2.2583699226379395, "learning_rate": 1.1753688261706223e-06, "loss": 0.8265, "step": 63730 }, { "epoch": 0.7767540492120947, "grad_norm": 1.8573517799377441, "learning_rate": 1.1750481077613855e-06, "loss": 0.7689, "step": 63735 }, { "epoch": 0.7768149854362424, "grad_norm": 1.8271437883377075, "learning_rate": 1.174727389352149e-06, "loss": 0.8215, "step": 63740 }, { "epoch": 0.7768759216603902, "grad_norm": 2.000833511352539, "learning_rate": 1.1744066709429122e-06, "loss": 0.8208, "step": 63745 }, { "epoch": 0.776936857884538, "grad_norm": 1.9971498250961304, "learning_rate": 1.1740859525336754e-06, "loss": 0.7708, "step": 63750 }, { "epoch": 0.7769977941086859, "grad_norm": 1.9187383651733398, "learning_rate": 1.1737652341244389e-06, "loss": 0.8275, "step": 63755 }, { "epoch": 0.7770587303328337, "grad_norm": 2.035543918609619, "learning_rate": 1.173444515715202e-06, "loss": 0.8769, "step": 63760 }, { "epoch": 0.7771196665569815, "grad_norm": 1.78867769241333, "learning_rate": 1.1731237973059655e-06, "loss": 0.7827, "step": 63765 }, { "epoch": 0.7771806027811292, "grad_norm": 1.7038496732711792, "learning_rate": 1.1728030788967288e-06, "loss": 0.8027, "step": 63770 }, { "epoch": 0.777241539005277, "grad_norm": 2.2374050617218018, "learning_rate": 1.1724823604874922e-06, "loss": 0.7921, "step": 63775 }, { "epoch": 0.7773024752294249, "grad_norm": 1.9105056524276733, "learning_rate": 1.1721616420782554e-06, "loss": 0.7978, "step": 63780 }, { "epoch": 0.7773634114535727, "grad_norm": 1.6839040517807007, "learning_rate": 1.1718409236690187e-06, "loss": 0.7933, "step": 63785 }, { "epoch": 0.7774243476777205, "grad_norm": 1.9018995761871338, "learning_rate": 1.1715202052597819e-06, "loss": 0.8193, "step": 63790 }, { "epoch": 0.7774852839018683, "grad_norm": 1.9619512557983398, "learning_rate": 1.1711994868505453e-06, "loss": 0.844, "step": 63795 }, { "epoch": 0.7775462201260162, "grad_norm": 2.0910744667053223, "learning_rate": 1.1708787684413086e-06, "loss": 0.8335, "step": 63800 }, { "epoch": 0.7776071563501639, "grad_norm": 2.52006196975708, "learning_rate": 1.170558050032072e-06, "loss": 0.8973, "step": 63805 }, { "epoch": 0.7776680925743117, "grad_norm": 1.731614589691162, "learning_rate": 1.1702373316228352e-06, "loss": 0.7944, "step": 63810 }, { "epoch": 0.7777290287984595, "grad_norm": 2.0026779174804688, "learning_rate": 1.1699166132135987e-06, "loss": 0.8641, "step": 63815 }, { "epoch": 0.7777899650226073, "grad_norm": 2.091434955596924, "learning_rate": 1.1695958948043619e-06, "loss": 0.8471, "step": 63820 }, { "epoch": 0.7778509012467552, "grad_norm": 2.19887638092041, "learning_rate": 1.1692751763951251e-06, "loss": 0.7666, "step": 63825 }, { "epoch": 0.777911837470903, "grad_norm": 2.0533547401428223, "learning_rate": 1.1689544579858883e-06, "loss": 0.8047, "step": 63830 }, { "epoch": 0.7779727736950508, "grad_norm": 1.9866217374801636, "learning_rate": 1.1686337395766518e-06, "loss": 0.7923, "step": 63835 }, { "epoch": 0.7780337099191985, "grad_norm": 2.333073854446411, "learning_rate": 1.1683130211674152e-06, "loss": 0.8038, "step": 63840 }, { "epoch": 0.7780946461433463, "grad_norm": 1.8171418905258179, "learning_rate": 1.1679923027581785e-06, "loss": 0.8789, "step": 63845 }, { "epoch": 0.7781555823674942, "grad_norm": 2.016625165939331, "learning_rate": 1.1676715843489417e-06, "loss": 0.764, "step": 63850 }, { "epoch": 0.778216518591642, "grad_norm": 2.1763663291931152, "learning_rate": 1.1673508659397051e-06, "loss": 0.8165, "step": 63855 }, { "epoch": 0.7782774548157898, "grad_norm": 2.0206947326660156, "learning_rate": 1.1670301475304684e-06, "loss": 0.8541, "step": 63860 }, { "epoch": 0.7783383910399376, "grad_norm": 1.7691004276275635, "learning_rate": 1.1667094291212316e-06, "loss": 0.8209, "step": 63865 }, { "epoch": 0.7783993272640854, "grad_norm": 2.512422800064087, "learning_rate": 1.166388710711995e-06, "loss": 0.8341, "step": 63870 }, { "epoch": 0.7784602634882332, "grad_norm": 2.222536325454712, "learning_rate": 1.1660679923027582e-06, "loss": 0.8393, "step": 63875 }, { "epoch": 0.778521199712381, "grad_norm": 2.081597089767456, "learning_rate": 1.1657472738935217e-06, "loss": 0.8147, "step": 63880 }, { "epoch": 0.7785821359365288, "grad_norm": 2.0557000637054443, "learning_rate": 1.165426555484285e-06, "loss": 0.8142, "step": 63885 }, { "epoch": 0.7786430721606766, "grad_norm": 1.7142571210861206, "learning_rate": 1.1651058370750481e-06, "loss": 0.7962, "step": 63890 }, { "epoch": 0.7787040083848245, "grad_norm": 2.06538987159729, "learning_rate": 1.1647851186658116e-06, "loss": 0.8588, "step": 63895 }, { "epoch": 0.7787649446089723, "grad_norm": 2.085949182510376, "learning_rate": 1.1644644002565748e-06, "loss": 0.874, "step": 63900 }, { "epoch": 0.7788258808331201, "grad_norm": 2.028041362762451, "learning_rate": 1.164143681847338e-06, "loss": 0.8387, "step": 63905 }, { "epoch": 0.7788868170572678, "grad_norm": 2.279130220413208, "learning_rate": 1.1638229634381015e-06, "loss": 0.7728, "step": 63910 }, { "epoch": 0.7789477532814156, "grad_norm": 2.149240016937256, "learning_rate": 1.1635022450288647e-06, "loss": 0.8603, "step": 63915 }, { "epoch": 0.7790086895055635, "grad_norm": 2.0590286254882812, "learning_rate": 1.1631815266196282e-06, "loss": 0.7484, "step": 63920 }, { "epoch": 0.7790696257297113, "grad_norm": 1.9587929248809814, "learning_rate": 1.1628608082103914e-06, "loss": 0.8438, "step": 63925 }, { "epoch": 0.7791305619538591, "grad_norm": 1.9332166910171509, "learning_rate": 1.1625400898011546e-06, "loss": 0.7812, "step": 63930 }, { "epoch": 0.7791914981780069, "grad_norm": 2.157280206680298, "learning_rate": 1.162219371391918e-06, "loss": 0.79, "step": 63935 }, { "epoch": 0.7792524344021547, "grad_norm": 1.9698233604431152, "learning_rate": 1.1618986529826813e-06, "loss": 0.8195, "step": 63940 }, { "epoch": 0.7793133706263025, "grad_norm": 2.0847384929656982, "learning_rate": 1.1615779345734445e-06, "loss": 0.7771, "step": 63945 }, { "epoch": 0.7793743068504503, "grad_norm": 1.799703598022461, "learning_rate": 1.161257216164208e-06, "loss": 0.8364, "step": 63950 }, { "epoch": 0.7794352430745981, "grad_norm": 2.021421432495117, "learning_rate": 1.1609364977549712e-06, "loss": 0.7604, "step": 63955 }, { "epoch": 0.7794961792987459, "grad_norm": 1.691784143447876, "learning_rate": 1.1606157793457346e-06, "loss": 0.8353, "step": 63960 }, { "epoch": 0.7795571155228938, "grad_norm": 1.8871814012527466, "learning_rate": 1.1602950609364978e-06, "loss": 0.8427, "step": 63965 }, { "epoch": 0.7796180517470416, "grad_norm": 1.6667158603668213, "learning_rate": 1.159974342527261e-06, "loss": 0.7647, "step": 63970 }, { "epoch": 0.7796789879711894, "grad_norm": 1.8772259950637817, "learning_rate": 1.1596536241180245e-06, "loss": 0.8251, "step": 63975 }, { "epoch": 0.7797399241953371, "grad_norm": 2.122783660888672, "learning_rate": 1.1593329057087877e-06, "loss": 0.7856, "step": 63980 }, { "epoch": 0.7798008604194849, "grad_norm": 2.067023992538452, "learning_rate": 1.159012187299551e-06, "loss": 0.8064, "step": 63985 }, { "epoch": 0.7798617966436328, "grad_norm": 1.762417197227478, "learning_rate": 1.1586914688903144e-06, "loss": 0.7879, "step": 63990 }, { "epoch": 0.7799227328677806, "grad_norm": 2.1438066959381104, "learning_rate": 1.1583707504810779e-06, "loss": 0.8218, "step": 63995 }, { "epoch": 0.7799836690919284, "grad_norm": 2.0851640701293945, "learning_rate": 1.158050032071841e-06, "loss": 0.8123, "step": 64000 }, { "epoch": 0.7800446053160762, "grad_norm": 1.722098708152771, "learning_rate": 1.1577293136626043e-06, "loss": 0.7744, "step": 64005 }, { "epoch": 0.780105541540224, "grad_norm": 2.454904317855835, "learning_rate": 1.1574085952533675e-06, "loss": 0.801, "step": 64010 }, { "epoch": 0.7801664777643718, "grad_norm": 2.322211265563965, "learning_rate": 1.157087876844131e-06, "loss": 0.7951, "step": 64015 }, { "epoch": 0.7802274139885196, "grad_norm": 1.962802767753601, "learning_rate": 1.1567671584348942e-06, "loss": 0.8359, "step": 64020 }, { "epoch": 0.7802883502126674, "grad_norm": 1.964829683303833, "learning_rate": 1.1564464400256574e-06, "loss": 0.7236, "step": 64025 }, { "epoch": 0.7803492864368152, "grad_norm": 1.8224252462387085, "learning_rate": 1.1561257216164209e-06, "loss": 0.7953, "step": 64030 }, { "epoch": 0.780410222660963, "grad_norm": 1.6244995594024658, "learning_rate": 1.1558050032071843e-06, "loss": 0.8169, "step": 64035 }, { "epoch": 0.7804711588851109, "grad_norm": 1.9406851530075073, "learning_rate": 1.1554842847979475e-06, "loss": 0.8182, "step": 64040 }, { "epoch": 0.7805320951092587, "grad_norm": 2.342881917953491, "learning_rate": 1.1551635663887108e-06, "loss": 0.7312, "step": 64045 }, { "epoch": 0.7805930313334064, "grad_norm": 2.0418734550476074, "learning_rate": 1.1548428479794742e-06, "loss": 0.8259, "step": 64050 }, { "epoch": 0.7806539675575542, "grad_norm": 1.9515475034713745, "learning_rate": 1.1545221295702374e-06, "loss": 0.7836, "step": 64055 }, { "epoch": 0.780714903781702, "grad_norm": 1.7172795534133911, "learning_rate": 1.1542014111610007e-06, "loss": 0.8381, "step": 64060 }, { "epoch": 0.7807758400058499, "grad_norm": 2.2687554359436035, "learning_rate": 1.1538806927517641e-06, "loss": 0.8211, "step": 64065 }, { "epoch": 0.7808367762299977, "grad_norm": 1.975641131401062, "learning_rate": 1.1535599743425273e-06, "loss": 0.7711, "step": 64070 }, { "epoch": 0.7808977124541455, "grad_norm": 2.0057525634765625, "learning_rate": 1.1532392559332908e-06, "loss": 0.8051, "step": 64075 }, { "epoch": 0.7809586486782933, "grad_norm": 1.895702838897705, "learning_rate": 1.152918537524054e-06, "loss": 0.7798, "step": 64080 }, { "epoch": 0.781019584902441, "grad_norm": 1.9683948755264282, "learning_rate": 1.1525978191148172e-06, "loss": 0.8207, "step": 64085 }, { "epoch": 0.7810805211265889, "grad_norm": 2.5610523223876953, "learning_rate": 1.1522771007055807e-06, "loss": 0.8114, "step": 64090 }, { "epoch": 0.7811414573507367, "grad_norm": 1.8041234016418457, "learning_rate": 1.151956382296344e-06, "loss": 0.773, "step": 64095 }, { "epoch": 0.7812023935748845, "grad_norm": 1.9569849967956543, "learning_rate": 1.1516356638871071e-06, "loss": 0.866, "step": 64100 }, { "epoch": 0.7812633297990323, "grad_norm": 1.8701213598251343, "learning_rate": 1.1513149454778706e-06, "loss": 0.7992, "step": 64105 }, { "epoch": 0.7813242660231802, "grad_norm": 2.023794651031494, "learning_rate": 1.1509942270686338e-06, "loss": 0.8762, "step": 64110 }, { "epoch": 0.781385202247328, "grad_norm": 2.0188496112823486, "learning_rate": 1.1506735086593972e-06, "loss": 0.8312, "step": 64115 }, { "epoch": 0.7814461384714757, "grad_norm": 2.0859830379486084, "learning_rate": 1.1503527902501605e-06, "loss": 0.7824, "step": 64120 }, { "epoch": 0.7815070746956235, "grad_norm": 1.8451398611068726, "learning_rate": 1.1500320718409237e-06, "loss": 0.7872, "step": 64125 }, { "epoch": 0.7815680109197713, "grad_norm": 1.9103728532791138, "learning_rate": 1.1497113534316871e-06, "loss": 0.7819, "step": 64130 }, { "epoch": 0.7816289471439192, "grad_norm": 2.2342641353607178, "learning_rate": 1.1493906350224504e-06, "loss": 0.7991, "step": 64135 }, { "epoch": 0.781689883368067, "grad_norm": 1.7276161909103394, "learning_rate": 1.1490699166132136e-06, "loss": 0.7833, "step": 64140 }, { "epoch": 0.7817508195922148, "grad_norm": 2.23270845413208, "learning_rate": 1.148749198203977e-06, "loss": 0.8384, "step": 64145 }, { "epoch": 0.7818117558163626, "grad_norm": 1.8893996477127075, "learning_rate": 1.1484284797947403e-06, "loss": 0.7768, "step": 64150 }, { "epoch": 0.7818726920405104, "grad_norm": 2.204404592514038, "learning_rate": 1.1481077613855037e-06, "loss": 0.7931, "step": 64155 }, { "epoch": 0.7819336282646582, "grad_norm": 2.0529556274414062, "learning_rate": 1.147787042976267e-06, "loss": 0.7966, "step": 64160 }, { "epoch": 0.781994564488806, "grad_norm": 1.8456854820251465, "learning_rate": 1.1474663245670302e-06, "loss": 0.8218, "step": 64165 }, { "epoch": 0.7820555007129538, "grad_norm": 2.0462989807128906, "learning_rate": 1.1471456061577936e-06, "loss": 0.8337, "step": 64170 }, { "epoch": 0.7821164369371016, "grad_norm": 2.2579140663146973, "learning_rate": 1.1468248877485568e-06, "loss": 0.8418, "step": 64175 }, { "epoch": 0.7821773731612495, "grad_norm": 1.89915931224823, "learning_rate": 1.14650416933932e-06, "loss": 0.8512, "step": 64180 }, { "epoch": 0.7822383093853973, "grad_norm": 2.0102791786193848, "learning_rate": 1.1461834509300835e-06, "loss": 0.8027, "step": 64185 }, { "epoch": 0.782299245609545, "grad_norm": 1.8246276378631592, "learning_rate": 1.145862732520847e-06, "loss": 0.7965, "step": 64190 }, { "epoch": 0.7823601818336928, "grad_norm": 1.960755705833435, "learning_rate": 1.1455420141116102e-06, "loss": 0.8495, "step": 64195 }, { "epoch": 0.7824211180578406, "grad_norm": 2.0261881351470947, "learning_rate": 1.1452212957023734e-06, "loss": 0.8407, "step": 64200 }, { "epoch": 0.7824820542819885, "grad_norm": 2.096545934677124, "learning_rate": 1.1449005772931366e-06, "loss": 0.7955, "step": 64205 }, { "epoch": 0.7825429905061363, "grad_norm": 2.168631076812744, "learning_rate": 1.1445798588839e-06, "loss": 0.8512, "step": 64210 }, { "epoch": 0.7826039267302841, "grad_norm": 2.06229305267334, "learning_rate": 1.1442591404746633e-06, "loss": 0.8619, "step": 64215 }, { "epoch": 0.7826648629544319, "grad_norm": 2.3170626163482666, "learning_rate": 1.1439384220654265e-06, "loss": 0.8287, "step": 64220 }, { "epoch": 0.7827257991785797, "grad_norm": 1.6899802684783936, "learning_rate": 1.14361770365619e-06, "loss": 0.782, "step": 64225 }, { "epoch": 0.7827867354027275, "grad_norm": 1.893458366394043, "learning_rate": 1.1432969852469534e-06, "loss": 0.7172, "step": 64230 }, { "epoch": 0.7828476716268753, "grad_norm": 1.9312394857406616, "learning_rate": 1.1429762668377166e-06, "loss": 0.8062, "step": 64235 }, { "epoch": 0.7829086078510231, "grad_norm": 1.8052729368209839, "learning_rate": 1.1426555484284799e-06, "loss": 0.8116, "step": 64240 }, { "epoch": 0.7829695440751709, "grad_norm": 2.2895021438598633, "learning_rate": 1.142334830019243e-06, "loss": 0.8899, "step": 64245 }, { "epoch": 0.7830304802993188, "grad_norm": 2.736083745956421, "learning_rate": 1.1420141116100065e-06, "loss": 0.7879, "step": 64250 }, { "epoch": 0.7830914165234666, "grad_norm": 2.1454384326934814, "learning_rate": 1.1416933932007698e-06, "loss": 0.8354, "step": 64255 }, { "epoch": 0.7831523527476143, "grad_norm": 2.1462767124176025, "learning_rate": 1.1413726747915332e-06, "loss": 0.7214, "step": 64260 }, { "epoch": 0.7832132889717621, "grad_norm": 1.904726266860962, "learning_rate": 1.1410519563822964e-06, "loss": 0.7307, "step": 64265 }, { "epoch": 0.78327422519591, "grad_norm": 2.001378059387207, "learning_rate": 1.1407312379730599e-06, "loss": 0.84, "step": 64270 }, { "epoch": 0.7833351614200578, "grad_norm": 1.764668345451355, "learning_rate": 1.140410519563823e-06, "loss": 0.7946, "step": 64275 }, { "epoch": 0.7833960976442056, "grad_norm": 1.8031994104385376, "learning_rate": 1.1400898011545863e-06, "loss": 0.7772, "step": 64280 }, { "epoch": 0.7834570338683534, "grad_norm": 1.883245825767517, "learning_rate": 1.1397690827453496e-06, "loss": 0.8957, "step": 64285 }, { "epoch": 0.7835179700925012, "grad_norm": 1.7917760610580444, "learning_rate": 1.139448364336113e-06, "loss": 0.8384, "step": 64290 }, { "epoch": 0.783578906316649, "grad_norm": 2.009044647216797, "learning_rate": 1.1391276459268762e-06, "loss": 0.8351, "step": 64295 }, { "epoch": 0.7836398425407968, "grad_norm": 1.7819374799728394, "learning_rate": 1.1388069275176397e-06, "loss": 0.7912, "step": 64300 }, { "epoch": 0.7837007787649446, "grad_norm": 1.9234910011291504, "learning_rate": 1.1384862091084029e-06, "loss": 0.8219, "step": 64305 }, { "epoch": 0.7837617149890924, "grad_norm": 1.950361967086792, "learning_rate": 1.1381654906991663e-06, "loss": 0.8456, "step": 64310 }, { "epoch": 0.7838226512132402, "grad_norm": 2.057170867919922, "learning_rate": 1.1378447722899296e-06, "loss": 0.7817, "step": 64315 }, { "epoch": 0.7838835874373881, "grad_norm": 3.3161964416503906, "learning_rate": 1.1375240538806928e-06, "loss": 0.8187, "step": 64320 }, { "epoch": 0.7839445236615359, "grad_norm": 1.8404021263122559, "learning_rate": 1.137203335471456e-06, "loss": 0.7646, "step": 64325 }, { "epoch": 0.7840054598856836, "grad_norm": 2.240664482116699, "learning_rate": 1.1368826170622195e-06, "loss": 0.8539, "step": 64330 }, { "epoch": 0.7840663961098314, "grad_norm": 1.8296135663986206, "learning_rate": 1.1365618986529827e-06, "loss": 0.7917, "step": 64335 }, { "epoch": 0.7841273323339792, "grad_norm": 1.7372533082962036, "learning_rate": 1.1362411802437461e-06, "loss": 0.9085, "step": 64340 }, { "epoch": 0.7841882685581271, "grad_norm": 1.7137646675109863, "learning_rate": 1.1359204618345094e-06, "loss": 0.7862, "step": 64345 }, { "epoch": 0.7842492047822749, "grad_norm": 2.1936707496643066, "learning_rate": 1.1355997434252728e-06, "loss": 0.7779, "step": 64350 }, { "epoch": 0.7843101410064227, "grad_norm": 1.9582762718200684, "learning_rate": 1.135279025016036e-06, "loss": 0.8033, "step": 64355 }, { "epoch": 0.7843710772305705, "grad_norm": 2.171388864517212, "learning_rate": 1.1349583066067993e-06, "loss": 0.8426, "step": 64360 }, { "epoch": 0.7844320134547182, "grad_norm": 1.6566836833953857, "learning_rate": 1.1346375881975627e-06, "loss": 0.7641, "step": 64365 }, { "epoch": 0.7844929496788661, "grad_norm": 1.8637771606445312, "learning_rate": 1.134316869788326e-06, "loss": 0.8142, "step": 64370 }, { "epoch": 0.7845538859030139, "grad_norm": 1.4799026250839233, "learning_rate": 1.1339961513790891e-06, "loss": 0.7679, "step": 64375 }, { "epoch": 0.7846148221271617, "grad_norm": 2.118497848510742, "learning_rate": 1.1336754329698526e-06, "loss": 0.7514, "step": 64380 }, { "epoch": 0.7846757583513095, "grad_norm": 2.5019986629486084, "learning_rate": 1.133354714560616e-06, "loss": 0.7402, "step": 64385 }, { "epoch": 0.7847366945754574, "grad_norm": 1.903901219367981, "learning_rate": 1.1330339961513793e-06, "loss": 0.7877, "step": 64390 }, { "epoch": 0.7847976307996052, "grad_norm": 2.2200543880462646, "learning_rate": 1.1327132777421425e-06, "loss": 0.8936, "step": 64395 }, { "epoch": 0.7848585670237529, "grad_norm": 1.7379717826843262, "learning_rate": 1.1323925593329057e-06, "loss": 0.853, "step": 64400 }, { "epoch": 0.7849195032479007, "grad_norm": 2.3687222003936768, "learning_rate": 1.1320718409236692e-06, "loss": 0.7783, "step": 64405 }, { "epoch": 0.7849804394720485, "grad_norm": 1.8296750783920288, "learning_rate": 1.1317511225144324e-06, "loss": 0.7951, "step": 64410 }, { "epoch": 0.7850413756961964, "grad_norm": 2.341400623321533, "learning_rate": 1.1314304041051958e-06, "loss": 0.8139, "step": 64415 }, { "epoch": 0.7851023119203442, "grad_norm": 2.0028858184814453, "learning_rate": 1.131109685695959e-06, "loss": 0.7455, "step": 64420 }, { "epoch": 0.785163248144492, "grad_norm": 2.0199198722839355, "learning_rate": 1.1307889672867225e-06, "loss": 0.855, "step": 64425 }, { "epoch": 0.7852241843686398, "grad_norm": 1.8021409511566162, "learning_rate": 1.1304682488774857e-06, "loss": 0.9106, "step": 64430 }, { "epoch": 0.7852851205927875, "grad_norm": 1.7375874519348145, "learning_rate": 1.130147530468249e-06, "loss": 0.7736, "step": 64435 }, { "epoch": 0.7853460568169354, "grad_norm": 1.7676842212677002, "learning_rate": 1.1298268120590122e-06, "loss": 0.7146, "step": 64440 }, { "epoch": 0.7854069930410832, "grad_norm": 1.9702743291854858, "learning_rate": 1.1295060936497756e-06, "loss": 0.8633, "step": 64445 }, { "epoch": 0.785467929265231, "grad_norm": 1.9352537393569946, "learning_rate": 1.1291853752405388e-06, "loss": 0.841, "step": 64450 }, { "epoch": 0.7855288654893788, "grad_norm": 2.009829044342041, "learning_rate": 1.1288646568313023e-06, "loss": 0.8363, "step": 64455 }, { "epoch": 0.7855898017135267, "grad_norm": 2.2329976558685303, "learning_rate": 1.1285439384220655e-06, "loss": 0.86, "step": 64460 }, { "epoch": 0.7856507379376745, "grad_norm": 1.9570528268814087, "learning_rate": 1.128223220012829e-06, "loss": 0.8156, "step": 64465 }, { "epoch": 0.7857116741618222, "grad_norm": 2.126298666000366, "learning_rate": 1.1279025016035922e-06, "loss": 0.7761, "step": 64470 }, { "epoch": 0.78577261038597, "grad_norm": 2.1370620727539062, "learning_rate": 1.1275817831943554e-06, "loss": 0.7939, "step": 64475 }, { "epoch": 0.7858335466101178, "grad_norm": 2.5105223655700684, "learning_rate": 1.1272610647851186e-06, "loss": 0.7968, "step": 64480 }, { "epoch": 0.7858944828342657, "grad_norm": 1.911975622177124, "learning_rate": 1.126940346375882e-06, "loss": 0.8064, "step": 64485 }, { "epoch": 0.7859554190584135, "grad_norm": 1.9962434768676758, "learning_rate": 1.1266196279666453e-06, "loss": 0.7648, "step": 64490 }, { "epoch": 0.7860163552825613, "grad_norm": 2.4746897220611572, "learning_rate": 1.1262989095574088e-06, "loss": 0.8503, "step": 64495 }, { "epoch": 0.7860772915067091, "grad_norm": 1.8312363624572754, "learning_rate": 1.125978191148172e-06, "loss": 0.8512, "step": 64500 }, { "epoch": 0.7861382277308568, "grad_norm": 2.1723473072052, "learning_rate": 1.1256574727389354e-06, "loss": 0.7911, "step": 64505 }, { "epoch": 0.7861991639550047, "grad_norm": 2.0570180416107178, "learning_rate": 1.1253367543296986e-06, "loss": 0.7297, "step": 64510 }, { "epoch": 0.7862601001791525, "grad_norm": 1.7970956563949585, "learning_rate": 1.1250160359204619e-06, "loss": 0.7645, "step": 64515 }, { "epoch": 0.7863210364033003, "grad_norm": 2.036375045776367, "learning_rate": 1.1246953175112251e-06, "loss": 0.8124, "step": 64520 }, { "epoch": 0.7863819726274481, "grad_norm": 1.8049052953720093, "learning_rate": 1.1243745991019885e-06, "loss": 0.8212, "step": 64525 }, { "epoch": 0.786442908851596, "grad_norm": 1.8683254718780518, "learning_rate": 1.1240538806927518e-06, "loss": 0.8573, "step": 64530 }, { "epoch": 0.7865038450757438, "grad_norm": 1.8792402744293213, "learning_rate": 1.1237331622835152e-06, "loss": 0.8409, "step": 64535 }, { "epoch": 0.7865647812998915, "grad_norm": 1.992836833000183, "learning_rate": 1.1234124438742784e-06, "loss": 0.7767, "step": 64540 }, { "epoch": 0.7866257175240393, "grad_norm": 2.0772926807403564, "learning_rate": 1.1230917254650419e-06, "loss": 0.7952, "step": 64545 }, { "epoch": 0.7866866537481871, "grad_norm": 2.334446668624878, "learning_rate": 1.1227710070558051e-06, "loss": 0.7845, "step": 64550 }, { "epoch": 0.786747589972335, "grad_norm": 2.125075101852417, "learning_rate": 1.1224502886465683e-06, "loss": 0.8211, "step": 64555 }, { "epoch": 0.7868085261964828, "grad_norm": 2.1960742473602295, "learning_rate": 1.1221295702373316e-06, "loss": 0.8406, "step": 64560 }, { "epoch": 0.7868694624206306, "grad_norm": 1.7477657794952393, "learning_rate": 1.121808851828095e-06, "loss": 0.8618, "step": 64565 }, { "epoch": 0.7869303986447784, "grad_norm": 1.9563664197921753, "learning_rate": 1.1214881334188582e-06, "loss": 0.8606, "step": 64570 }, { "epoch": 0.7869913348689261, "grad_norm": 2.005453109741211, "learning_rate": 1.1211674150096217e-06, "loss": 0.8134, "step": 64575 }, { "epoch": 0.787052271093074, "grad_norm": 2.6300065517425537, "learning_rate": 1.120846696600385e-06, "loss": 0.8126, "step": 64580 }, { "epoch": 0.7871132073172218, "grad_norm": 1.7752201557159424, "learning_rate": 1.1205259781911483e-06, "loss": 0.8754, "step": 64585 }, { "epoch": 0.7871741435413696, "grad_norm": 1.9111568927764893, "learning_rate": 1.1202052597819116e-06, "loss": 0.7802, "step": 64590 }, { "epoch": 0.7872350797655174, "grad_norm": 1.842463493347168, "learning_rate": 1.1198845413726748e-06, "loss": 0.7922, "step": 64595 }, { "epoch": 0.7872960159896653, "grad_norm": 1.9495373964309692, "learning_rate": 1.119563822963438e-06, "loss": 0.7506, "step": 64600 }, { "epoch": 0.7873569522138131, "grad_norm": 1.9976954460144043, "learning_rate": 1.1192431045542015e-06, "loss": 0.8432, "step": 64605 }, { "epoch": 0.7874178884379608, "grad_norm": 1.7766305208206177, "learning_rate": 1.118922386144965e-06, "loss": 0.8309, "step": 64610 }, { "epoch": 0.7874788246621086, "grad_norm": 1.8525854349136353, "learning_rate": 1.1186016677357281e-06, "loss": 0.7494, "step": 64615 }, { "epoch": 0.7875397608862564, "grad_norm": 2.0558793544769287, "learning_rate": 1.1182809493264914e-06, "loss": 0.8045, "step": 64620 }, { "epoch": 0.7876006971104043, "grad_norm": 1.9135828018188477, "learning_rate": 1.1179602309172548e-06, "loss": 0.809, "step": 64625 }, { "epoch": 0.7876616333345521, "grad_norm": 2.009479284286499, "learning_rate": 1.117639512508018e-06, "loss": 0.7951, "step": 64630 }, { "epoch": 0.7877225695586999, "grad_norm": 2.041264057159424, "learning_rate": 1.1173187940987813e-06, "loss": 0.8016, "step": 64635 }, { "epoch": 0.7877835057828477, "grad_norm": 1.7555607557296753, "learning_rate": 1.1169980756895447e-06, "loss": 0.7951, "step": 64640 }, { "epoch": 0.7878444420069954, "grad_norm": 2.4601657390594482, "learning_rate": 1.116677357280308e-06, "loss": 0.8011, "step": 64645 }, { "epoch": 0.7879053782311433, "grad_norm": 1.7191028594970703, "learning_rate": 1.1163566388710714e-06, "loss": 0.8247, "step": 64650 }, { "epoch": 0.7879663144552911, "grad_norm": 1.7384356260299683, "learning_rate": 1.1160359204618346e-06, "loss": 0.7909, "step": 64655 }, { "epoch": 0.7880272506794389, "grad_norm": 2.5093371868133545, "learning_rate": 1.115715202052598e-06, "loss": 0.8494, "step": 64660 }, { "epoch": 0.7880881869035867, "grad_norm": 2.0065348148345947, "learning_rate": 1.1153944836433613e-06, "loss": 0.8439, "step": 64665 }, { "epoch": 0.7881491231277346, "grad_norm": 1.9676008224487305, "learning_rate": 1.1150737652341245e-06, "loss": 0.8171, "step": 64670 }, { "epoch": 0.7882100593518824, "grad_norm": 2.0774588584899902, "learning_rate": 1.1147530468248877e-06, "loss": 0.8338, "step": 64675 }, { "epoch": 0.7882709955760301, "grad_norm": 1.8653911352157593, "learning_rate": 1.1144323284156512e-06, "loss": 0.8026, "step": 64680 }, { "epoch": 0.7883319318001779, "grad_norm": 2.0471317768096924, "learning_rate": 1.1141116100064144e-06, "loss": 0.8301, "step": 64685 }, { "epoch": 0.7883928680243257, "grad_norm": 2.4981541633605957, "learning_rate": 1.1137908915971778e-06, "loss": 0.8173, "step": 64690 }, { "epoch": 0.7884538042484736, "grad_norm": 2.0651309490203857, "learning_rate": 1.113470173187941e-06, "loss": 0.8257, "step": 64695 }, { "epoch": 0.7885147404726214, "grad_norm": 2.149416446685791, "learning_rate": 1.1131494547787045e-06, "loss": 0.8585, "step": 64700 }, { "epoch": 0.7885756766967692, "grad_norm": 1.8958791494369507, "learning_rate": 1.1128287363694677e-06, "loss": 0.852, "step": 64705 }, { "epoch": 0.7886366129209169, "grad_norm": 1.6796280145645142, "learning_rate": 1.112508017960231e-06, "loss": 0.7859, "step": 64710 }, { "epoch": 0.7886975491450647, "grad_norm": 2.1528677940368652, "learning_rate": 1.1121872995509942e-06, "loss": 0.7988, "step": 64715 }, { "epoch": 0.7887584853692126, "grad_norm": 2.3246309757232666, "learning_rate": 1.1118665811417576e-06, "loss": 0.8246, "step": 64720 }, { "epoch": 0.7888194215933604, "grad_norm": 1.8154730796813965, "learning_rate": 1.1115458627325209e-06, "loss": 0.8526, "step": 64725 }, { "epoch": 0.7888803578175082, "grad_norm": 2.017794370651245, "learning_rate": 1.1112251443232843e-06, "loss": 0.83, "step": 64730 }, { "epoch": 0.788941294041656, "grad_norm": 1.9213647842407227, "learning_rate": 1.1109044259140475e-06, "loss": 0.8104, "step": 64735 }, { "epoch": 0.7890022302658038, "grad_norm": 2.174837589263916, "learning_rate": 1.110583707504811e-06, "loss": 0.8185, "step": 64740 }, { "epoch": 0.7890631664899516, "grad_norm": 1.7469477653503418, "learning_rate": 1.1102629890955742e-06, "loss": 0.781, "step": 64745 }, { "epoch": 0.7891241027140994, "grad_norm": 2.0797646045684814, "learning_rate": 1.1099422706863374e-06, "loss": 0.8003, "step": 64750 }, { "epoch": 0.7891850389382472, "grad_norm": 2.067803382873535, "learning_rate": 1.1096215522771007e-06, "loss": 0.8465, "step": 64755 }, { "epoch": 0.789245975162395, "grad_norm": 1.7880843877792358, "learning_rate": 1.109300833867864e-06, "loss": 0.756, "step": 64760 }, { "epoch": 0.7893069113865429, "grad_norm": 2.207454204559326, "learning_rate": 1.1089801154586275e-06, "loss": 0.8163, "step": 64765 }, { "epoch": 0.7893678476106907, "grad_norm": 1.9277775287628174, "learning_rate": 1.1086593970493908e-06, "loss": 0.8066, "step": 64770 }, { "epoch": 0.7894287838348385, "grad_norm": 1.6603816747665405, "learning_rate": 1.108338678640154e-06, "loss": 0.8312, "step": 64775 }, { "epoch": 0.7894897200589862, "grad_norm": 1.9228767156600952, "learning_rate": 1.1080179602309174e-06, "loss": 0.8286, "step": 64780 }, { "epoch": 0.789550656283134, "grad_norm": 2.060537338256836, "learning_rate": 1.1076972418216807e-06, "loss": 0.795, "step": 64785 }, { "epoch": 0.7896115925072819, "grad_norm": 1.6760327816009521, "learning_rate": 1.1073765234124439e-06, "loss": 0.794, "step": 64790 }, { "epoch": 0.7896725287314297, "grad_norm": 2.053701639175415, "learning_rate": 1.1070558050032071e-06, "loss": 0.8056, "step": 64795 }, { "epoch": 0.7897334649555775, "grad_norm": 2.1325840950012207, "learning_rate": 1.1067350865939706e-06, "loss": 0.8445, "step": 64800 }, { "epoch": 0.7897944011797253, "grad_norm": 2.215843439102173, "learning_rate": 1.106414368184734e-06, "loss": 0.755, "step": 64805 }, { "epoch": 0.7898553374038731, "grad_norm": 2.0097880363464355, "learning_rate": 1.1060936497754972e-06, "loss": 0.8285, "step": 64810 }, { "epoch": 0.7899162736280209, "grad_norm": 1.9164519309997559, "learning_rate": 1.1057729313662605e-06, "loss": 0.8193, "step": 64815 }, { "epoch": 0.7899772098521687, "grad_norm": 1.9247766733169556, "learning_rate": 1.105452212957024e-06, "loss": 0.7251, "step": 64820 }, { "epoch": 0.7900381460763165, "grad_norm": 2.166243553161621, "learning_rate": 1.1051314945477871e-06, "loss": 0.8343, "step": 64825 }, { "epoch": 0.7900990823004643, "grad_norm": 2.788058042526245, "learning_rate": 1.1048107761385504e-06, "loss": 0.8954, "step": 64830 }, { "epoch": 0.7901600185246122, "grad_norm": 2.0788064002990723, "learning_rate": 1.1044900577293138e-06, "loss": 0.8776, "step": 64835 }, { "epoch": 0.79022095474876, "grad_norm": 1.8701242208480835, "learning_rate": 1.104169339320077e-06, "loss": 0.7826, "step": 64840 }, { "epoch": 0.7902818909729078, "grad_norm": 1.9635568857192993, "learning_rate": 1.1038486209108405e-06, "loss": 0.7982, "step": 64845 }, { "epoch": 0.7903428271970555, "grad_norm": 1.8356788158416748, "learning_rate": 1.1035279025016037e-06, "loss": 0.8357, "step": 64850 }, { "epoch": 0.7904037634212033, "grad_norm": 1.9025455713272095, "learning_rate": 1.103207184092367e-06, "loss": 0.8149, "step": 64855 }, { "epoch": 0.7904646996453512, "grad_norm": 1.6640243530273438, "learning_rate": 1.1028864656831304e-06, "loss": 0.8179, "step": 64860 }, { "epoch": 0.790525635869499, "grad_norm": 1.8729122877120972, "learning_rate": 1.1025657472738936e-06, "loss": 0.8197, "step": 64865 }, { "epoch": 0.7905865720936468, "grad_norm": 1.8088817596435547, "learning_rate": 1.1022450288646568e-06, "loss": 0.8265, "step": 64870 }, { "epoch": 0.7906475083177946, "grad_norm": 1.454574704170227, "learning_rate": 1.1019243104554203e-06, "loss": 0.7872, "step": 64875 }, { "epoch": 0.7907084445419424, "grad_norm": 2.5546889305114746, "learning_rate": 1.1016035920461835e-06, "loss": 0.7944, "step": 64880 }, { "epoch": 0.7907693807660902, "grad_norm": 2.1763651371002197, "learning_rate": 1.101282873636947e-06, "loss": 0.8262, "step": 64885 }, { "epoch": 0.790830316990238, "grad_norm": 1.8742800951004028, "learning_rate": 1.1009621552277102e-06, "loss": 0.8431, "step": 64890 }, { "epoch": 0.7908912532143858, "grad_norm": 2.44311785697937, "learning_rate": 1.1006414368184734e-06, "loss": 0.8289, "step": 64895 }, { "epoch": 0.7909521894385336, "grad_norm": 1.5528651475906372, "learning_rate": 1.1003207184092368e-06, "loss": 0.8522, "step": 64900 }, { "epoch": 0.7910131256626814, "grad_norm": 1.8520196676254272, "learning_rate": 1.1e-06, "loss": 0.9115, "step": 64905 }, { "epoch": 0.7910740618868293, "grad_norm": 2.5059256553649902, "learning_rate": 1.0996792815907633e-06, "loss": 0.8238, "step": 64910 }, { "epoch": 0.7911349981109771, "grad_norm": 2.210878849029541, "learning_rate": 1.0993585631815267e-06, "loss": 0.8225, "step": 64915 }, { "epoch": 0.7911959343351248, "grad_norm": 2.648876428604126, "learning_rate": 1.09903784477229e-06, "loss": 0.8685, "step": 64920 }, { "epoch": 0.7912568705592726, "grad_norm": 2.8509409427642822, "learning_rate": 1.0987171263630534e-06, "loss": 0.7998, "step": 64925 }, { "epoch": 0.7913178067834205, "grad_norm": 2.1221988201141357, "learning_rate": 1.0983964079538166e-06, "loss": 0.8054, "step": 64930 }, { "epoch": 0.7913787430075683, "grad_norm": 1.905480980873108, "learning_rate": 1.0980756895445799e-06, "loss": 0.7713, "step": 64935 }, { "epoch": 0.7914396792317161, "grad_norm": 1.7377843856811523, "learning_rate": 1.0977549711353433e-06, "loss": 0.8418, "step": 64940 }, { "epoch": 0.7915006154558639, "grad_norm": 1.9582291841506958, "learning_rate": 1.0974342527261065e-06, "loss": 0.8792, "step": 64945 }, { "epoch": 0.7915615516800117, "grad_norm": 1.8693851232528687, "learning_rate": 1.0971135343168697e-06, "loss": 0.7981, "step": 64950 }, { "epoch": 0.7916224879041595, "grad_norm": 2.1761255264282227, "learning_rate": 1.0967928159076332e-06, "loss": 0.7864, "step": 64955 }, { "epoch": 0.7916834241283073, "grad_norm": 1.8253860473632812, "learning_rate": 1.0964720974983966e-06, "loss": 0.8546, "step": 64960 }, { "epoch": 0.7917443603524551, "grad_norm": 1.9077198505401611, "learning_rate": 1.0961513790891599e-06, "loss": 0.8503, "step": 64965 }, { "epoch": 0.7918052965766029, "grad_norm": 2.0790750980377197, "learning_rate": 1.095830660679923e-06, "loss": 0.7741, "step": 64970 }, { "epoch": 0.7918662328007507, "grad_norm": 1.82659912109375, "learning_rate": 1.0955099422706865e-06, "loss": 0.8014, "step": 64975 }, { "epoch": 0.7919271690248986, "grad_norm": 2.3173787593841553, "learning_rate": 1.0951892238614498e-06, "loss": 0.8303, "step": 64980 }, { "epoch": 0.7919881052490464, "grad_norm": 2.0644187927246094, "learning_rate": 1.094868505452213e-06, "loss": 0.7614, "step": 64985 }, { "epoch": 0.7920490414731941, "grad_norm": 1.950433373451233, "learning_rate": 1.0945477870429762e-06, "loss": 0.8302, "step": 64990 }, { "epoch": 0.7921099776973419, "grad_norm": 1.9352420568466187, "learning_rate": 1.0942270686337397e-06, "loss": 0.8635, "step": 64995 }, { "epoch": 0.7921709139214897, "grad_norm": 1.934477686882019, "learning_rate": 1.093906350224503e-06, "loss": 0.8115, "step": 65000 }, { "epoch": 0.7922318501456376, "grad_norm": 1.9129174947738647, "learning_rate": 1.0935856318152663e-06, "loss": 0.8214, "step": 65005 }, { "epoch": 0.7922927863697854, "grad_norm": 1.7862024307250977, "learning_rate": 1.0932649134060295e-06, "loss": 0.8052, "step": 65010 }, { "epoch": 0.7923537225939332, "grad_norm": 2.018629312515259, "learning_rate": 1.092944194996793e-06, "loss": 0.824, "step": 65015 }, { "epoch": 0.792414658818081, "grad_norm": 1.9028563499450684, "learning_rate": 1.0926234765875562e-06, "loss": 0.7913, "step": 65020 }, { "epoch": 0.7924755950422288, "grad_norm": 1.8441777229309082, "learning_rate": 1.0923027581783194e-06, "loss": 0.8129, "step": 65025 }, { "epoch": 0.7925365312663766, "grad_norm": 1.8804510831832886, "learning_rate": 1.0919820397690829e-06, "loss": 0.8091, "step": 65030 }, { "epoch": 0.7925974674905244, "grad_norm": 1.8395795822143555, "learning_rate": 1.0916613213598461e-06, "loss": 0.7192, "step": 65035 }, { "epoch": 0.7926584037146722, "grad_norm": 2.1915156841278076, "learning_rate": 1.0913406029506096e-06, "loss": 0.8096, "step": 65040 }, { "epoch": 0.79271933993882, "grad_norm": 2.011563301086426, "learning_rate": 1.0910198845413728e-06, "loss": 0.8417, "step": 65045 }, { "epoch": 0.7927802761629679, "grad_norm": 2.029846668243408, "learning_rate": 1.090699166132136e-06, "loss": 0.7679, "step": 65050 }, { "epoch": 0.7928412123871157, "grad_norm": 1.6237342357635498, "learning_rate": 1.0903784477228995e-06, "loss": 0.7993, "step": 65055 }, { "epoch": 0.7929021486112634, "grad_norm": 2.094360113143921, "learning_rate": 1.0900577293136627e-06, "loss": 0.8465, "step": 65060 }, { "epoch": 0.7929630848354112, "grad_norm": 1.770277976989746, "learning_rate": 1.089737010904426e-06, "loss": 0.7922, "step": 65065 }, { "epoch": 0.793024021059559, "grad_norm": 2.3163955211639404, "learning_rate": 1.0894162924951894e-06, "loss": 0.8307, "step": 65070 }, { "epoch": 0.7930849572837069, "grad_norm": 2.1333351135253906, "learning_rate": 1.0890955740859526e-06, "loss": 0.8934, "step": 65075 }, { "epoch": 0.7931458935078547, "grad_norm": 2.0725321769714355, "learning_rate": 1.088774855676716e-06, "loss": 0.8099, "step": 65080 }, { "epoch": 0.7932068297320025, "grad_norm": 2.00158429145813, "learning_rate": 1.0884541372674792e-06, "loss": 0.8459, "step": 65085 }, { "epoch": 0.7932677659561503, "grad_norm": 2.2183001041412354, "learning_rate": 1.0881334188582425e-06, "loss": 0.7591, "step": 65090 }, { "epoch": 0.793328702180298, "grad_norm": 1.8359147310256958, "learning_rate": 1.087812700449006e-06, "loss": 0.8211, "step": 65095 }, { "epoch": 0.7933896384044459, "grad_norm": 1.9145971536636353, "learning_rate": 1.0874919820397691e-06, "loss": 0.8523, "step": 65100 }, { "epoch": 0.7934505746285937, "grad_norm": 1.8659067153930664, "learning_rate": 1.0871712636305324e-06, "loss": 0.8977, "step": 65105 }, { "epoch": 0.7935115108527415, "grad_norm": 2.1143269538879395, "learning_rate": 1.0868505452212958e-06, "loss": 0.8302, "step": 65110 }, { "epoch": 0.7935724470768893, "grad_norm": 1.77266263961792, "learning_rate": 1.0865298268120593e-06, "loss": 0.7688, "step": 65115 }, { "epoch": 0.7936333833010372, "grad_norm": 2.196814775466919, "learning_rate": 1.0862091084028225e-06, "loss": 0.8202, "step": 65120 }, { "epoch": 0.793694319525185, "grad_norm": 2.10256290435791, "learning_rate": 1.0858883899935857e-06, "loss": 0.8929, "step": 65125 }, { "epoch": 0.7937552557493327, "grad_norm": 2.1624648571014404, "learning_rate": 1.085567671584349e-06, "loss": 0.8581, "step": 65130 }, { "epoch": 0.7938161919734805, "grad_norm": 2.0470972061157227, "learning_rate": 1.0852469531751124e-06, "loss": 0.7947, "step": 65135 }, { "epoch": 0.7938771281976283, "grad_norm": 2.1472508907318115, "learning_rate": 1.0849262347658756e-06, "loss": 0.8544, "step": 65140 }, { "epoch": 0.7939380644217762, "grad_norm": 2.070376396179199, "learning_rate": 1.0846055163566388e-06, "loss": 0.8203, "step": 65145 }, { "epoch": 0.793999000645924, "grad_norm": 1.9615378379821777, "learning_rate": 1.0842847979474023e-06, "loss": 0.8806, "step": 65150 }, { "epoch": 0.7940599368700718, "grad_norm": 1.995200753211975, "learning_rate": 1.0839640795381657e-06, "loss": 0.6835, "step": 65155 }, { "epoch": 0.7941208730942196, "grad_norm": 2.0382778644561768, "learning_rate": 1.083643361128929e-06, "loss": 0.8472, "step": 65160 }, { "epoch": 0.7941818093183673, "grad_norm": 1.8323472738265991, "learning_rate": 1.0833226427196922e-06, "loss": 0.8507, "step": 65165 }, { "epoch": 0.7942427455425152, "grad_norm": 1.9759644269943237, "learning_rate": 1.0830019243104554e-06, "loss": 0.7351, "step": 65170 }, { "epoch": 0.794303681766663, "grad_norm": 1.984723687171936, "learning_rate": 1.0826812059012188e-06, "loss": 0.7356, "step": 65175 }, { "epoch": 0.7943646179908108, "grad_norm": 2.1172971725463867, "learning_rate": 1.082360487491982e-06, "loss": 0.7616, "step": 65180 }, { "epoch": 0.7944255542149586, "grad_norm": 2.004124402999878, "learning_rate": 1.0820397690827455e-06, "loss": 0.7822, "step": 65185 }, { "epoch": 0.7944864904391065, "grad_norm": 2.1628682613372803, "learning_rate": 1.0817190506735087e-06, "loss": 0.8589, "step": 65190 }, { "epoch": 0.7945474266632543, "grad_norm": 1.8689030408859253, "learning_rate": 1.0813983322642722e-06, "loss": 0.824, "step": 65195 }, { "epoch": 0.794608362887402, "grad_norm": 1.947665810585022, "learning_rate": 1.0810776138550354e-06, "loss": 0.7679, "step": 65200 }, { "epoch": 0.7946692991115498, "grad_norm": 1.751309871673584, "learning_rate": 1.0807568954457986e-06, "loss": 0.845, "step": 65205 }, { "epoch": 0.7947302353356976, "grad_norm": 2.1664037704467773, "learning_rate": 1.0804361770365619e-06, "loss": 0.764, "step": 65210 }, { "epoch": 0.7947911715598455, "grad_norm": 1.9067997932434082, "learning_rate": 1.0801154586273253e-06, "loss": 0.8352, "step": 65215 }, { "epoch": 0.7948521077839933, "grad_norm": 2.546760320663452, "learning_rate": 1.0797947402180885e-06, "loss": 0.7642, "step": 65220 }, { "epoch": 0.7949130440081411, "grad_norm": 2.4164533615112305, "learning_rate": 1.079474021808852e-06, "loss": 0.8579, "step": 65225 }, { "epoch": 0.7949739802322889, "grad_norm": 2.3248300552368164, "learning_rate": 1.0791533033996152e-06, "loss": 0.8815, "step": 65230 }, { "epoch": 0.7950349164564366, "grad_norm": 1.8723514080047607, "learning_rate": 1.0788325849903786e-06, "loss": 0.8623, "step": 65235 }, { "epoch": 0.7950958526805845, "grad_norm": 2.046613931655884, "learning_rate": 1.0785118665811419e-06, "loss": 0.781, "step": 65240 }, { "epoch": 0.7951567889047323, "grad_norm": 2.0165634155273438, "learning_rate": 1.078191148171905e-06, "loss": 0.8115, "step": 65245 }, { "epoch": 0.7952177251288801, "grad_norm": 1.9992516040802002, "learning_rate": 1.0778704297626683e-06, "loss": 0.8005, "step": 65250 }, { "epoch": 0.7952786613530279, "grad_norm": 1.8587723970413208, "learning_rate": 1.0775497113534318e-06, "loss": 0.859, "step": 65255 }, { "epoch": 0.7953395975771758, "grad_norm": 2.0546481609344482, "learning_rate": 1.077228992944195e-06, "loss": 0.7593, "step": 65260 }, { "epoch": 0.7954005338013236, "grad_norm": 2.3748228549957275, "learning_rate": 1.0769082745349584e-06, "loss": 0.8491, "step": 65265 }, { "epoch": 0.7954614700254713, "grad_norm": 1.926317572593689, "learning_rate": 1.0765875561257217e-06, "loss": 0.7767, "step": 65270 }, { "epoch": 0.7955224062496191, "grad_norm": 1.9870637655258179, "learning_rate": 1.0762668377164851e-06, "loss": 0.8612, "step": 65275 }, { "epoch": 0.7955833424737669, "grad_norm": 1.8430901765823364, "learning_rate": 1.0759461193072483e-06, "loss": 0.7993, "step": 65280 }, { "epoch": 0.7956442786979148, "grad_norm": 1.9654148817062378, "learning_rate": 1.0756254008980116e-06, "loss": 0.7786, "step": 65285 }, { "epoch": 0.7957052149220626, "grad_norm": 1.9184874296188354, "learning_rate": 1.075304682488775e-06, "loss": 0.8274, "step": 65290 }, { "epoch": 0.7957661511462104, "grad_norm": 2.358048439025879, "learning_rate": 1.0749839640795382e-06, "loss": 0.825, "step": 65295 }, { "epoch": 0.7958270873703582, "grad_norm": 2.2287981510162354, "learning_rate": 1.0746632456703015e-06, "loss": 0.8064, "step": 65300 }, { "epoch": 0.7958880235945059, "grad_norm": 1.9431439638137817, "learning_rate": 1.074342527261065e-06, "loss": 0.8171, "step": 65305 }, { "epoch": 0.7959489598186538, "grad_norm": 2.002580165863037, "learning_rate": 1.0740218088518283e-06, "loss": 0.768, "step": 65310 }, { "epoch": 0.7960098960428016, "grad_norm": 2.108625650405884, "learning_rate": 1.0737010904425916e-06, "loss": 0.7856, "step": 65315 }, { "epoch": 0.7960708322669494, "grad_norm": 1.90618097782135, "learning_rate": 1.0733803720333548e-06, "loss": 0.7902, "step": 65320 }, { "epoch": 0.7961317684910972, "grad_norm": 1.8116780519485474, "learning_rate": 1.073059653624118e-06, "loss": 0.8085, "step": 65325 }, { "epoch": 0.7961927047152451, "grad_norm": 2.0013914108276367, "learning_rate": 1.0727389352148815e-06, "loss": 0.7968, "step": 65330 }, { "epoch": 0.7962536409393929, "grad_norm": 2.0860655307769775, "learning_rate": 1.0724182168056447e-06, "loss": 0.7855, "step": 65335 }, { "epoch": 0.7963145771635406, "grad_norm": 1.84880793094635, "learning_rate": 1.072097498396408e-06, "loss": 0.8087, "step": 65340 }, { "epoch": 0.7963755133876884, "grad_norm": 2.4019742012023926, "learning_rate": 1.0717767799871714e-06, "loss": 0.8114, "step": 65345 }, { "epoch": 0.7964364496118362, "grad_norm": 1.615756630897522, "learning_rate": 1.0714560615779348e-06, "loss": 0.8063, "step": 65350 }, { "epoch": 0.7964973858359841, "grad_norm": 2.63836932182312, "learning_rate": 1.071135343168698e-06, "loss": 0.874, "step": 65355 }, { "epoch": 0.7965583220601319, "grad_norm": 1.9668620824813843, "learning_rate": 1.0708146247594613e-06, "loss": 0.8728, "step": 65360 }, { "epoch": 0.7966192582842797, "grad_norm": 1.795145869255066, "learning_rate": 1.0704939063502245e-06, "loss": 0.8905, "step": 65365 }, { "epoch": 0.7966801945084275, "grad_norm": 2.0446064472198486, "learning_rate": 1.070173187940988e-06, "loss": 0.8213, "step": 65370 }, { "epoch": 0.7967411307325752, "grad_norm": 2.0657362937927246, "learning_rate": 1.0698524695317512e-06, "loss": 0.7704, "step": 65375 }, { "epoch": 0.7968020669567231, "grad_norm": 1.9616385698318481, "learning_rate": 1.0695317511225146e-06, "loss": 0.757, "step": 65380 }, { "epoch": 0.7968630031808709, "grad_norm": 2.011695146560669, "learning_rate": 1.0692110327132778e-06, "loss": 0.7739, "step": 65385 }, { "epoch": 0.7969239394050187, "grad_norm": 1.7741725444793701, "learning_rate": 1.0688903143040413e-06, "loss": 0.7528, "step": 65390 }, { "epoch": 0.7969848756291665, "grad_norm": 2.0400078296661377, "learning_rate": 1.0685695958948045e-06, "loss": 0.8047, "step": 65395 }, { "epoch": 0.7970458118533144, "grad_norm": 2.251674175262451, "learning_rate": 1.0682488774855677e-06, "loss": 0.8092, "step": 65400 }, { "epoch": 0.7971067480774622, "grad_norm": 1.768898844718933, "learning_rate": 1.067928159076331e-06, "loss": 0.8129, "step": 65405 }, { "epoch": 0.7971676843016099, "grad_norm": 1.8433881998062134, "learning_rate": 1.0676074406670944e-06, "loss": 0.8089, "step": 65410 }, { "epoch": 0.7972286205257577, "grad_norm": 1.7673680782318115, "learning_rate": 1.0672867222578576e-06, "loss": 0.8216, "step": 65415 }, { "epoch": 0.7972895567499055, "grad_norm": 2.411195993423462, "learning_rate": 1.066966003848621e-06, "loss": 0.7931, "step": 65420 }, { "epoch": 0.7973504929740534, "grad_norm": 2.1894853115081787, "learning_rate": 1.0666452854393843e-06, "loss": 0.7307, "step": 65425 }, { "epoch": 0.7974114291982012, "grad_norm": 1.844909906387329, "learning_rate": 1.0663245670301477e-06, "loss": 0.7753, "step": 65430 }, { "epoch": 0.797472365422349, "grad_norm": 1.875116229057312, "learning_rate": 1.066003848620911e-06, "loss": 0.8386, "step": 65435 }, { "epoch": 0.7975333016464968, "grad_norm": 1.9756253957748413, "learning_rate": 1.0656831302116742e-06, "loss": 0.8018, "step": 65440 }, { "epoch": 0.7975942378706445, "grad_norm": 1.8914512395858765, "learning_rate": 1.0653624118024374e-06, "loss": 0.8596, "step": 65445 }, { "epoch": 0.7976551740947924, "grad_norm": 1.8669692277908325, "learning_rate": 1.0650416933932009e-06, "loss": 0.8679, "step": 65450 }, { "epoch": 0.7977161103189402, "grad_norm": 1.8363336324691772, "learning_rate": 1.064720974983964e-06, "loss": 0.7723, "step": 65455 }, { "epoch": 0.797777046543088, "grad_norm": 2.1282975673675537, "learning_rate": 1.0644002565747275e-06, "loss": 0.8309, "step": 65460 }, { "epoch": 0.7978379827672358, "grad_norm": 2.008728265762329, "learning_rate": 1.0640795381654908e-06, "loss": 0.8198, "step": 65465 }, { "epoch": 0.7978989189913837, "grad_norm": 2.226874828338623, "learning_rate": 1.0637588197562542e-06, "loss": 0.8286, "step": 65470 }, { "epoch": 0.7979598552155315, "grad_norm": 2.5360119342803955, "learning_rate": 1.0634381013470174e-06, "loss": 0.7542, "step": 65475 }, { "epoch": 0.7980207914396792, "grad_norm": 2.1018552780151367, "learning_rate": 1.0631173829377807e-06, "loss": 0.7388, "step": 65480 }, { "epoch": 0.798081727663827, "grad_norm": 2.0583038330078125, "learning_rate": 1.0627966645285439e-06, "loss": 0.7404, "step": 65485 }, { "epoch": 0.7981426638879748, "grad_norm": 1.905556321144104, "learning_rate": 1.0624759461193073e-06, "loss": 0.7608, "step": 65490 }, { "epoch": 0.7982036001121227, "grad_norm": 1.9789822101593018, "learning_rate": 1.0621552277100706e-06, "loss": 0.803, "step": 65495 }, { "epoch": 0.7982645363362705, "grad_norm": 1.5858290195465088, "learning_rate": 1.061834509300834e-06, "loss": 0.806, "step": 65500 }, { "epoch": 0.7983254725604183, "grad_norm": 2.2718429565429688, "learning_rate": 1.0615137908915972e-06, "loss": 0.8468, "step": 65505 }, { "epoch": 0.7983864087845661, "grad_norm": 1.9393495321273804, "learning_rate": 1.0611930724823607e-06, "loss": 0.837, "step": 65510 }, { "epoch": 0.7984473450087138, "grad_norm": 1.9020534753799438, "learning_rate": 1.0608723540731239e-06, "loss": 0.8386, "step": 65515 }, { "epoch": 0.7985082812328617, "grad_norm": 2.3895630836486816, "learning_rate": 1.0605516356638871e-06, "loss": 0.8578, "step": 65520 }, { "epoch": 0.7985692174570095, "grad_norm": 2.114956855773926, "learning_rate": 1.0602309172546503e-06, "loss": 0.7849, "step": 65525 }, { "epoch": 0.7986301536811573, "grad_norm": 1.75348699092865, "learning_rate": 1.0599101988454138e-06, "loss": 0.8363, "step": 65530 }, { "epoch": 0.7986910899053051, "grad_norm": 2.2753896713256836, "learning_rate": 1.0595894804361772e-06, "loss": 0.8677, "step": 65535 }, { "epoch": 0.798752026129453, "grad_norm": 1.9089140892028809, "learning_rate": 1.0592687620269405e-06, "loss": 0.8534, "step": 65540 }, { "epoch": 0.7988129623536008, "grad_norm": 1.7734887599945068, "learning_rate": 1.0589480436177037e-06, "loss": 0.7328, "step": 65545 }, { "epoch": 0.7988738985777485, "grad_norm": 2.039022445678711, "learning_rate": 1.0586273252084671e-06, "loss": 0.8832, "step": 65550 }, { "epoch": 0.7989348348018963, "grad_norm": 1.7766677141189575, "learning_rate": 1.0583066067992304e-06, "loss": 0.7843, "step": 65555 }, { "epoch": 0.7989957710260441, "grad_norm": 1.8777276277542114, "learning_rate": 1.0579858883899936e-06, "loss": 0.8256, "step": 65560 }, { "epoch": 0.799056707250192, "grad_norm": 1.898122787475586, "learning_rate": 1.057665169980757e-06, "loss": 0.7185, "step": 65565 }, { "epoch": 0.7991176434743398, "grad_norm": 2.2875328063964844, "learning_rate": 1.0573444515715203e-06, "loss": 0.8567, "step": 65570 }, { "epoch": 0.7991785796984876, "grad_norm": 1.9709652662277222, "learning_rate": 1.0570237331622837e-06, "loss": 0.7952, "step": 65575 }, { "epoch": 0.7992395159226354, "grad_norm": 2.059861660003662, "learning_rate": 1.056703014753047e-06, "loss": 0.8559, "step": 65580 }, { "epoch": 0.7993004521467831, "grad_norm": 2.023508310317993, "learning_rate": 1.0563822963438104e-06, "loss": 0.8097, "step": 65585 }, { "epoch": 0.799361388370931, "grad_norm": 1.9720215797424316, "learning_rate": 1.0560615779345736e-06, "loss": 0.7557, "step": 65590 }, { "epoch": 0.7994223245950788, "grad_norm": 1.7367876768112183, "learning_rate": 1.0557408595253368e-06, "loss": 0.8365, "step": 65595 }, { "epoch": 0.7994832608192266, "grad_norm": 1.822325587272644, "learning_rate": 1.0554201411161e-06, "loss": 0.7931, "step": 65600 }, { "epoch": 0.7995441970433744, "grad_norm": 1.7748481035232544, "learning_rate": 1.0550994227068635e-06, "loss": 0.7319, "step": 65605 }, { "epoch": 0.7996051332675223, "grad_norm": 1.7021514177322388, "learning_rate": 1.0547787042976267e-06, "loss": 0.8166, "step": 65610 }, { "epoch": 0.7996660694916701, "grad_norm": 2.21984601020813, "learning_rate": 1.0544579858883902e-06, "loss": 0.7433, "step": 65615 }, { "epoch": 0.7997270057158178, "grad_norm": 1.875167727470398, "learning_rate": 1.0541372674791534e-06, "loss": 0.8219, "step": 65620 }, { "epoch": 0.7997879419399656, "grad_norm": 1.695925235748291, "learning_rate": 1.0538165490699168e-06, "loss": 0.7598, "step": 65625 }, { "epoch": 0.7998488781641134, "grad_norm": 1.9801315069198608, "learning_rate": 1.05349583066068e-06, "loss": 0.8814, "step": 65630 }, { "epoch": 0.7999098143882613, "grad_norm": 2.0156121253967285, "learning_rate": 1.0531751122514433e-06, "loss": 0.8657, "step": 65635 }, { "epoch": 0.7999707506124091, "grad_norm": 1.9165376424789429, "learning_rate": 1.0528543938422065e-06, "loss": 0.8089, "step": 65640 }, { "epoch": 0.8000316868365569, "grad_norm": 1.99141526222229, "learning_rate": 1.05253367543297e-06, "loss": 0.7903, "step": 65645 }, { "epoch": 0.8000926230607047, "grad_norm": 2.10347056388855, "learning_rate": 1.0522129570237332e-06, "loss": 0.8578, "step": 65650 }, { "epoch": 0.8001535592848524, "grad_norm": 2.004131317138672, "learning_rate": 1.0518922386144966e-06, "loss": 0.841, "step": 65655 }, { "epoch": 0.8002144955090003, "grad_norm": 1.6484471559524536, "learning_rate": 1.0515715202052598e-06, "loss": 0.7657, "step": 65660 }, { "epoch": 0.8002754317331481, "grad_norm": 1.8086488246917725, "learning_rate": 1.0512508017960233e-06, "loss": 0.7673, "step": 65665 }, { "epoch": 0.8003363679572959, "grad_norm": 1.8405029773712158, "learning_rate": 1.0509300833867865e-06, "loss": 0.7734, "step": 65670 }, { "epoch": 0.8003973041814437, "grad_norm": 2.572664260864258, "learning_rate": 1.0506093649775497e-06, "loss": 0.787, "step": 65675 }, { "epoch": 0.8004582404055915, "grad_norm": 1.8880553245544434, "learning_rate": 1.050288646568313e-06, "loss": 0.8334, "step": 65680 }, { "epoch": 0.8005191766297393, "grad_norm": 2.193917751312256, "learning_rate": 1.0499679281590764e-06, "loss": 0.8657, "step": 65685 }, { "epoch": 0.8005801128538871, "grad_norm": 1.945781946182251, "learning_rate": 1.0496472097498396e-06, "loss": 0.8282, "step": 65690 }, { "epoch": 0.8006410490780349, "grad_norm": 2.173558235168457, "learning_rate": 1.049326491340603e-06, "loss": 0.7584, "step": 65695 }, { "epoch": 0.8007019853021827, "grad_norm": 2.1697139739990234, "learning_rate": 1.0490057729313663e-06, "loss": 0.8298, "step": 65700 }, { "epoch": 0.8007629215263306, "grad_norm": 2.1254115104675293, "learning_rate": 1.0486850545221298e-06, "loss": 0.7881, "step": 65705 }, { "epoch": 0.8008238577504784, "grad_norm": 1.9404100179672241, "learning_rate": 1.048364336112893e-06, "loss": 0.8169, "step": 65710 }, { "epoch": 0.8008847939746262, "grad_norm": 1.9104297161102295, "learning_rate": 1.0480436177036562e-06, "loss": 0.7881, "step": 65715 }, { "epoch": 0.8009457301987739, "grad_norm": 1.718923568725586, "learning_rate": 1.0477228992944194e-06, "loss": 0.7901, "step": 65720 }, { "epoch": 0.8010066664229217, "grad_norm": 1.673364520072937, "learning_rate": 1.0474021808851829e-06, "loss": 0.7861, "step": 65725 }, { "epoch": 0.8010676026470696, "grad_norm": 2.2682602405548096, "learning_rate": 1.0470814624759463e-06, "loss": 0.8373, "step": 65730 }, { "epoch": 0.8011285388712174, "grad_norm": 1.9318721294403076, "learning_rate": 1.0467607440667095e-06, "loss": 0.8237, "step": 65735 }, { "epoch": 0.8011894750953652, "grad_norm": 1.9338080883026123, "learning_rate": 1.0464400256574728e-06, "loss": 0.7665, "step": 65740 }, { "epoch": 0.801250411319513, "grad_norm": 2.242845058441162, "learning_rate": 1.0461193072482362e-06, "loss": 0.8584, "step": 65745 }, { "epoch": 0.8013113475436608, "grad_norm": 2.6471006870269775, "learning_rate": 1.0457985888389994e-06, "loss": 0.7442, "step": 65750 }, { "epoch": 0.8013722837678086, "grad_norm": 1.9126269817352295, "learning_rate": 1.0454778704297627e-06, "loss": 0.7772, "step": 65755 }, { "epoch": 0.8014332199919564, "grad_norm": 1.9686223268508911, "learning_rate": 1.0451571520205261e-06, "loss": 0.8189, "step": 65760 }, { "epoch": 0.8014941562161042, "grad_norm": 1.8707046508789062, "learning_rate": 1.0448364336112893e-06, "loss": 0.7627, "step": 65765 }, { "epoch": 0.801555092440252, "grad_norm": 1.5743006467819214, "learning_rate": 1.0445157152020528e-06, "loss": 0.7341, "step": 65770 }, { "epoch": 0.8016160286643998, "grad_norm": 2.1164236068725586, "learning_rate": 1.044194996792816e-06, "loss": 0.801, "step": 65775 }, { "epoch": 0.8016769648885477, "grad_norm": 2.19966459274292, "learning_rate": 1.0438742783835792e-06, "loss": 0.8352, "step": 65780 }, { "epoch": 0.8017379011126955, "grad_norm": 2.1263463497161865, "learning_rate": 1.0435535599743427e-06, "loss": 0.8727, "step": 65785 }, { "epoch": 0.8017988373368432, "grad_norm": 1.8058280944824219, "learning_rate": 1.043232841565106e-06, "loss": 0.7284, "step": 65790 }, { "epoch": 0.801859773560991, "grad_norm": 1.9653379917144775, "learning_rate": 1.0429121231558691e-06, "loss": 0.7506, "step": 65795 }, { "epoch": 0.8019207097851389, "grad_norm": 1.9960857629776, "learning_rate": 1.0425914047466326e-06, "loss": 0.7871, "step": 65800 }, { "epoch": 0.8019816460092867, "grad_norm": 2.0516862869262695, "learning_rate": 1.0422706863373958e-06, "loss": 0.7615, "step": 65805 }, { "epoch": 0.8020425822334345, "grad_norm": 1.6943297386169434, "learning_rate": 1.0419499679281592e-06, "loss": 0.8175, "step": 65810 }, { "epoch": 0.8021035184575823, "grad_norm": 1.6813130378723145, "learning_rate": 1.0416292495189225e-06, "loss": 0.8239, "step": 65815 }, { "epoch": 0.8021644546817301, "grad_norm": 1.853713870048523, "learning_rate": 1.0413085311096857e-06, "loss": 0.8607, "step": 65820 }, { "epoch": 0.8022253909058779, "grad_norm": 2.010676145553589, "learning_rate": 1.0409878127004491e-06, "loss": 0.7878, "step": 65825 }, { "epoch": 0.8022863271300257, "grad_norm": 2.0943076610565186, "learning_rate": 1.0406670942912124e-06, "loss": 0.8025, "step": 65830 }, { "epoch": 0.8023472633541735, "grad_norm": 1.894837498664856, "learning_rate": 1.0403463758819756e-06, "loss": 0.8106, "step": 65835 }, { "epoch": 0.8024081995783213, "grad_norm": 2.0302186012268066, "learning_rate": 1.040025657472739e-06, "loss": 0.8062, "step": 65840 }, { "epoch": 0.8024691358024691, "grad_norm": 2.104740619659424, "learning_rate": 1.0397049390635023e-06, "loss": 0.7769, "step": 65845 }, { "epoch": 0.802530072026617, "grad_norm": 2.00239896774292, "learning_rate": 1.0393842206542657e-06, "loss": 0.8995, "step": 65850 }, { "epoch": 0.8025910082507648, "grad_norm": 1.970786213874817, "learning_rate": 1.039063502245029e-06, "loss": 0.8029, "step": 65855 }, { "epoch": 0.8026519444749125, "grad_norm": 1.858392357826233, "learning_rate": 1.0387427838357924e-06, "loss": 0.841, "step": 65860 }, { "epoch": 0.8027128806990603, "grad_norm": 2.031808376312256, "learning_rate": 1.0384220654265556e-06, "loss": 0.7913, "step": 65865 }, { "epoch": 0.8027738169232081, "grad_norm": 2.3654215335845947, "learning_rate": 1.0381013470173188e-06, "loss": 0.7528, "step": 65870 }, { "epoch": 0.802834753147356, "grad_norm": 2.224334716796875, "learning_rate": 1.037780628608082e-06, "loss": 0.9038, "step": 65875 }, { "epoch": 0.8028956893715038, "grad_norm": 2.080869197845459, "learning_rate": 1.0374599101988455e-06, "loss": 0.8456, "step": 65880 }, { "epoch": 0.8029566255956516, "grad_norm": 1.84147310256958, "learning_rate": 1.037139191789609e-06, "loss": 0.7369, "step": 65885 }, { "epoch": 0.8030175618197994, "grad_norm": 1.9524534940719604, "learning_rate": 1.0368184733803722e-06, "loss": 0.7929, "step": 65890 }, { "epoch": 0.8030784980439472, "grad_norm": 1.9603817462921143, "learning_rate": 1.0364977549711354e-06, "loss": 0.8097, "step": 65895 }, { "epoch": 0.803139434268095, "grad_norm": 2.0731937885284424, "learning_rate": 1.0361770365618988e-06, "loss": 0.7933, "step": 65900 }, { "epoch": 0.8032003704922428, "grad_norm": 1.7635496854782104, "learning_rate": 1.035856318152662e-06, "loss": 0.7881, "step": 65905 }, { "epoch": 0.8032613067163906, "grad_norm": 2.1041481494903564, "learning_rate": 1.0355355997434253e-06, "loss": 0.8112, "step": 65910 }, { "epoch": 0.8033222429405384, "grad_norm": 1.7460500001907349, "learning_rate": 1.0352148813341885e-06, "loss": 0.758, "step": 65915 }, { "epoch": 0.8033831791646863, "grad_norm": 1.7731903791427612, "learning_rate": 1.034894162924952e-06, "loss": 0.8174, "step": 65920 }, { "epoch": 0.8034441153888341, "grad_norm": 1.9409352540969849, "learning_rate": 1.0345734445157154e-06, "loss": 0.7975, "step": 65925 }, { "epoch": 0.8035050516129818, "grad_norm": 1.918778657913208, "learning_rate": 1.0342527261064786e-06, "loss": 0.7542, "step": 65930 }, { "epoch": 0.8035659878371296, "grad_norm": 2.1906538009643555, "learning_rate": 1.0339320076972419e-06, "loss": 0.8449, "step": 65935 }, { "epoch": 0.8036269240612774, "grad_norm": 2.0943288803100586, "learning_rate": 1.0336112892880053e-06, "loss": 0.844, "step": 65940 }, { "epoch": 0.8036878602854253, "grad_norm": 1.7711846828460693, "learning_rate": 1.0332905708787685e-06, "loss": 0.8032, "step": 65945 }, { "epoch": 0.8037487965095731, "grad_norm": 1.8599461317062378, "learning_rate": 1.0329698524695318e-06, "loss": 0.8641, "step": 65950 }, { "epoch": 0.8038097327337209, "grad_norm": 2.093165636062622, "learning_rate": 1.0326491340602952e-06, "loss": 0.8553, "step": 65955 }, { "epoch": 0.8038706689578687, "grad_norm": 1.843029260635376, "learning_rate": 1.0323284156510584e-06, "loss": 0.764, "step": 65960 }, { "epoch": 0.8039316051820165, "grad_norm": 1.7922782897949219, "learning_rate": 1.0320076972418219e-06, "loss": 0.8226, "step": 65965 }, { "epoch": 0.8039925414061643, "grad_norm": 1.8477427959442139, "learning_rate": 1.031686978832585e-06, "loss": 0.716, "step": 65970 }, { "epoch": 0.8040534776303121, "grad_norm": 1.9196960926055908, "learning_rate": 1.0313662604233483e-06, "loss": 0.815, "step": 65975 }, { "epoch": 0.8041144138544599, "grad_norm": 1.7216994762420654, "learning_rate": 1.0310455420141118e-06, "loss": 0.8275, "step": 65980 }, { "epoch": 0.8041753500786077, "grad_norm": 2.274719476699829, "learning_rate": 1.030724823604875e-06, "loss": 0.6897, "step": 65985 }, { "epoch": 0.8042362863027556, "grad_norm": 1.944791555404663, "learning_rate": 1.0304041051956382e-06, "loss": 0.8295, "step": 65990 }, { "epoch": 0.8042972225269034, "grad_norm": 2.052293062210083, "learning_rate": 1.0300833867864017e-06, "loss": 0.8187, "step": 65995 }, { "epoch": 0.8043581587510511, "grad_norm": 1.7254644632339478, "learning_rate": 1.0297626683771649e-06, "loss": 0.8097, "step": 66000 }, { "epoch": 0.8044190949751989, "grad_norm": 2.0682106018066406, "learning_rate": 1.0294419499679283e-06, "loss": 0.7935, "step": 66005 }, { "epoch": 0.8044800311993467, "grad_norm": 2.510010004043579, "learning_rate": 1.0291212315586916e-06, "loss": 0.8378, "step": 66010 }, { "epoch": 0.8045409674234946, "grad_norm": 1.7900731563568115, "learning_rate": 1.0288005131494548e-06, "loss": 0.8459, "step": 66015 }, { "epoch": 0.8046019036476424, "grad_norm": 1.671764850616455, "learning_rate": 1.0284797947402182e-06, "loss": 0.812, "step": 66020 }, { "epoch": 0.8046628398717902, "grad_norm": 1.7322282791137695, "learning_rate": 1.0281590763309815e-06, "loss": 0.8661, "step": 66025 }, { "epoch": 0.804723776095938, "grad_norm": 1.923895001411438, "learning_rate": 1.0278383579217447e-06, "loss": 0.6887, "step": 66030 }, { "epoch": 0.8047847123200857, "grad_norm": 1.958532691001892, "learning_rate": 1.0275176395125081e-06, "loss": 0.7807, "step": 66035 }, { "epoch": 0.8048456485442336, "grad_norm": 1.6452105045318604, "learning_rate": 1.0271969211032714e-06, "loss": 0.8363, "step": 66040 }, { "epoch": 0.8049065847683814, "grad_norm": 1.7205305099487305, "learning_rate": 1.0268762026940348e-06, "loss": 0.7418, "step": 66045 }, { "epoch": 0.8049675209925292, "grad_norm": 1.9544258117675781, "learning_rate": 1.026555484284798e-06, "loss": 0.834, "step": 66050 }, { "epoch": 0.805028457216677, "grad_norm": 2.0235109329223633, "learning_rate": 1.0262347658755613e-06, "loss": 0.7781, "step": 66055 }, { "epoch": 0.8050893934408249, "grad_norm": 2.0362730026245117, "learning_rate": 1.0259140474663247e-06, "loss": 0.8374, "step": 66060 }, { "epoch": 0.8051503296649727, "grad_norm": 1.7723299264907837, "learning_rate": 1.025593329057088e-06, "loss": 0.7954, "step": 66065 }, { "epoch": 0.8052112658891204, "grad_norm": 1.7714037895202637, "learning_rate": 1.0252726106478512e-06, "loss": 0.7688, "step": 66070 }, { "epoch": 0.8052722021132682, "grad_norm": 1.7768925428390503, "learning_rate": 1.0249518922386146e-06, "loss": 0.8645, "step": 66075 }, { "epoch": 0.805333138337416, "grad_norm": 1.7681597471237183, "learning_rate": 1.024631173829378e-06, "loss": 0.7428, "step": 66080 }, { "epoch": 0.8053940745615639, "grad_norm": 2.3286361694335938, "learning_rate": 1.0243104554201413e-06, "loss": 0.8368, "step": 66085 }, { "epoch": 0.8054550107857117, "grad_norm": 1.7191678285598755, "learning_rate": 1.0239897370109045e-06, "loss": 0.7962, "step": 66090 }, { "epoch": 0.8055159470098595, "grad_norm": 2.6295723915100098, "learning_rate": 1.0236690186016677e-06, "loss": 0.8739, "step": 66095 }, { "epoch": 0.8055768832340073, "grad_norm": 2.073655605316162, "learning_rate": 1.0233483001924312e-06, "loss": 0.7782, "step": 66100 }, { "epoch": 0.805637819458155, "grad_norm": 1.6346195936203003, "learning_rate": 1.0230275817831944e-06, "loss": 0.7923, "step": 66105 }, { "epoch": 0.8056987556823029, "grad_norm": 1.9513071775436401, "learning_rate": 1.0227068633739576e-06, "loss": 0.7802, "step": 66110 }, { "epoch": 0.8057596919064507, "grad_norm": 2.1002962589263916, "learning_rate": 1.022386144964721e-06, "loss": 0.8175, "step": 66115 }, { "epoch": 0.8058206281305985, "grad_norm": 2.0992088317871094, "learning_rate": 1.0220654265554845e-06, "loss": 0.8226, "step": 66120 }, { "epoch": 0.8058815643547463, "grad_norm": 1.7931569814682007, "learning_rate": 1.0217447081462477e-06, "loss": 0.7934, "step": 66125 }, { "epoch": 0.8059425005788942, "grad_norm": 2.098025321960449, "learning_rate": 1.021423989737011e-06, "loss": 0.7273, "step": 66130 }, { "epoch": 0.806003436803042, "grad_norm": 2.3690578937530518, "learning_rate": 1.0211032713277742e-06, "loss": 0.7552, "step": 66135 }, { "epoch": 0.8060643730271897, "grad_norm": 1.6815091371536255, "learning_rate": 1.0207825529185376e-06, "loss": 0.7964, "step": 66140 }, { "epoch": 0.8061253092513375, "grad_norm": 2.3211734294891357, "learning_rate": 1.0204618345093008e-06, "loss": 0.8113, "step": 66145 }, { "epoch": 0.8061862454754853, "grad_norm": 2.0756657123565674, "learning_rate": 1.0201411161000643e-06, "loss": 0.8654, "step": 66150 }, { "epoch": 0.8062471816996332, "grad_norm": 2.2302136421203613, "learning_rate": 1.0198203976908275e-06, "loss": 0.8634, "step": 66155 }, { "epoch": 0.806308117923781, "grad_norm": 2.20941424369812, "learning_rate": 1.019499679281591e-06, "loss": 0.7935, "step": 66160 }, { "epoch": 0.8063690541479288, "grad_norm": 2.0719525814056396, "learning_rate": 1.0191789608723542e-06, "loss": 0.8102, "step": 66165 }, { "epoch": 0.8064299903720766, "grad_norm": 1.8429487943649292, "learning_rate": 1.0188582424631174e-06, "loss": 0.7493, "step": 66170 }, { "epoch": 0.8064909265962243, "grad_norm": 1.728262186050415, "learning_rate": 1.0185375240538809e-06, "loss": 0.8235, "step": 66175 }, { "epoch": 0.8065518628203722, "grad_norm": 2.126875400543213, "learning_rate": 1.018216805644644e-06, "loss": 0.8284, "step": 66180 }, { "epoch": 0.80661279904452, "grad_norm": 1.903498649597168, "learning_rate": 1.0178960872354073e-06, "loss": 0.8071, "step": 66185 }, { "epoch": 0.8066737352686678, "grad_norm": 1.8770095109939575, "learning_rate": 1.0175753688261708e-06, "loss": 0.8167, "step": 66190 }, { "epoch": 0.8067346714928156, "grad_norm": 2.0552237033843994, "learning_rate": 1.017254650416934e-06, "loss": 0.8018, "step": 66195 }, { "epoch": 0.8067956077169635, "grad_norm": 1.844972848892212, "learning_rate": 1.0169339320076974e-06, "loss": 0.7345, "step": 66200 }, { "epoch": 0.8068565439411113, "grad_norm": 2.332395076751709, "learning_rate": 1.0166132135984607e-06, "loss": 0.814, "step": 66205 }, { "epoch": 0.806917480165259, "grad_norm": 2.336116313934326, "learning_rate": 1.0162924951892239e-06, "loss": 0.7845, "step": 66210 }, { "epoch": 0.8069784163894068, "grad_norm": 1.9630945920944214, "learning_rate": 1.0159717767799873e-06, "loss": 0.7442, "step": 66215 }, { "epoch": 0.8070393526135546, "grad_norm": 1.8361310958862305, "learning_rate": 1.0156510583707505e-06, "loss": 0.8713, "step": 66220 }, { "epoch": 0.8071002888377025, "grad_norm": 2.0788228511810303, "learning_rate": 1.0153303399615138e-06, "loss": 0.75, "step": 66225 }, { "epoch": 0.8071612250618503, "grad_norm": 1.7907077074050903, "learning_rate": 1.0150096215522772e-06, "loss": 0.7496, "step": 66230 }, { "epoch": 0.8072221612859981, "grad_norm": 1.9481534957885742, "learning_rate": 1.0146889031430407e-06, "loss": 0.7899, "step": 66235 }, { "epoch": 0.8072830975101459, "grad_norm": 1.970079779624939, "learning_rate": 1.0143681847338039e-06, "loss": 0.8722, "step": 66240 }, { "epoch": 0.8073440337342936, "grad_norm": 2.002333879470825, "learning_rate": 1.0140474663245671e-06, "loss": 0.8409, "step": 66245 }, { "epoch": 0.8074049699584415, "grad_norm": 1.9896395206451416, "learning_rate": 1.0137267479153303e-06, "loss": 0.7572, "step": 66250 }, { "epoch": 0.8074659061825893, "grad_norm": 2.0662894248962402, "learning_rate": 1.0134060295060938e-06, "loss": 0.8122, "step": 66255 }, { "epoch": 0.8075268424067371, "grad_norm": 2.134347915649414, "learning_rate": 1.013085311096857e-06, "loss": 0.8335, "step": 66260 }, { "epoch": 0.8075877786308849, "grad_norm": 2.176630973815918, "learning_rate": 1.0127645926876202e-06, "loss": 0.7754, "step": 66265 }, { "epoch": 0.8076487148550328, "grad_norm": 2.0274107456207275, "learning_rate": 1.0124438742783837e-06, "loss": 0.8285, "step": 66270 }, { "epoch": 0.8077096510791806, "grad_norm": 3.856203079223633, "learning_rate": 1.0121231558691471e-06, "loss": 0.8682, "step": 66275 }, { "epoch": 0.8077705873033283, "grad_norm": 1.9344860315322876, "learning_rate": 1.0118024374599103e-06, "loss": 0.7558, "step": 66280 }, { "epoch": 0.8078315235274761, "grad_norm": 2.05265736579895, "learning_rate": 1.0114817190506736e-06, "loss": 0.7857, "step": 66285 }, { "epoch": 0.8078924597516239, "grad_norm": 1.853692889213562, "learning_rate": 1.0111610006414368e-06, "loss": 0.7436, "step": 66290 }, { "epoch": 0.8079533959757718, "grad_norm": 1.9619765281677246, "learning_rate": 1.0108402822322002e-06, "loss": 0.7198, "step": 66295 }, { "epoch": 0.8080143321999196, "grad_norm": 1.9462615251541138, "learning_rate": 1.0105195638229635e-06, "loss": 0.7511, "step": 66300 }, { "epoch": 0.8080752684240674, "grad_norm": 2.1128664016723633, "learning_rate": 1.010198845413727e-06, "loss": 0.7665, "step": 66305 }, { "epoch": 0.8081362046482152, "grad_norm": 2.0182504653930664, "learning_rate": 1.0098781270044901e-06, "loss": 0.8371, "step": 66310 }, { "epoch": 0.8081971408723629, "grad_norm": 1.826880693435669, "learning_rate": 1.0095574085952536e-06, "loss": 0.8275, "step": 66315 }, { "epoch": 0.8082580770965108, "grad_norm": 2.0834786891937256, "learning_rate": 1.0092366901860168e-06, "loss": 0.8787, "step": 66320 }, { "epoch": 0.8083190133206586, "grad_norm": 2.0581040382385254, "learning_rate": 1.00891597177678e-06, "loss": 0.8741, "step": 66325 }, { "epoch": 0.8083799495448064, "grad_norm": 2.2848031520843506, "learning_rate": 1.0085952533675433e-06, "loss": 0.8209, "step": 66330 }, { "epoch": 0.8084408857689542, "grad_norm": 2.1016645431518555, "learning_rate": 1.0082745349583067e-06, "loss": 0.8102, "step": 66335 }, { "epoch": 0.808501821993102, "grad_norm": 2.4387624263763428, "learning_rate": 1.00795381654907e-06, "loss": 0.8479, "step": 66340 }, { "epoch": 0.8085627582172499, "grad_norm": 2.603253126144409, "learning_rate": 1.0076330981398334e-06, "loss": 0.8649, "step": 66345 }, { "epoch": 0.8086236944413976, "grad_norm": 1.697590947151184, "learning_rate": 1.0073123797305966e-06, "loss": 0.7857, "step": 66350 }, { "epoch": 0.8086846306655454, "grad_norm": 2.0169198513031006, "learning_rate": 1.00699166132136e-06, "loss": 0.8691, "step": 66355 }, { "epoch": 0.8087455668896932, "grad_norm": 1.8777503967285156, "learning_rate": 1.0066709429121233e-06, "loss": 0.7822, "step": 66360 }, { "epoch": 0.8088065031138411, "grad_norm": 1.9954349994659424, "learning_rate": 1.0063502245028865e-06, "loss": 0.7902, "step": 66365 }, { "epoch": 0.8088674393379889, "grad_norm": 2.135387420654297, "learning_rate": 1.0060295060936497e-06, "loss": 0.8302, "step": 66370 }, { "epoch": 0.8089283755621367, "grad_norm": 2.085745334625244, "learning_rate": 1.0057087876844132e-06, "loss": 0.8328, "step": 66375 }, { "epoch": 0.8089893117862845, "grad_norm": 1.8238835334777832, "learning_rate": 1.0053880692751764e-06, "loss": 0.7653, "step": 66380 }, { "epoch": 0.8090502480104322, "grad_norm": 1.863205075263977, "learning_rate": 1.0050673508659398e-06, "loss": 0.8638, "step": 66385 }, { "epoch": 0.8091111842345801, "grad_norm": 2.038085460662842, "learning_rate": 1.004746632456703e-06, "loss": 0.8116, "step": 66390 }, { "epoch": 0.8091721204587279, "grad_norm": 2.045050859451294, "learning_rate": 1.0044259140474665e-06, "loss": 0.8313, "step": 66395 }, { "epoch": 0.8092330566828757, "grad_norm": 1.9207327365875244, "learning_rate": 1.0041051956382297e-06, "loss": 0.8559, "step": 66400 }, { "epoch": 0.8092939929070235, "grad_norm": 1.8400928974151611, "learning_rate": 1.003784477228993e-06, "loss": 0.8562, "step": 66405 }, { "epoch": 0.8093549291311714, "grad_norm": 1.9413715600967407, "learning_rate": 1.0034637588197562e-06, "loss": 0.77, "step": 66410 }, { "epoch": 0.8094158653553192, "grad_norm": 1.9469263553619385, "learning_rate": 1.0031430404105196e-06, "loss": 0.8053, "step": 66415 }, { "epoch": 0.8094768015794669, "grad_norm": 2.3317768573760986, "learning_rate": 1.0028223220012829e-06, "loss": 0.8447, "step": 66420 }, { "epoch": 0.8095377378036147, "grad_norm": 1.9009146690368652, "learning_rate": 1.0025016035920463e-06, "loss": 0.7571, "step": 66425 }, { "epoch": 0.8095986740277625, "grad_norm": 1.9296932220458984, "learning_rate": 1.0021808851828095e-06, "loss": 0.8443, "step": 66430 }, { "epoch": 0.8096596102519104, "grad_norm": 1.833285927772522, "learning_rate": 1.001860166773573e-06, "loss": 0.8594, "step": 66435 }, { "epoch": 0.8097205464760582, "grad_norm": 2.323570489883423, "learning_rate": 1.0015394483643362e-06, "loss": 0.8118, "step": 66440 }, { "epoch": 0.809781482700206, "grad_norm": 2.361828088760376, "learning_rate": 1.0012187299550994e-06, "loss": 0.7811, "step": 66445 }, { "epoch": 0.8098424189243538, "grad_norm": 1.9731496572494507, "learning_rate": 1.0008980115458627e-06, "loss": 0.7703, "step": 66450 }, { "epoch": 0.8099033551485015, "grad_norm": 2.539863348007202, "learning_rate": 1.000577293136626e-06, "loss": 0.8722, "step": 66455 }, { "epoch": 0.8099642913726494, "grad_norm": 2.0236361026763916, "learning_rate": 1.0002565747273893e-06, "loss": 0.7584, "step": 66460 }, { "epoch": 0.8100252275967972, "grad_norm": 1.7318023443222046, "learning_rate": 9.999358563181528e-07, "loss": 0.8862, "step": 66465 }, { "epoch": 0.810086163820945, "grad_norm": 1.9369479417800903, "learning_rate": 9.996151379089162e-07, "loss": 0.8417, "step": 66470 }, { "epoch": 0.8101471000450928, "grad_norm": 1.9175013303756714, "learning_rate": 9.992944194996794e-07, "loss": 0.8053, "step": 66475 }, { "epoch": 0.8102080362692407, "grad_norm": 1.896450400352478, "learning_rate": 9.989737010904427e-07, "loss": 0.8094, "step": 66480 }, { "epoch": 0.8102689724933885, "grad_norm": 2.0697803497314453, "learning_rate": 9.98652982681206e-07, "loss": 0.8221, "step": 66485 }, { "epoch": 0.8103299087175362, "grad_norm": 2.086909294128418, "learning_rate": 9.983322642719693e-07, "loss": 0.9001, "step": 66490 }, { "epoch": 0.810390844941684, "grad_norm": 1.5723717212677002, "learning_rate": 9.980115458627326e-07, "loss": 0.7926, "step": 66495 }, { "epoch": 0.8104517811658318, "grad_norm": 1.792506217956543, "learning_rate": 9.97690827453496e-07, "loss": 0.7779, "step": 66500 }, { "epoch": 0.8105127173899797, "grad_norm": 2.2313520908355713, "learning_rate": 9.973701090442592e-07, "loss": 0.8204, "step": 66505 }, { "epoch": 0.8105736536141275, "grad_norm": 2.038831949234009, "learning_rate": 9.970493906350227e-07, "loss": 0.7578, "step": 66510 }, { "epoch": 0.8106345898382753, "grad_norm": 1.8296425342559814, "learning_rate": 9.96728672225786e-07, "loss": 0.8419, "step": 66515 }, { "epoch": 0.8106955260624231, "grad_norm": 1.7933003902435303, "learning_rate": 9.964079538165491e-07, "loss": 0.7546, "step": 66520 }, { "epoch": 0.8107564622865708, "grad_norm": 1.7445311546325684, "learning_rate": 9.960872354073124e-07, "loss": 0.8176, "step": 66525 }, { "epoch": 0.8108173985107187, "grad_norm": 1.6475437879562378, "learning_rate": 9.957665169980758e-07, "loss": 0.7264, "step": 66530 }, { "epoch": 0.8108783347348665, "grad_norm": 2.550039291381836, "learning_rate": 9.95445798588839e-07, "loss": 0.7768, "step": 66535 }, { "epoch": 0.8109392709590143, "grad_norm": 2.063878059387207, "learning_rate": 9.951250801796025e-07, "loss": 0.7548, "step": 66540 }, { "epoch": 0.8110002071831621, "grad_norm": 2.161999225616455, "learning_rate": 9.948043617703657e-07, "loss": 0.8728, "step": 66545 }, { "epoch": 0.81106114340731, "grad_norm": 2.297410488128662, "learning_rate": 9.944836433611291e-07, "loss": 0.8009, "step": 66550 }, { "epoch": 0.8111220796314578, "grad_norm": 2.1111180782318115, "learning_rate": 9.941629249518924e-07, "loss": 0.8501, "step": 66555 }, { "epoch": 0.8111830158556055, "grad_norm": 1.9267375469207764, "learning_rate": 9.938422065426556e-07, "loss": 0.8591, "step": 66560 }, { "epoch": 0.8112439520797533, "grad_norm": 2.4683101177215576, "learning_rate": 9.935214881334188e-07, "loss": 0.8236, "step": 66565 }, { "epoch": 0.8113048883039011, "grad_norm": 1.8995909690856934, "learning_rate": 9.932007697241823e-07, "loss": 0.8077, "step": 66570 }, { "epoch": 0.811365824528049, "grad_norm": 2.00455641746521, "learning_rate": 9.928800513149455e-07, "loss": 0.8029, "step": 66575 }, { "epoch": 0.8114267607521968, "grad_norm": 1.9889435768127441, "learning_rate": 9.92559332905709e-07, "loss": 0.7692, "step": 66580 }, { "epoch": 0.8114876969763446, "grad_norm": 2.0763235092163086, "learning_rate": 9.922386144964722e-07, "loss": 0.7943, "step": 66585 }, { "epoch": 0.8115486332004924, "grad_norm": 1.9165594577789307, "learning_rate": 9.919178960872356e-07, "loss": 0.7326, "step": 66590 }, { "epoch": 0.8116095694246401, "grad_norm": 2.0450432300567627, "learning_rate": 9.915971776779988e-07, "loss": 0.8133, "step": 66595 }, { "epoch": 0.811670505648788, "grad_norm": 1.9060540199279785, "learning_rate": 9.91276459268762e-07, "loss": 0.8036, "step": 66600 }, { "epoch": 0.8117314418729358, "grad_norm": 1.838990330696106, "learning_rate": 9.909557408595253e-07, "loss": 0.8602, "step": 66605 }, { "epoch": 0.8117923780970836, "grad_norm": 1.9493021965026855, "learning_rate": 9.906350224502887e-07, "loss": 0.8597, "step": 66610 }, { "epoch": 0.8118533143212314, "grad_norm": 1.8753578662872314, "learning_rate": 9.90314304041052e-07, "loss": 0.7237, "step": 66615 }, { "epoch": 0.8119142505453792, "grad_norm": 1.6899282932281494, "learning_rate": 9.899935856318154e-07, "loss": 0.7765, "step": 66620 }, { "epoch": 0.811975186769527, "grad_norm": 2.1175050735473633, "learning_rate": 9.896728672225786e-07, "loss": 0.828, "step": 66625 }, { "epoch": 0.8120361229936748, "grad_norm": 2.0852837562561035, "learning_rate": 9.89352148813342e-07, "loss": 0.7335, "step": 66630 }, { "epoch": 0.8120970592178226, "grad_norm": 2.7269787788391113, "learning_rate": 9.890314304041053e-07, "loss": 0.7589, "step": 66635 }, { "epoch": 0.8121579954419704, "grad_norm": 1.8550747632980347, "learning_rate": 9.887107119948685e-07, "loss": 0.8327, "step": 66640 }, { "epoch": 0.8122189316661182, "grad_norm": 2.346496105194092, "learning_rate": 9.883899935856317e-07, "loss": 0.8229, "step": 66645 }, { "epoch": 0.8122798678902661, "grad_norm": 1.7686740159988403, "learning_rate": 9.880692751763952e-07, "loss": 0.8081, "step": 66650 }, { "epoch": 0.8123408041144139, "grad_norm": 1.7480427026748657, "learning_rate": 9.877485567671586e-07, "loss": 0.8278, "step": 66655 }, { "epoch": 0.8124017403385616, "grad_norm": 2.18131947517395, "learning_rate": 9.874278383579219e-07, "loss": 0.7585, "step": 66660 }, { "epoch": 0.8124626765627094, "grad_norm": 1.8441542387008667, "learning_rate": 9.87107119948685e-07, "loss": 0.8185, "step": 66665 }, { "epoch": 0.8125236127868573, "grad_norm": 1.9435172080993652, "learning_rate": 9.867864015394485e-07, "loss": 0.8086, "step": 66670 }, { "epoch": 0.8125845490110051, "grad_norm": 1.7332258224487305, "learning_rate": 9.864656831302118e-07, "loss": 0.8643, "step": 66675 }, { "epoch": 0.8126454852351529, "grad_norm": 1.8411884307861328, "learning_rate": 9.86144964720975e-07, "loss": 0.8017, "step": 66680 }, { "epoch": 0.8127064214593007, "grad_norm": 1.9507213830947876, "learning_rate": 9.858242463117382e-07, "loss": 0.8013, "step": 66685 }, { "epoch": 0.8127673576834485, "grad_norm": 2.0196166038513184, "learning_rate": 9.855035279025017e-07, "loss": 0.7075, "step": 66690 }, { "epoch": 0.8128282939075963, "grad_norm": 2.1843032836914062, "learning_rate": 9.85182809493265e-07, "loss": 0.8026, "step": 66695 }, { "epoch": 0.8128892301317441, "grad_norm": 1.9290449619293213, "learning_rate": 9.848620910840283e-07, "loss": 0.8652, "step": 66700 }, { "epoch": 0.8129501663558919, "grad_norm": 2.4294848442077637, "learning_rate": 9.845413726747916e-07, "loss": 0.8277, "step": 66705 }, { "epoch": 0.8130111025800397, "grad_norm": 2.068179130554199, "learning_rate": 9.84220654265555e-07, "loss": 0.808, "step": 66710 }, { "epoch": 0.8130720388041875, "grad_norm": 2.2246811389923096, "learning_rate": 9.838999358563182e-07, "loss": 0.8427, "step": 66715 }, { "epoch": 0.8131329750283354, "grad_norm": 2.4120969772338867, "learning_rate": 9.835792174470814e-07, "loss": 0.8029, "step": 66720 }, { "epoch": 0.8131939112524832, "grad_norm": 2.48173451423645, "learning_rate": 9.832584990378449e-07, "loss": 0.8896, "step": 66725 }, { "epoch": 0.8132548474766309, "grad_norm": 1.9818884134292603, "learning_rate": 9.829377806286081e-07, "loss": 0.8365, "step": 66730 }, { "epoch": 0.8133157837007787, "grad_norm": 2.2709927558898926, "learning_rate": 9.826170622193716e-07, "loss": 0.7383, "step": 66735 }, { "epoch": 0.8133767199249266, "grad_norm": 1.780146837234497, "learning_rate": 9.822963438101348e-07, "loss": 0.864, "step": 66740 }, { "epoch": 0.8134376561490744, "grad_norm": 2.2460291385650635, "learning_rate": 9.81975625400898e-07, "loss": 0.822, "step": 66745 }, { "epoch": 0.8134985923732222, "grad_norm": 1.8478853702545166, "learning_rate": 9.816549069916615e-07, "loss": 0.7991, "step": 66750 }, { "epoch": 0.81355952859737, "grad_norm": 1.7623286247253418, "learning_rate": 9.813341885824247e-07, "loss": 0.808, "step": 66755 }, { "epoch": 0.8136204648215178, "grad_norm": 1.722246527671814, "learning_rate": 9.81013470173188e-07, "loss": 0.8221, "step": 66760 }, { "epoch": 0.8136814010456656, "grad_norm": 1.778686285018921, "learning_rate": 9.806927517639514e-07, "loss": 0.8774, "step": 66765 }, { "epoch": 0.8137423372698134, "grad_norm": 1.9757051467895508, "learning_rate": 9.803720333547146e-07, "loss": 0.7375, "step": 66770 }, { "epoch": 0.8138032734939612, "grad_norm": 1.7544547319412231, "learning_rate": 9.80051314945478e-07, "loss": 0.7982, "step": 66775 }, { "epoch": 0.813864209718109, "grad_norm": 1.8788466453552246, "learning_rate": 9.797305965362412e-07, "loss": 0.7872, "step": 66780 }, { "epoch": 0.8139251459422568, "grad_norm": 2.032424211502075, "learning_rate": 9.794098781270047e-07, "loss": 0.799, "step": 66785 }, { "epoch": 0.8139860821664047, "grad_norm": 2.013551712036133, "learning_rate": 9.79089159717768e-07, "loss": 0.8796, "step": 66790 }, { "epoch": 0.8140470183905525, "grad_norm": 1.741341233253479, "learning_rate": 9.787684413085311e-07, "loss": 0.7853, "step": 66795 }, { "epoch": 0.8141079546147002, "grad_norm": 1.870848536491394, "learning_rate": 9.784477228992944e-07, "loss": 0.826, "step": 66800 }, { "epoch": 0.814168890838848, "grad_norm": 2.1296279430389404, "learning_rate": 9.781270044900578e-07, "loss": 0.7271, "step": 66805 }, { "epoch": 0.8142298270629958, "grad_norm": 1.9059700965881348, "learning_rate": 9.77806286080821e-07, "loss": 0.8024, "step": 66810 }, { "epoch": 0.8142907632871437, "grad_norm": 2.4767913818359375, "learning_rate": 9.774855676715845e-07, "loss": 0.8496, "step": 66815 }, { "epoch": 0.8143516995112915, "grad_norm": 1.6669073104858398, "learning_rate": 9.771648492623477e-07, "loss": 0.8018, "step": 66820 }, { "epoch": 0.8144126357354393, "grad_norm": 1.7946913242340088, "learning_rate": 9.768441308531112e-07, "loss": 0.8115, "step": 66825 }, { "epoch": 0.8144735719595871, "grad_norm": 1.795013427734375, "learning_rate": 9.765234124438744e-07, "loss": 0.8584, "step": 66830 }, { "epoch": 0.8145345081837349, "grad_norm": 1.7705273628234863, "learning_rate": 9.762026940346376e-07, "loss": 0.8212, "step": 66835 }, { "epoch": 0.8145954444078827, "grad_norm": 1.9838500022888184, "learning_rate": 9.758819756254008e-07, "loss": 0.8428, "step": 66840 }, { "epoch": 0.8146563806320305, "grad_norm": 1.7228634357452393, "learning_rate": 9.755612572161643e-07, "loss": 0.8451, "step": 66845 }, { "epoch": 0.8147173168561783, "grad_norm": 2.1828973293304443, "learning_rate": 9.752405388069277e-07, "loss": 0.8364, "step": 66850 }, { "epoch": 0.8147782530803261, "grad_norm": 2.1584317684173584, "learning_rate": 9.74919820397691e-07, "loss": 0.8493, "step": 66855 }, { "epoch": 0.814839189304474, "grad_norm": 2.3826828002929688, "learning_rate": 9.745991019884542e-07, "loss": 0.8799, "step": 66860 }, { "epoch": 0.8149001255286218, "grad_norm": 2.1845614910125732, "learning_rate": 9.742783835792176e-07, "loss": 0.8154, "step": 66865 }, { "epoch": 0.8149610617527695, "grad_norm": 2.263810873031616, "learning_rate": 9.739576651699808e-07, "loss": 0.8595, "step": 66870 }, { "epoch": 0.8150219979769173, "grad_norm": 2.281876564025879, "learning_rate": 9.73636946760744e-07, "loss": 0.8407, "step": 66875 }, { "epoch": 0.8150829342010651, "grad_norm": 1.8224047422409058, "learning_rate": 9.733162283515073e-07, "loss": 0.7902, "step": 66880 }, { "epoch": 0.815143870425213, "grad_norm": 1.8855541944503784, "learning_rate": 9.729955099422707e-07, "loss": 0.8284, "step": 66885 }, { "epoch": 0.8152048066493608, "grad_norm": 2.1839523315429688, "learning_rate": 9.726747915330342e-07, "loss": 0.843, "step": 66890 }, { "epoch": 0.8152657428735086, "grad_norm": 2.112305164337158, "learning_rate": 9.723540731237974e-07, "loss": 0.7724, "step": 66895 }, { "epoch": 0.8153266790976564, "grad_norm": 1.8062067031860352, "learning_rate": 9.720333547145606e-07, "loss": 0.8212, "step": 66900 }, { "epoch": 0.8153876153218041, "grad_norm": 2.545978307723999, "learning_rate": 9.71712636305324e-07, "loss": 0.7884, "step": 66905 }, { "epoch": 0.815448551545952, "grad_norm": 2.1718292236328125, "learning_rate": 9.713919178960873e-07, "loss": 0.8496, "step": 66910 }, { "epoch": 0.8155094877700998, "grad_norm": 1.6426928043365479, "learning_rate": 9.710711994868505e-07, "loss": 0.8371, "step": 66915 }, { "epoch": 0.8155704239942476, "grad_norm": 1.7008371353149414, "learning_rate": 9.70750481077614e-07, "loss": 0.8751, "step": 66920 }, { "epoch": 0.8156313602183954, "grad_norm": 1.9267592430114746, "learning_rate": 9.704297626683772e-07, "loss": 0.79, "step": 66925 }, { "epoch": 0.8156922964425433, "grad_norm": 1.9727298021316528, "learning_rate": 9.701090442591406e-07, "loss": 0.7975, "step": 66930 }, { "epoch": 0.8157532326666911, "grad_norm": 2.447208881378174, "learning_rate": 9.697883258499039e-07, "loss": 0.8269, "step": 66935 }, { "epoch": 0.8158141688908388, "grad_norm": 1.8372727632522583, "learning_rate": 9.69467607440667e-07, "loss": 0.8257, "step": 66940 }, { "epoch": 0.8158751051149866, "grad_norm": 1.7265924215316772, "learning_rate": 9.691468890314305e-07, "loss": 0.8352, "step": 66945 }, { "epoch": 0.8159360413391344, "grad_norm": 1.8643205165863037, "learning_rate": 9.688261706221938e-07, "loss": 0.8499, "step": 66950 }, { "epoch": 0.8159969775632823, "grad_norm": 2.0481362342834473, "learning_rate": 9.68505452212957e-07, "loss": 0.8198, "step": 66955 }, { "epoch": 0.8160579137874301, "grad_norm": 2.0937082767486572, "learning_rate": 9.681847338037204e-07, "loss": 0.869, "step": 66960 }, { "epoch": 0.8161188500115779, "grad_norm": 2.6122400760650635, "learning_rate": 9.678640153944837e-07, "loss": 0.8382, "step": 66965 }, { "epoch": 0.8161797862357257, "grad_norm": 2.0694496631622314, "learning_rate": 9.675432969852471e-07, "loss": 0.7833, "step": 66970 }, { "epoch": 0.8162407224598734, "grad_norm": 2.0606045722961426, "learning_rate": 9.672225785760103e-07, "loss": 0.876, "step": 66975 }, { "epoch": 0.8163016586840213, "grad_norm": 2.2609567642211914, "learning_rate": 9.669018601667736e-07, "loss": 0.8726, "step": 66980 }, { "epoch": 0.8163625949081691, "grad_norm": 2.080451726913452, "learning_rate": 9.66581141757537e-07, "loss": 0.8409, "step": 66985 }, { "epoch": 0.8164235311323169, "grad_norm": 1.7786732912063599, "learning_rate": 9.662604233483002e-07, "loss": 0.7938, "step": 66990 }, { "epoch": 0.8164844673564647, "grad_norm": 1.8979289531707764, "learning_rate": 9.659397049390635e-07, "loss": 0.8356, "step": 66995 }, { "epoch": 0.8165454035806126, "grad_norm": 2.572667360305786, "learning_rate": 9.65618986529827e-07, "loss": 0.8807, "step": 67000 }, { "epoch": 0.8166063398047604, "grad_norm": 2.0450172424316406, "learning_rate": 9.652982681205903e-07, "loss": 0.7853, "step": 67005 }, { "epoch": 0.8166672760289081, "grad_norm": 1.9106255769729614, "learning_rate": 9.649775497113536e-07, "loss": 0.7936, "step": 67010 }, { "epoch": 0.8167282122530559, "grad_norm": 2.0505003929138184, "learning_rate": 9.646568313021168e-07, "loss": 0.8279, "step": 67015 }, { "epoch": 0.8167891484772037, "grad_norm": 1.7879652976989746, "learning_rate": 9.6433611289288e-07, "loss": 0.8555, "step": 67020 }, { "epoch": 0.8168500847013516, "grad_norm": 1.9108537435531616, "learning_rate": 9.640153944836435e-07, "loss": 0.8745, "step": 67025 }, { "epoch": 0.8169110209254994, "grad_norm": 1.9622159004211426, "learning_rate": 9.636946760744067e-07, "loss": 0.849, "step": 67030 }, { "epoch": 0.8169719571496472, "grad_norm": 1.8437680006027222, "learning_rate": 9.6337395766517e-07, "loss": 0.8518, "step": 67035 }, { "epoch": 0.817032893373795, "grad_norm": 2.2790091037750244, "learning_rate": 9.630532392559334e-07, "loss": 0.8221, "step": 67040 }, { "epoch": 0.8170938295979427, "grad_norm": 2.104039430618286, "learning_rate": 9.627325208466968e-07, "loss": 0.7913, "step": 67045 }, { "epoch": 0.8171547658220906, "grad_norm": 1.9684253931045532, "learning_rate": 9.6241180243746e-07, "loss": 0.8088, "step": 67050 }, { "epoch": 0.8172157020462384, "grad_norm": 2.1220862865448, "learning_rate": 9.620910840282233e-07, "loss": 0.7639, "step": 67055 }, { "epoch": 0.8172766382703862, "grad_norm": 1.8098866939544678, "learning_rate": 9.617703656189865e-07, "loss": 0.7538, "step": 67060 }, { "epoch": 0.817337574494534, "grad_norm": 2.0013363361358643, "learning_rate": 9.6144964720975e-07, "loss": 0.8101, "step": 67065 }, { "epoch": 0.8173985107186819, "grad_norm": 2.2569358348846436, "learning_rate": 9.611289288005132e-07, "loss": 0.8844, "step": 67070 }, { "epoch": 0.8174594469428297, "grad_norm": 1.8046448230743408, "learning_rate": 9.608082103912766e-07, "loss": 0.7808, "step": 67075 }, { "epoch": 0.8175203831669774, "grad_norm": 1.8304619789123535, "learning_rate": 9.604874919820398e-07, "loss": 0.7803, "step": 67080 }, { "epoch": 0.8175813193911252, "grad_norm": 1.9565476179122925, "learning_rate": 9.601667735728033e-07, "loss": 0.8014, "step": 67085 }, { "epoch": 0.817642255615273, "grad_norm": 2.0403172969818115, "learning_rate": 9.598460551635665e-07, "loss": 0.7375, "step": 67090 }, { "epoch": 0.8177031918394209, "grad_norm": 1.7404637336730957, "learning_rate": 9.595253367543297e-07, "loss": 0.8444, "step": 67095 }, { "epoch": 0.8177641280635687, "grad_norm": 1.7217693328857422, "learning_rate": 9.592046183450932e-07, "loss": 0.8297, "step": 67100 }, { "epoch": 0.8178250642877165, "grad_norm": 1.9978832006454468, "learning_rate": 9.588838999358564e-07, "loss": 0.8165, "step": 67105 }, { "epoch": 0.8178860005118643, "grad_norm": 1.7839080095291138, "learning_rate": 9.585631815266196e-07, "loss": 0.8172, "step": 67110 }, { "epoch": 0.817946936736012, "grad_norm": 1.9501762390136719, "learning_rate": 9.58242463117383e-07, "loss": 0.7719, "step": 67115 }, { "epoch": 0.8180078729601599, "grad_norm": 2.154982566833496, "learning_rate": 9.579217447081463e-07, "loss": 0.8334, "step": 67120 }, { "epoch": 0.8180688091843077, "grad_norm": 2.151155710220337, "learning_rate": 9.576010262989097e-07, "loss": 0.8009, "step": 67125 }, { "epoch": 0.8181297454084555, "grad_norm": 2.2847583293914795, "learning_rate": 9.57280307889673e-07, "loss": 0.7459, "step": 67130 }, { "epoch": 0.8181906816326033, "grad_norm": 2.1803271770477295, "learning_rate": 9.569595894804362e-07, "loss": 0.8105, "step": 67135 }, { "epoch": 0.8182516178567512, "grad_norm": 2.201658010482788, "learning_rate": 9.566388710711996e-07, "loss": 0.8086, "step": 67140 }, { "epoch": 0.818312554080899, "grad_norm": 1.8727205991744995, "learning_rate": 9.563181526619629e-07, "loss": 0.7589, "step": 67145 }, { "epoch": 0.8183734903050467, "grad_norm": 2.232663154602051, "learning_rate": 9.55997434252726e-07, "loss": 0.744, "step": 67150 }, { "epoch": 0.8184344265291945, "grad_norm": 1.9606577157974243, "learning_rate": 9.556767158434895e-07, "loss": 0.7421, "step": 67155 }, { "epoch": 0.8184953627533423, "grad_norm": 2.0698888301849365, "learning_rate": 9.553559974342528e-07, "loss": 0.7782, "step": 67160 }, { "epoch": 0.8185562989774902, "grad_norm": 1.834546446800232, "learning_rate": 9.550352790250162e-07, "loss": 0.814, "step": 67165 }, { "epoch": 0.818617235201638, "grad_norm": 2.008059501647949, "learning_rate": 9.547145606157794e-07, "loss": 0.7813, "step": 67170 }, { "epoch": 0.8186781714257858, "grad_norm": 1.8989100456237793, "learning_rate": 9.543938422065427e-07, "loss": 0.8155, "step": 67175 }, { "epoch": 0.8187391076499336, "grad_norm": 1.9349831342697144, "learning_rate": 9.54073123797306e-07, "loss": 0.8741, "step": 67180 }, { "epoch": 0.8188000438740813, "grad_norm": 2.251884698867798, "learning_rate": 9.537524053880693e-07, "loss": 0.7665, "step": 67185 }, { "epoch": 0.8188609800982292, "grad_norm": 2.244525194168091, "learning_rate": 9.534316869788327e-07, "loss": 0.8433, "step": 67190 }, { "epoch": 0.818921916322377, "grad_norm": 2.016404628753662, "learning_rate": 9.531109685695959e-07, "loss": 0.8277, "step": 67195 }, { "epoch": 0.8189828525465248, "grad_norm": 2.150820732116699, "learning_rate": 9.527902501603593e-07, "loss": 0.8267, "step": 67200 }, { "epoch": 0.8190437887706726, "grad_norm": 2.240213632583618, "learning_rate": 9.524695317511227e-07, "loss": 0.797, "step": 67205 }, { "epoch": 0.8191047249948205, "grad_norm": 2.0583858489990234, "learning_rate": 9.521488133418859e-07, "loss": 0.8302, "step": 67210 }, { "epoch": 0.8191656612189683, "grad_norm": 1.9754691123962402, "learning_rate": 9.518280949326491e-07, "loss": 0.7229, "step": 67215 }, { "epoch": 0.819226597443116, "grad_norm": 2.180385112762451, "learning_rate": 9.515073765234126e-07, "loss": 0.7818, "step": 67220 }, { "epoch": 0.8192875336672638, "grad_norm": 2.1079585552215576, "learning_rate": 9.511866581141759e-07, "loss": 0.7803, "step": 67225 }, { "epoch": 0.8193484698914116, "grad_norm": 2.1223270893096924, "learning_rate": 9.508659397049391e-07, "loss": 0.7746, "step": 67230 }, { "epoch": 0.8194094061155595, "grad_norm": 1.9222970008850098, "learning_rate": 9.505452212957024e-07, "loss": 0.7943, "step": 67235 }, { "epoch": 0.8194703423397073, "grad_norm": 2.6398046016693115, "learning_rate": 9.502245028864658e-07, "loss": 0.8483, "step": 67240 }, { "epoch": 0.8195312785638551, "grad_norm": 1.7120354175567627, "learning_rate": 9.499037844772291e-07, "loss": 0.781, "step": 67245 }, { "epoch": 0.8195922147880029, "grad_norm": 1.9805481433868408, "learning_rate": 9.495830660679924e-07, "loss": 0.8696, "step": 67250 }, { "epoch": 0.8196531510121506, "grad_norm": 2.1623077392578125, "learning_rate": 9.492623476587556e-07, "loss": 0.7558, "step": 67255 }, { "epoch": 0.8197140872362985, "grad_norm": 1.7460283041000366, "learning_rate": 9.48941629249519e-07, "loss": 0.8021, "step": 67260 }, { "epoch": 0.8197750234604463, "grad_norm": 1.6596341133117676, "learning_rate": 9.486209108402824e-07, "loss": 0.8595, "step": 67265 }, { "epoch": 0.8198359596845941, "grad_norm": 1.9406614303588867, "learning_rate": 9.483001924310456e-07, "loss": 0.7898, "step": 67270 }, { "epoch": 0.8198968959087419, "grad_norm": 2.1378631591796875, "learning_rate": 9.479794740218089e-07, "loss": 0.8042, "step": 67275 }, { "epoch": 0.8199578321328898, "grad_norm": 2.288238286972046, "learning_rate": 9.476587556125723e-07, "loss": 0.8265, "step": 67280 }, { "epoch": 0.8200187683570376, "grad_norm": 1.8146580457687378, "learning_rate": 9.473380372033356e-07, "loss": 0.8799, "step": 67285 }, { "epoch": 0.8200797045811853, "grad_norm": 2.6199965476989746, "learning_rate": 9.470173187940988e-07, "loss": 0.8005, "step": 67290 }, { "epoch": 0.8201406408053331, "grad_norm": 2.02138352394104, "learning_rate": 9.466966003848622e-07, "loss": 0.8423, "step": 67295 }, { "epoch": 0.8202015770294809, "grad_norm": 2.353301525115967, "learning_rate": 9.463758819756255e-07, "loss": 0.8507, "step": 67300 }, { "epoch": 0.8202625132536288, "grad_norm": 1.919676661491394, "learning_rate": 9.460551635663888e-07, "loss": 0.8523, "step": 67305 }, { "epoch": 0.8203234494777766, "grad_norm": 2.024916410446167, "learning_rate": 9.45734445157152e-07, "loss": 0.8231, "step": 67310 }, { "epoch": 0.8203843857019244, "grad_norm": 2.190398693084717, "learning_rate": 9.454137267479154e-07, "loss": 0.7945, "step": 67315 }, { "epoch": 0.8204453219260722, "grad_norm": 1.7685915231704712, "learning_rate": 9.450930083386787e-07, "loss": 0.795, "step": 67320 }, { "epoch": 0.8205062581502199, "grad_norm": 1.993286371231079, "learning_rate": 9.447722899294421e-07, "loss": 0.8064, "step": 67325 }, { "epoch": 0.8205671943743678, "grad_norm": 1.924533486366272, "learning_rate": 9.444515715202053e-07, "loss": 0.8512, "step": 67330 }, { "epoch": 0.8206281305985156, "grad_norm": 1.7459063529968262, "learning_rate": 9.441308531109686e-07, "loss": 0.7964, "step": 67335 }, { "epoch": 0.8206890668226634, "grad_norm": 2.3009414672851562, "learning_rate": 9.43810134701732e-07, "loss": 0.8002, "step": 67340 }, { "epoch": 0.8207500030468112, "grad_norm": 1.9402425289154053, "learning_rate": 9.434894162924953e-07, "loss": 0.7289, "step": 67345 }, { "epoch": 0.820810939270959, "grad_norm": 2.000455856323242, "learning_rate": 9.431686978832585e-07, "loss": 0.8382, "step": 67350 }, { "epoch": 0.8208718754951069, "grad_norm": 1.9074034690856934, "learning_rate": 9.428479794740218e-07, "loss": 0.8848, "step": 67355 }, { "epoch": 0.8209328117192546, "grad_norm": 1.7435944080352783, "learning_rate": 9.425272610647852e-07, "loss": 0.8121, "step": 67360 }, { "epoch": 0.8209937479434024, "grad_norm": 1.8105967044830322, "learning_rate": 9.422065426555485e-07, "loss": 0.8488, "step": 67365 }, { "epoch": 0.8210546841675502, "grad_norm": 1.981113314628601, "learning_rate": 9.418858242463117e-07, "loss": 0.8, "step": 67370 }, { "epoch": 0.821115620391698, "grad_norm": 3.246119260787964, "learning_rate": 9.415651058370752e-07, "loss": 0.8246, "step": 67375 }, { "epoch": 0.8211765566158459, "grad_norm": 1.97255277633667, "learning_rate": 9.412443874278385e-07, "loss": 0.8096, "step": 67380 }, { "epoch": 0.8212374928399937, "grad_norm": 2.343165159225464, "learning_rate": 9.409236690186017e-07, "loss": 0.8378, "step": 67385 }, { "epoch": 0.8212984290641415, "grad_norm": 1.8880724906921387, "learning_rate": 9.40602950609365e-07, "loss": 0.8753, "step": 67390 }, { "epoch": 0.8213593652882892, "grad_norm": 1.7574182748794556, "learning_rate": 9.402822322001284e-07, "loss": 0.7556, "step": 67395 }, { "epoch": 0.8214203015124371, "grad_norm": 2.011380672454834, "learning_rate": 9.399615137908918e-07, "loss": 0.8368, "step": 67400 }, { "epoch": 0.8214812377365849, "grad_norm": 2.060508966445923, "learning_rate": 9.39640795381655e-07, "loss": 0.8282, "step": 67405 }, { "epoch": 0.8215421739607327, "grad_norm": 1.9038894176483154, "learning_rate": 9.393200769724182e-07, "loss": 0.7725, "step": 67410 }, { "epoch": 0.8216031101848805, "grad_norm": 2.147287130355835, "learning_rate": 9.389993585631816e-07, "loss": 0.8396, "step": 67415 }, { "epoch": 0.8216640464090283, "grad_norm": 2.0553038120269775, "learning_rate": 9.38678640153945e-07, "loss": 0.7612, "step": 67420 }, { "epoch": 0.8217249826331762, "grad_norm": 1.996516227722168, "learning_rate": 9.383579217447082e-07, "loss": 0.7997, "step": 67425 }, { "epoch": 0.8217859188573239, "grad_norm": 2.0114710330963135, "learning_rate": 9.380372033354714e-07, "loss": 0.8588, "step": 67430 }, { "epoch": 0.8218468550814717, "grad_norm": 1.8007787466049194, "learning_rate": 9.377164849262349e-07, "loss": 0.777, "step": 67435 }, { "epoch": 0.8219077913056195, "grad_norm": 2.125378370285034, "learning_rate": 9.373957665169982e-07, "loss": 0.8015, "step": 67440 }, { "epoch": 0.8219687275297674, "grad_norm": 2.4164113998413086, "learning_rate": 9.370750481077614e-07, "loss": 0.8084, "step": 67445 }, { "epoch": 0.8220296637539152, "grad_norm": 2.1756274700164795, "learning_rate": 9.367543296985248e-07, "loss": 0.8533, "step": 67450 }, { "epoch": 0.822090599978063, "grad_norm": 2.0130040645599365, "learning_rate": 9.364336112892881e-07, "loss": 0.8444, "step": 67455 }, { "epoch": 0.8221515362022108, "grad_norm": 1.8674707412719727, "learning_rate": 9.361128928800514e-07, "loss": 0.8506, "step": 67460 }, { "epoch": 0.8222124724263585, "grad_norm": 1.6331586837768555, "learning_rate": 9.357921744708147e-07, "loss": 0.7718, "step": 67465 }, { "epoch": 0.8222734086505064, "grad_norm": 2.2687673568725586, "learning_rate": 9.35471456061578e-07, "loss": 0.8052, "step": 67470 }, { "epoch": 0.8223343448746542, "grad_norm": 1.7890554666519165, "learning_rate": 9.351507376523413e-07, "loss": 0.7987, "step": 67475 }, { "epoch": 0.822395281098802, "grad_norm": 1.7780721187591553, "learning_rate": 9.348300192431047e-07, "loss": 0.7615, "step": 67480 }, { "epoch": 0.8224562173229498, "grad_norm": 2.057783603668213, "learning_rate": 9.345093008338679e-07, "loss": 0.7727, "step": 67485 }, { "epoch": 0.8225171535470976, "grad_norm": 1.887007236480713, "learning_rate": 9.341885824246312e-07, "loss": 0.8513, "step": 67490 }, { "epoch": 0.8225780897712455, "grad_norm": 2.047556161880493, "learning_rate": 9.338678640153946e-07, "loss": 0.761, "step": 67495 }, { "epoch": 0.8226390259953932, "grad_norm": 2.490737199783325, "learning_rate": 9.335471456061579e-07, "loss": 0.8413, "step": 67500 }, { "epoch": 0.822699962219541, "grad_norm": 1.987083077430725, "learning_rate": 9.332264271969211e-07, "loss": 0.7283, "step": 67505 }, { "epoch": 0.8227608984436888, "grad_norm": 2.208296775817871, "learning_rate": 9.329057087876845e-07, "loss": 0.7914, "step": 67510 }, { "epoch": 0.8228218346678366, "grad_norm": 1.9439021348953247, "learning_rate": 9.325849903784478e-07, "loss": 0.8587, "step": 67515 }, { "epoch": 0.8228827708919845, "grad_norm": 1.4814350605010986, "learning_rate": 9.322642719692111e-07, "loss": 0.7703, "step": 67520 }, { "epoch": 0.8229437071161323, "grad_norm": 1.7898528575897217, "learning_rate": 9.319435535599744e-07, "loss": 0.6881, "step": 67525 }, { "epoch": 0.8230046433402801, "grad_norm": 1.8957254886627197, "learning_rate": 9.316228351507377e-07, "loss": 0.7778, "step": 67530 }, { "epoch": 0.8230655795644278, "grad_norm": 1.8052741289138794, "learning_rate": 9.31302116741501e-07, "loss": 0.8396, "step": 67535 }, { "epoch": 0.8231265157885757, "grad_norm": 1.997180700302124, "learning_rate": 9.309813983322644e-07, "loss": 0.7653, "step": 67540 }, { "epoch": 0.8231874520127235, "grad_norm": 1.9153002500534058, "learning_rate": 9.306606799230276e-07, "loss": 0.8409, "step": 67545 }, { "epoch": 0.8232483882368713, "grad_norm": 2.109243392944336, "learning_rate": 9.303399615137909e-07, "loss": 0.7718, "step": 67550 }, { "epoch": 0.8233093244610191, "grad_norm": 2.113718271255493, "learning_rate": 9.300192431045544e-07, "loss": 0.7946, "step": 67555 }, { "epoch": 0.8233702606851669, "grad_norm": 2.1895556449890137, "learning_rate": 9.296985246953176e-07, "loss": 0.8325, "step": 67560 }, { "epoch": 0.8234311969093148, "grad_norm": 2.1584322452545166, "learning_rate": 9.293778062860808e-07, "loss": 0.7987, "step": 67565 }, { "epoch": 0.8234921331334625, "grad_norm": 1.7595751285552979, "learning_rate": 9.290570878768442e-07, "loss": 0.7901, "step": 67570 }, { "epoch": 0.8235530693576103, "grad_norm": 2.0220978260040283, "learning_rate": 9.287363694676076e-07, "loss": 0.7804, "step": 67575 }, { "epoch": 0.8236140055817581, "grad_norm": 1.7850834131240845, "learning_rate": 9.284156510583708e-07, "loss": 0.8597, "step": 67580 }, { "epoch": 0.823674941805906, "grad_norm": 1.7325439453125, "learning_rate": 9.280949326491341e-07, "loss": 0.8569, "step": 67585 }, { "epoch": 0.8237358780300538, "grad_norm": 2.1927568912506104, "learning_rate": 9.277742142398974e-07, "loss": 0.839, "step": 67590 }, { "epoch": 0.8237968142542016, "grad_norm": 1.7539273500442505, "learning_rate": 9.274534958306608e-07, "loss": 0.7822, "step": 67595 }, { "epoch": 0.8238577504783493, "grad_norm": 1.7440298795700073, "learning_rate": 9.271327774214241e-07, "loss": 0.8784, "step": 67600 }, { "epoch": 0.8239186867024971, "grad_norm": 2.6039769649505615, "learning_rate": 9.268120590121873e-07, "loss": 0.7902, "step": 67605 }, { "epoch": 0.823979622926645, "grad_norm": 2.2346909046173096, "learning_rate": 9.264913406029506e-07, "loss": 0.8227, "step": 67610 }, { "epoch": 0.8240405591507928, "grad_norm": 1.7395052909851074, "learning_rate": 9.261706221937141e-07, "loss": 0.8005, "step": 67615 }, { "epoch": 0.8241014953749406, "grad_norm": 2.1547412872314453, "learning_rate": 9.258499037844773e-07, "loss": 0.8523, "step": 67620 }, { "epoch": 0.8241624315990884, "grad_norm": 2.262887716293335, "learning_rate": 9.255291853752406e-07, "loss": 0.773, "step": 67625 }, { "epoch": 0.8242233678232362, "grad_norm": 1.9604650735855103, "learning_rate": 9.252084669660039e-07, "loss": 0.8642, "step": 67630 }, { "epoch": 0.824284304047384, "grad_norm": 2.077314615249634, "learning_rate": 9.248877485567673e-07, "loss": 0.8609, "step": 67635 }, { "epoch": 0.8243452402715318, "grad_norm": 2.121857166290283, "learning_rate": 9.245670301475305e-07, "loss": 0.7887, "step": 67640 }, { "epoch": 0.8244061764956796, "grad_norm": 1.988699197769165, "learning_rate": 9.242463117382939e-07, "loss": 0.9004, "step": 67645 }, { "epoch": 0.8244671127198274, "grad_norm": 1.994253396987915, "learning_rate": 9.239255933290571e-07, "loss": 0.7849, "step": 67650 }, { "epoch": 0.8245280489439752, "grad_norm": 1.9985688924789429, "learning_rate": 9.236048749198205e-07, "loss": 0.7789, "step": 67655 }, { "epoch": 0.8245889851681231, "grad_norm": 1.9154789447784424, "learning_rate": 9.232841565105838e-07, "loss": 0.8326, "step": 67660 }, { "epoch": 0.8246499213922709, "grad_norm": 1.9380356073379517, "learning_rate": 9.229634381013471e-07, "loss": 0.8475, "step": 67665 }, { "epoch": 0.8247108576164186, "grad_norm": 2.7872252464294434, "learning_rate": 9.226427196921103e-07, "loss": 0.8783, "step": 67670 }, { "epoch": 0.8247717938405664, "grad_norm": 1.744553565979004, "learning_rate": 9.223220012828738e-07, "loss": 0.8457, "step": 67675 }, { "epoch": 0.8248327300647142, "grad_norm": 1.9327248334884644, "learning_rate": 9.22001282873637e-07, "loss": 0.7749, "step": 67680 }, { "epoch": 0.8248936662888621, "grad_norm": 2.0209808349609375, "learning_rate": 9.216805644644003e-07, "loss": 0.8408, "step": 67685 }, { "epoch": 0.8249546025130099, "grad_norm": 1.9821051359176636, "learning_rate": 9.213598460551637e-07, "loss": 0.8522, "step": 67690 }, { "epoch": 0.8250155387371577, "grad_norm": 2.006086587905884, "learning_rate": 9.21039127645927e-07, "loss": 0.8241, "step": 67695 }, { "epoch": 0.8250764749613055, "grad_norm": 1.8760522603988647, "learning_rate": 9.207184092366902e-07, "loss": 0.7798, "step": 67700 }, { "epoch": 0.8251374111854533, "grad_norm": 1.840742588043213, "learning_rate": 9.203976908274536e-07, "loss": 0.7625, "step": 67705 }, { "epoch": 0.8251983474096011, "grad_norm": 1.9841790199279785, "learning_rate": 9.200769724182169e-07, "loss": 0.7789, "step": 67710 }, { "epoch": 0.8252592836337489, "grad_norm": 2.1328988075256348, "learning_rate": 9.197562540089802e-07, "loss": 0.7974, "step": 67715 }, { "epoch": 0.8253202198578967, "grad_norm": 2.050917625427246, "learning_rate": 9.194355355997435e-07, "loss": 0.7705, "step": 67720 }, { "epoch": 0.8253811560820445, "grad_norm": 1.9828723669052124, "learning_rate": 9.191148171905068e-07, "loss": 0.8275, "step": 67725 }, { "epoch": 0.8254420923061924, "grad_norm": 2.206700086593628, "learning_rate": 9.187940987812702e-07, "loss": 0.915, "step": 67730 }, { "epoch": 0.8255030285303402, "grad_norm": 2.1877241134643555, "learning_rate": 9.184733803720335e-07, "loss": 0.8266, "step": 67735 }, { "epoch": 0.8255639647544879, "grad_norm": 1.7278631925582886, "learning_rate": 9.181526619627967e-07, "loss": 0.7917, "step": 67740 }, { "epoch": 0.8256249009786357, "grad_norm": 2.4439685344696045, "learning_rate": 9.1783194355356e-07, "loss": 0.812, "step": 67745 }, { "epoch": 0.8256858372027835, "grad_norm": 1.9840404987335205, "learning_rate": 9.175112251443235e-07, "loss": 0.7505, "step": 67750 }, { "epoch": 0.8257467734269314, "grad_norm": 2.1545958518981934, "learning_rate": 9.171905067350867e-07, "loss": 0.8452, "step": 67755 }, { "epoch": 0.8258077096510792, "grad_norm": 2.063152313232422, "learning_rate": 9.168697883258499e-07, "loss": 0.7831, "step": 67760 }, { "epoch": 0.825868645875227, "grad_norm": 1.8427976369857788, "learning_rate": 9.165490699166133e-07, "loss": 0.8403, "step": 67765 }, { "epoch": 0.8259295820993748, "grad_norm": 1.9030581712722778, "learning_rate": 9.162283515073767e-07, "loss": 0.7921, "step": 67770 }, { "epoch": 0.8259905183235225, "grad_norm": 1.9078551530838013, "learning_rate": 9.159076330981399e-07, "loss": 0.8033, "step": 67775 }, { "epoch": 0.8260514545476704, "grad_norm": 1.918112874031067, "learning_rate": 9.155869146889032e-07, "loss": 0.775, "step": 67780 }, { "epoch": 0.8261123907718182, "grad_norm": 2.066038131713867, "learning_rate": 9.152661962796665e-07, "loss": 0.8414, "step": 67785 }, { "epoch": 0.826173326995966, "grad_norm": 2.0376203060150146, "learning_rate": 9.149454778704299e-07, "loss": 0.7716, "step": 67790 }, { "epoch": 0.8262342632201138, "grad_norm": 1.8156477212905884, "learning_rate": 9.146247594611932e-07, "loss": 0.8809, "step": 67795 }, { "epoch": 0.8262951994442617, "grad_norm": 2.182791233062744, "learning_rate": 9.143040410519565e-07, "loss": 0.8321, "step": 67800 }, { "epoch": 0.8263561356684095, "grad_norm": 1.8300459384918213, "learning_rate": 9.139833226427197e-07, "loss": 0.7835, "step": 67805 }, { "epoch": 0.8264170718925572, "grad_norm": 2.149376392364502, "learning_rate": 9.136626042334832e-07, "loss": 0.731, "step": 67810 }, { "epoch": 0.826478008116705, "grad_norm": 2.091848850250244, "learning_rate": 9.133418858242464e-07, "loss": 0.7986, "step": 67815 }, { "epoch": 0.8265389443408528, "grad_norm": 2.566675901412964, "learning_rate": 9.130211674150097e-07, "loss": 0.7836, "step": 67820 }, { "epoch": 0.8265998805650007, "grad_norm": 2.3870158195495605, "learning_rate": 9.12700449005773e-07, "loss": 0.7956, "step": 67825 }, { "epoch": 0.8266608167891485, "grad_norm": 2.012840509414673, "learning_rate": 9.123797305965364e-07, "loss": 0.8501, "step": 67830 }, { "epoch": 0.8267217530132963, "grad_norm": 2.004492998123169, "learning_rate": 9.120590121872996e-07, "loss": 0.8071, "step": 67835 }, { "epoch": 0.8267826892374441, "grad_norm": 2.742704153060913, "learning_rate": 9.11738293778063e-07, "loss": 0.783, "step": 67840 }, { "epoch": 0.8268436254615918, "grad_norm": 2.2449662685394287, "learning_rate": 9.114175753688262e-07, "loss": 0.8446, "step": 67845 }, { "epoch": 0.8269045616857397, "grad_norm": 1.7570412158966064, "learning_rate": 9.110968569595896e-07, "loss": 0.8628, "step": 67850 }, { "epoch": 0.8269654979098875, "grad_norm": 1.9733189344406128, "learning_rate": 9.107761385503529e-07, "loss": 0.7837, "step": 67855 }, { "epoch": 0.8270264341340353, "grad_norm": 1.7366833686828613, "learning_rate": 9.104554201411162e-07, "loss": 0.7481, "step": 67860 }, { "epoch": 0.8270873703581831, "grad_norm": 2.2196624279022217, "learning_rate": 9.101347017318794e-07, "loss": 0.7965, "step": 67865 }, { "epoch": 0.827148306582331, "grad_norm": 1.9642757177352905, "learning_rate": 9.098139833226429e-07, "loss": 0.8142, "step": 67870 }, { "epoch": 0.8272092428064788, "grad_norm": 2.054258346557617, "learning_rate": 9.094932649134061e-07, "loss": 0.7585, "step": 67875 }, { "epoch": 0.8272701790306265, "grad_norm": 2.112051010131836, "learning_rate": 9.091725465041694e-07, "loss": 0.793, "step": 67880 }, { "epoch": 0.8273311152547743, "grad_norm": 2.278989315032959, "learning_rate": 9.088518280949326e-07, "loss": 0.7849, "step": 67885 }, { "epoch": 0.8273920514789221, "grad_norm": 1.8271695375442505, "learning_rate": 9.085311096856961e-07, "loss": 0.811, "step": 67890 }, { "epoch": 0.82745298770307, "grad_norm": 1.7338273525238037, "learning_rate": 9.082103912764593e-07, "loss": 0.76, "step": 67895 }, { "epoch": 0.8275139239272178, "grad_norm": 2.2008421421051025, "learning_rate": 9.078896728672227e-07, "loss": 0.8453, "step": 67900 }, { "epoch": 0.8275748601513656, "grad_norm": 1.8810529708862305, "learning_rate": 9.075689544579859e-07, "loss": 0.7845, "step": 67905 }, { "epoch": 0.8276357963755134, "grad_norm": 1.8176319599151611, "learning_rate": 9.072482360487493e-07, "loss": 0.7516, "step": 67910 }, { "epoch": 0.8276967325996611, "grad_norm": 1.820462942123413, "learning_rate": 9.069275176395126e-07, "loss": 0.7296, "step": 67915 }, { "epoch": 0.827757668823809, "grad_norm": 2.165872573852539, "learning_rate": 9.066067992302759e-07, "loss": 0.8313, "step": 67920 }, { "epoch": 0.8278186050479568, "grad_norm": 1.9646754264831543, "learning_rate": 9.062860808210391e-07, "loss": 0.8289, "step": 67925 }, { "epoch": 0.8278795412721046, "grad_norm": 2.012021780014038, "learning_rate": 9.059653624118026e-07, "loss": 0.8133, "step": 67930 }, { "epoch": 0.8279404774962524, "grad_norm": 1.7855494022369385, "learning_rate": 9.056446440025658e-07, "loss": 0.7722, "step": 67935 }, { "epoch": 0.8280014137204003, "grad_norm": 1.947556972503662, "learning_rate": 9.053239255933291e-07, "loss": 0.7809, "step": 67940 }, { "epoch": 0.8280623499445481, "grad_norm": 1.9916772842407227, "learning_rate": 9.050032071840923e-07, "loss": 0.7676, "step": 67945 }, { "epoch": 0.8281232861686958, "grad_norm": 2.3700613975524902, "learning_rate": 9.046824887748558e-07, "loss": 0.8039, "step": 67950 }, { "epoch": 0.8281842223928436, "grad_norm": 1.864787220954895, "learning_rate": 9.04361770365619e-07, "loss": 0.8459, "step": 67955 }, { "epoch": 0.8282451586169914, "grad_norm": 2.2310690879821777, "learning_rate": 9.040410519563823e-07, "loss": 0.8613, "step": 67960 }, { "epoch": 0.8283060948411393, "grad_norm": 2.378180742263794, "learning_rate": 9.037203335471456e-07, "loss": 0.807, "step": 67965 }, { "epoch": 0.8283670310652871, "grad_norm": 2.1755011081695557, "learning_rate": 9.03399615137909e-07, "loss": 0.7919, "step": 67970 }, { "epoch": 0.8284279672894349, "grad_norm": 1.8884251117706299, "learning_rate": 9.030788967286724e-07, "loss": 0.7463, "step": 67975 }, { "epoch": 0.8284889035135827, "grad_norm": 1.7820996046066284, "learning_rate": 9.027581783194356e-07, "loss": 0.8613, "step": 67980 }, { "epoch": 0.8285498397377304, "grad_norm": 1.7242306470870972, "learning_rate": 9.02437459910199e-07, "loss": 0.8313, "step": 67985 }, { "epoch": 0.8286107759618783, "grad_norm": 2.4239301681518555, "learning_rate": 9.021167415009622e-07, "loss": 0.8377, "step": 67990 }, { "epoch": 0.8286717121860261, "grad_norm": 2.488039255142212, "learning_rate": 9.017960230917256e-07, "loss": 0.8185, "step": 67995 }, { "epoch": 0.8287326484101739, "grad_norm": 2.072028636932373, "learning_rate": 9.014753046824888e-07, "loss": 0.8029, "step": 68000 }, { "epoch": 0.8287935846343217, "grad_norm": 1.9235379695892334, "learning_rate": 9.011545862732523e-07, "loss": 0.8156, "step": 68005 }, { "epoch": 0.8288545208584696, "grad_norm": 2.0173659324645996, "learning_rate": 9.008338678640155e-07, "loss": 0.8457, "step": 68010 }, { "epoch": 0.8289154570826174, "grad_norm": 1.993533730506897, "learning_rate": 9.005131494547788e-07, "loss": 0.8845, "step": 68015 }, { "epoch": 0.8289763933067651, "grad_norm": 1.9582933187484741, "learning_rate": 9.00192431045542e-07, "loss": 0.8117, "step": 68020 }, { "epoch": 0.8290373295309129, "grad_norm": 2.1621780395507812, "learning_rate": 8.998717126363055e-07, "loss": 0.8356, "step": 68025 }, { "epoch": 0.8290982657550607, "grad_norm": 1.690948247909546, "learning_rate": 8.995509942270687e-07, "loss": 0.7976, "step": 68030 }, { "epoch": 0.8291592019792086, "grad_norm": 1.957962989807129, "learning_rate": 8.99230275817832e-07, "loss": 0.8388, "step": 68035 }, { "epoch": 0.8292201382033564, "grad_norm": 2.015042543411255, "learning_rate": 8.989095574085953e-07, "loss": 0.8267, "step": 68040 }, { "epoch": 0.8292810744275042, "grad_norm": 1.9529145956039429, "learning_rate": 8.985888389993587e-07, "loss": 0.8416, "step": 68045 }, { "epoch": 0.829342010651652, "grad_norm": 2.103381872177124, "learning_rate": 8.982681205901219e-07, "loss": 0.9017, "step": 68050 }, { "epoch": 0.8294029468757997, "grad_norm": 1.839318037033081, "learning_rate": 8.979474021808853e-07, "loss": 0.8453, "step": 68055 }, { "epoch": 0.8294638830999476, "grad_norm": 2.3601505756378174, "learning_rate": 8.976266837716485e-07, "loss": 0.807, "step": 68060 }, { "epoch": 0.8295248193240954, "grad_norm": 2.430178165435791, "learning_rate": 8.97305965362412e-07, "loss": 0.8741, "step": 68065 }, { "epoch": 0.8295857555482432, "grad_norm": 1.7057888507843018, "learning_rate": 8.969852469531752e-07, "loss": 0.8192, "step": 68070 }, { "epoch": 0.829646691772391, "grad_norm": 2.3432135581970215, "learning_rate": 8.966645285439385e-07, "loss": 0.8491, "step": 68075 }, { "epoch": 0.8297076279965389, "grad_norm": 1.953065037727356, "learning_rate": 8.963438101347017e-07, "loss": 0.828, "step": 68080 }, { "epoch": 0.8297685642206867, "grad_norm": 2.1220040321350098, "learning_rate": 8.960230917254652e-07, "loss": 0.8564, "step": 68085 }, { "epoch": 0.8298295004448344, "grad_norm": 1.7989660501480103, "learning_rate": 8.957023733162284e-07, "loss": 0.7715, "step": 68090 }, { "epoch": 0.8298904366689822, "grad_norm": 2.0680792331695557, "learning_rate": 8.953816549069917e-07, "loss": 0.8522, "step": 68095 }, { "epoch": 0.82995137289313, "grad_norm": 2.0379698276519775, "learning_rate": 8.95060936497755e-07, "loss": 0.7925, "step": 68100 }, { "epoch": 0.8300123091172779, "grad_norm": 1.701496958732605, "learning_rate": 8.947402180885184e-07, "loss": 0.7741, "step": 68105 }, { "epoch": 0.8300732453414257, "grad_norm": 2.1681840419769287, "learning_rate": 8.944194996792816e-07, "loss": 0.8437, "step": 68110 }, { "epoch": 0.8301341815655735, "grad_norm": 2.16548752784729, "learning_rate": 8.94098781270045e-07, "loss": 0.7613, "step": 68115 }, { "epoch": 0.8301951177897213, "grad_norm": 2.2389237880706787, "learning_rate": 8.937780628608082e-07, "loss": 0.771, "step": 68120 }, { "epoch": 0.830256054013869, "grad_norm": 1.7822601795196533, "learning_rate": 8.934573444515716e-07, "loss": 0.8197, "step": 68125 }, { "epoch": 0.8303169902380169, "grad_norm": 1.9444633722305298, "learning_rate": 8.931366260423349e-07, "loss": 0.7701, "step": 68130 }, { "epoch": 0.8303779264621647, "grad_norm": 2.4871888160705566, "learning_rate": 8.928159076330982e-07, "loss": 0.7074, "step": 68135 }, { "epoch": 0.8304388626863125, "grad_norm": 1.9078322649002075, "learning_rate": 8.924951892238614e-07, "loss": 0.7416, "step": 68140 }, { "epoch": 0.8304997989104603, "grad_norm": 1.9911457300186157, "learning_rate": 8.921744708146249e-07, "loss": 0.786, "step": 68145 }, { "epoch": 0.8305607351346082, "grad_norm": 2.0937724113464355, "learning_rate": 8.918537524053882e-07, "loss": 0.8906, "step": 68150 }, { "epoch": 0.830621671358756, "grad_norm": 1.8588297367095947, "learning_rate": 8.915330339961514e-07, "loss": 0.8068, "step": 68155 }, { "epoch": 0.8306826075829037, "grad_norm": 2.0674939155578613, "learning_rate": 8.912123155869147e-07, "loss": 0.8377, "step": 68160 }, { "epoch": 0.8307435438070515, "grad_norm": 1.9390467405319214, "learning_rate": 8.908915971776781e-07, "loss": 0.7897, "step": 68165 }, { "epoch": 0.8308044800311993, "grad_norm": 2.349672794342041, "learning_rate": 8.905708787684414e-07, "loss": 0.7627, "step": 68170 }, { "epoch": 0.8308654162553472, "grad_norm": 1.7110661268234253, "learning_rate": 8.902501603592047e-07, "loss": 0.8299, "step": 68175 }, { "epoch": 0.830926352479495, "grad_norm": 1.7358171939849854, "learning_rate": 8.899294419499679e-07, "loss": 0.8571, "step": 68180 }, { "epoch": 0.8309872887036428, "grad_norm": 1.9440109729766846, "learning_rate": 8.896087235407313e-07, "loss": 0.8436, "step": 68185 }, { "epoch": 0.8310482249277906, "grad_norm": 2.1659657955169678, "learning_rate": 8.892880051314947e-07, "loss": 0.8486, "step": 68190 }, { "epoch": 0.8311091611519383, "grad_norm": 1.9776561260223389, "learning_rate": 8.889672867222579e-07, "loss": 0.8182, "step": 68195 }, { "epoch": 0.8311700973760862, "grad_norm": 2.308969259262085, "learning_rate": 8.886465683130211e-07, "loss": 0.8779, "step": 68200 }, { "epoch": 0.831231033600234, "grad_norm": 1.9228765964508057, "learning_rate": 8.883258499037846e-07, "loss": 0.9191, "step": 68205 }, { "epoch": 0.8312919698243818, "grad_norm": 2.3104844093322754, "learning_rate": 8.880051314945479e-07, "loss": 0.8294, "step": 68210 }, { "epoch": 0.8313529060485296, "grad_norm": 2.034470796585083, "learning_rate": 8.876844130853111e-07, "loss": 0.8223, "step": 68215 }, { "epoch": 0.8314138422726775, "grad_norm": 2.065481185913086, "learning_rate": 8.873636946760745e-07, "loss": 0.7553, "step": 68220 }, { "epoch": 0.8314747784968253, "grad_norm": 1.8548636436462402, "learning_rate": 8.870429762668378e-07, "loss": 0.809, "step": 68225 }, { "epoch": 0.831535714720973, "grad_norm": 2.0214486122131348, "learning_rate": 8.867222578576011e-07, "loss": 0.7782, "step": 68230 }, { "epoch": 0.8315966509451208, "grad_norm": 2.2323479652404785, "learning_rate": 8.864015394483644e-07, "loss": 0.8712, "step": 68235 }, { "epoch": 0.8316575871692686, "grad_norm": 1.9278538227081299, "learning_rate": 8.860808210391277e-07, "loss": 0.7779, "step": 68240 }, { "epoch": 0.8317185233934165, "grad_norm": 2.1558399200439453, "learning_rate": 8.85760102629891e-07, "loss": 0.853, "step": 68245 }, { "epoch": 0.8317794596175643, "grad_norm": 1.9018604755401611, "learning_rate": 8.854393842206544e-07, "loss": 0.8039, "step": 68250 }, { "epoch": 0.8318403958417121, "grad_norm": 2.1893258094787598, "learning_rate": 8.851186658114176e-07, "loss": 0.7946, "step": 68255 }, { "epoch": 0.8319013320658599, "grad_norm": 1.777551531791687, "learning_rate": 8.847979474021809e-07, "loss": 0.7572, "step": 68260 }, { "epoch": 0.8319622682900076, "grad_norm": 1.8390096426010132, "learning_rate": 8.844772289929443e-07, "loss": 0.8247, "step": 68265 }, { "epoch": 0.8320232045141555, "grad_norm": 1.7590099573135376, "learning_rate": 8.841565105837076e-07, "loss": 0.8189, "step": 68270 }, { "epoch": 0.8320841407383033, "grad_norm": 1.8418697118759155, "learning_rate": 8.838357921744708e-07, "loss": 0.8714, "step": 68275 }, { "epoch": 0.8321450769624511, "grad_norm": 1.9198389053344727, "learning_rate": 8.835150737652343e-07, "loss": 0.8785, "step": 68280 }, { "epoch": 0.8322060131865989, "grad_norm": 1.9741079807281494, "learning_rate": 8.831943553559975e-07, "loss": 0.7773, "step": 68285 }, { "epoch": 0.8322669494107467, "grad_norm": 1.9511890411376953, "learning_rate": 8.828736369467608e-07, "loss": 0.7683, "step": 68290 }, { "epoch": 0.8323278856348946, "grad_norm": 1.964524745941162, "learning_rate": 8.825529185375241e-07, "loss": 0.8015, "step": 68295 }, { "epoch": 0.8323888218590423, "grad_norm": 1.8689504861831665, "learning_rate": 8.822322001282875e-07, "loss": 0.8091, "step": 68300 }, { "epoch": 0.8324497580831901, "grad_norm": 2.5376131534576416, "learning_rate": 8.819114817190507e-07, "loss": 0.8411, "step": 68305 }, { "epoch": 0.8325106943073379, "grad_norm": 1.8149096965789795, "learning_rate": 8.815907633098141e-07, "loss": 0.7557, "step": 68310 }, { "epoch": 0.8325716305314858, "grad_norm": 1.6889866590499878, "learning_rate": 8.812700449005773e-07, "loss": 0.8336, "step": 68315 }, { "epoch": 0.8326325667556336, "grad_norm": 1.8005506992340088, "learning_rate": 8.809493264913407e-07, "loss": 0.8595, "step": 68320 }, { "epoch": 0.8326935029797814, "grad_norm": 1.8605514764785767, "learning_rate": 8.806286080821041e-07, "loss": 0.723, "step": 68325 }, { "epoch": 0.8327544392039292, "grad_norm": 2.0788040161132812, "learning_rate": 8.803078896728673e-07, "loss": 0.7842, "step": 68330 }, { "epoch": 0.8328153754280769, "grad_norm": 2.0899736881256104, "learning_rate": 8.799871712636305e-07, "loss": 0.8059, "step": 68335 }, { "epoch": 0.8328763116522248, "grad_norm": 2.0482165813446045, "learning_rate": 8.79666452854394e-07, "loss": 0.8242, "step": 68340 }, { "epoch": 0.8329372478763726, "grad_norm": 2.129775047302246, "learning_rate": 8.793457344451573e-07, "loss": 0.7894, "step": 68345 }, { "epoch": 0.8329981841005204, "grad_norm": 1.983076572418213, "learning_rate": 8.790250160359205e-07, "loss": 0.7926, "step": 68350 }, { "epoch": 0.8330591203246682, "grad_norm": 2.2701220512390137, "learning_rate": 8.787042976266838e-07, "loss": 0.8173, "step": 68355 }, { "epoch": 0.833120056548816, "grad_norm": 2.0441601276397705, "learning_rate": 8.783835792174472e-07, "loss": 0.8283, "step": 68360 }, { "epoch": 0.8331809927729639, "grad_norm": 1.722161889076233, "learning_rate": 8.780628608082105e-07, "loss": 0.7674, "step": 68365 }, { "epoch": 0.8332419289971116, "grad_norm": 2.3186023235321045, "learning_rate": 8.777421423989738e-07, "loss": 0.7403, "step": 68370 }, { "epoch": 0.8333028652212594, "grad_norm": 2.204420328140259, "learning_rate": 8.77421423989737e-07, "loss": 0.8246, "step": 68375 }, { "epoch": 0.8333638014454072, "grad_norm": 1.6230913400650024, "learning_rate": 8.771007055805004e-07, "loss": 0.747, "step": 68380 }, { "epoch": 0.833424737669555, "grad_norm": 1.8888016939163208, "learning_rate": 8.767799871712638e-07, "loss": 0.8239, "step": 68385 }, { "epoch": 0.8334856738937029, "grad_norm": 2.349759340286255, "learning_rate": 8.76459268762027e-07, "loss": 0.7857, "step": 68390 }, { "epoch": 0.8335466101178507, "grad_norm": 1.9557695388793945, "learning_rate": 8.761385503527903e-07, "loss": 0.8157, "step": 68395 }, { "epoch": 0.8336075463419985, "grad_norm": 1.750781536102295, "learning_rate": 8.758178319435537e-07, "loss": 0.7701, "step": 68400 }, { "epoch": 0.8336684825661462, "grad_norm": 1.8951575756072998, "learning_rate": 8.75497113534317e-07, "loss": 0.8016, "step": 68405 }, { "epoch": 0.833729418790294, "grad_norm": 1.796047568321228, "learning_rate": 8.751763951250802e-07, "loss": 0.8433, "step": 68410 }, { "epoch": 0.8337903550144419, "grad_norm": 2.0321872234344482, "learning_rate": 8.748556767158436e-07, "loss": 0.7211, "step": 68415 }, { "epoch": 0.8338512912385897, "grad_norm": 1.882797122001648, "learning_rate": 8.745349583066069e-07, "loss": 0.7897, "step": 68420 }, { "epoch": 0.8339122274627375, "grad_norm": 1.7594670057296753, "learning_rate": 8.742142398973702e-07, "loss": 0.7791, "step": 68425 }, { "epoch": 0.8339731636868853, "grad_norm": 2.4539520740509033, "learning_rate": 8.738935214881335e-07, "loss": 0.791, "step": 68430 }, { "epoch": 0.8340340999110332, "grad_norm": 1.7817888259887695, "learning_rate": 8.735728030788968e-07, "loss": 0.7795, "step": 68435 }, { "epoch": 0.8340950361351809, "grad_norm": 2.089219570159912, "learning_rate": 8.732520846696601e-07, "loss": 0.7876, "step": 68440 }, { "epoch": 0.8341559723593287, "grad_norm": 1.9409008026123047, "learning_rate": 8.729313662604235e-07, "loss": 0.7965, "step": 68445 }, { "epoch": 0.8342169085834765, "grad_norm": 2.224740505218506, "learning_rate": 8.726106478511867e-07, "loss": 0.8136, "step": 68450 }, { "epoch": 0.8342778448076243, "grad_norm": 1.9927860498428345, "learning_rate": 8.7228992944195e-07, "loss": 0.8385, "step": 68455 }, { "epoch": 0.8343387810317722, "grad_norm": 1.8833297491073608, "learning_rate": 8.719692110327134e-07, "loss": 0.8347, "step": 68460 }, { "epoch": 0.83439971725592, "grad_norm": 1.9138447046279907, "learning_rate": 8.716484926234767e-07, "loss": 0.8366, "step": 68465 }, { "epoch": 0.8344606534800678, "grad_norm": 2.2953927516937256, "learning_rate": 8.713277742142399e-07, "loss": 0.8276, "step": 68470 }, { "epoch": 0.8345215897042155, "grad_norm": 2.2956788539886475, "learning_rate": 8.710070558050033e-07, "loss": 0.8535, "step": 68475 }, { "epoch": 0.8345825259283634, "grad_norm": 1.9195005893707275, "learning_rate": 8.706863373957666e-07, "loss": 0.7288, "step": 68480 }, { "epoch": 0.8346434621525112, "grad_norm": 2.2113869190216064, "learning_rate": 8.703656189865299e-07, "loss": 0.8517, "step": 68485 }, { "epoch": 0.834704398376659, "grad_norm": 1.959191083908081, "learning_rate": 8.700449005772931e-07, "loss": 0.8987, "step": 68490 }, { "epoch": 0.8347653346008068, "grad_norm": 2.0384082794189453, "learning_rate": 8.697241821680565e-07, "loss": 0.8083, "step": 68495 }, { "epoch": 0.8348262708249546, "grad_norm": 2.150212287902832, "learning_rate": 8.694034637588199e-07, "loss": 0.8742, "step": 68500 }, { "epoch": 0.8348872070491025, "grad_norm": 2.2164855003356934, "learning_rate": 8.690827453495832e-07, "loss": 0.8211, "step": 68505 }, { "epoch": 0.8349481432732502, "grad_norm": 1.8235503435134888, "learning_rate": 8.687620269403464e-07, "loss": 0.8386, "step": 68510 }, { "epoch": 0.835009079497398, "grad_norm": 1.9152613878250122, "learning_rate": 8.684413085311097e-07, "loss": 0.8397, "step": 68515 }, { "epoch": 0.8350700157215458, "grad_norm": 2.106968641281128, "learning_rate": 8.681205901218732e-07, "loss": 0.8653, "step": 68520 }, { "epoch": 0.8351309519456936, "grad_norm": 2.035177230834961, "learning_rate": 8.677998717126364e-07, "loss": 0.8404, "step": 68525 }, { "epoch": 0.8351918881698415, "grad_norm": 1.8813804388046265, "learning_rate": 8.674791533033996e-07, "loss": 0.7931, "step": 68530 }, { "epoch": 0.8352528243939893, "grad_norm": 2.4853172302246094, "learning_rate": 8.671584348941629e-07, "loss": 0.7933, "step": 68535 }, { "epoch": 0.835313760618137, "grad_norm": 1.836779236793518, "learning_rate": 8.668377164849264e-07, "loss": 0.8142, "step": 68540 }, { "epoch": 0.8353746968422848, "grad_norm": 1.8957722187042236, "learning_rate": 8.665169980756896e-07, "loss": 0.7958, "step": 68545 }, { "epoch": 0.8354356330664326, "grad_norm": 1.9443178176879883, "learning_rate": 8.661962796664528e-07, "loss": 0.8551, "step": 68550 }, { "epoch": 0.8354965692905805, "grad_norm": 1.8574761152267456, "learning_rate": 8.658755612572162e-07, "loss": 0.8194, "step": 68555 }, { "epoch": 0.8355575055147283, "grad_norm": 2.096526861190796, "learning_rate": 8.655548428479796e-07, "loss": 0.8156, "step": 68560 }, { "epoch": 0.8356184417388761, "grad_norm": 2.2172367572784424, "learning_rate": 8.652341244387428e-07, "loss": 0.881, "step": 68565 }, { "epoch": 0.8356793779630239, "grad_norm": 1.6892958879470825, "learning_rate": 8.649134060295062e-07, "loss": 0.8313, "step": 68570 }, { "epoch": 0.8357403141871717, "grad_norm": 1.7300009727478027, "learning_rate": 8.645926876202694e-07, "loss": 0.7737, "step": 68575 }, { "epoch": 0.8358012504113195, "grad_norm": 1.936267375946045, "learning_rate": 8.642719692110329e-07, "loss": 0.8274, "step": 68580 }, { "epoch": 0.8358621866354673, "grad_norm": 2.2566158771514893, "learning_rate": 8.639512508017961e-07, "loss": 0.8726, "step": 68585 }, { "epoch": 0.8359231228596151, "grad_norm": 1.937835454940796, "learning_rate": 8.636305323925594e-07, "loss": 0.8463, "step": 68590 }, { "epoch": 0.8359840590837629, "grad_norm": 1.7095521688461304, "learning_rate": 8.633098139833227e-07, "loss": 0.7694, "step": 68595 }, { "epoch": 0.8360449953079108, "grad_norm": 2.178426504135132, "learning_rate": 8.629890955740861e-07, "loss": 0.8103, "step": 68600 }, { "epoch": 0.8361059315320586, "grad_norm": 2.055694341659546, "learning_rate": 8.626683771648493e-07, "loss": 0.8171, "step": 68605 }, { "epoch": 0.8361668677562063, "grad_norm": 2.0151429176330566, "learning_rate": 8.623476587556126e-07, "loss": 0.7883, "step": 68610 }, { "epoch": 0.8362278039803541, "grad_norm": 2.352154493331909, "learning_rate": 8.62026940346376e-07, "loss": 0.7724, "step": 68615 }, { "epoch": 0.836288740204502, "grad_norm": 1.8952770233154297, "learning_rate": 8.617062219371393e-07, "loss": 0.8297, "step": 68620 }, { "epoch": 0.8363496764286498, "grad_norm": 1.953246831893921, "learning_rate": 8.613855035279025e-07, "loss": 0.8686, "step": 68625 }, { "epoch": 0.8364106126527976, "grad_norm": 2.2955379486083984, "learning_rate": 8.610647851186659e-07, "loss": 0.7585, "step": 68630 }, { "epoch": 0.8364715488769454, "grad_norm": 1.676496148109436, "learning_rate": 8.607440667094292e-07, "loss": 0.7747, "step": 68635 }, { "epoch": 0.8365324851010932, "grad_norm": 1.8126646280288696, "learning_rate": 8.604233483001925e-07, "loss": 0.7685, "step": 68640 }, { "epoch": 0.836593421325241, "grad_norm": 2.025735855102539, "learning_rate": 8.601026298909558e-07, "loss": 0.8662, "step": 68645 }, { "epoch": 0.8366543575493888, "grad_norm": 1.7840536832809448, "learning_rate": 8.597819114817191e-07, "loss": 0.7675, "step": 68650 }, { "epoch": 0.8367152937735366, "grad_norm": 2.282691717147827, "learning_rate": 8.594611930724824e-07, "loss": 0.8124, "step": 68655 }, { "epoch": 0.8367762299976844, "grad_norm": 1.8782025575637817, "learning_rate": 8.591404746632458e-07, "loss": 0.7872, "step": 68660 }, { "epoch": 0.8368371662218322, "grad_norm": 2.403719902038574, "learning_rate": 8.58819756254009e-07, "loss": 0.8264, "step": 68665 }, { "epoch": 0.8368981024459801, "grad_norm": 1.741446614265442, "learning_rate": 8.584990378447723e-07, "loss": 0.8223, "step": 68670 }, { "epoch": 0.8369590386701279, "grad_norm": 2.178834915161133, "learning_rate": 8.581783194355358e-07, "loss": 0.7956, "step": 68675 }, { "epoch": 0.8370199748942756, "grad_norm": 1.8253827095031738, "learning_rate": 8.57857601026299e-07, "loss": 0.8309, "step": 68680 }, { "epoch": 0.8370809111184234, "grad_norm": 2.0437662601470947, "learning_rate": 8.575368826170622e-07, "loss": 0.859, "step": 68685 }, { "epoch": 0.8371418473425712, "grad_norm": 2.213794469833374, "learning_rate": 8.572161642078256e-07, "loss": 0.78, "step": 68690 }, { "epoch": 0.8372027835667191, "grad_norm": 2.4612228870391846, "learning_rate": 8.56895445798589e-07, "loss": 0.7971, "step": 68695 }, { "epoch": 0.8372637197908669, "grad_norm": 1.99196457862854, "learning_rate": 8.565747273893522e-07, "loss": 0.7545, "step": 68700 }, { "epoch": 0.8373246560150147, "grad_norm": 2.049684762954712, "learning_rate": 8.562540089801155e-07, "loss": 0.8111, "step": 68705 }, { "epoch": 0.8373855922391625, "grad_norm": 1.8806592226028442, "learning_rate": 8.559332905708788e-07, "loss": 0.8123, "step": 68710 }, { "epoch": 0.8374465284633102, "grad_norm": 2.2057340145111084, "learning_rate": 8.556125721616422e-07, "loss": 0.8374, "step": 68715 }, { "epoch": 0.8375074646874581, "grad_norm": 1.9881088733673096, "learning_rate": 8.552918537524055e-07, "loss": 0.7596, "step": 68720 }, { "epoch": 0.8375684009116059, "grad_norm": 2.2372796535491943, "learning_rate": 8.549711353431687e-07, "loss": 0.7969, "step": 68725 }, { "epoch": 0.8376293371357537, "grad_norm": 1.8510595560073853, "learning_rate": 8.54650416933932e-07, "loss": 0.8105, "step": 68730 }, { "epoch": 0.8376902733599015, "grad_norm": 1.7955423593521118, "learning_rate": 8.543296985246955e-07, "loss": 0.7671, "step": 68735 }, { "epoch": 0.8377512095840494, "grad_norm": 2.048942804336548, "learning_rate": 8.540089801154587e-07, "loss": 0.8039, "step": 68740 }, { "epoch": 0.8378121458081972, "grad_norm": 1.9758867025375366, "learning_rate": 8.53688261706222e-07, "loss": 0.8171, "step": 68745 }, { "epoch": 0.8378730820323449, "grad_norm": 2.0346405506134033, "learning_rate": 8.533675432969853e-07, "loss": 0.8571, "step": 68750 }, { "epoch": 0.8379340182564927, "grad_norm": 1.9162380695343018, "learning_rate": 8.530468248877487e-07, "loss": 0.7949, "step": 68755 }, { "epoch": 0.8379949544806405, "grad_norm": 1.7350183725357056, "learning_rate": 8.527261064785119e-07, "loss": 0.7855, "step": 68760 }, { "epoch": 0.8380558907047884, "grad_norm": 1.7535815238952637, "learning_rate": 8.524053880692753e-07, "loss": 0.7863, "step": 68765 }, { "epoch": 0.8381168269289362, "grad_norm": 1.8632090091705322, "learning_rate": 8.520846696600385e-07, "loss": 0.7494, "step": 68770 }, { "epoch": 0.838177763153084, "grad_norm": 1.604161262512207, "learning_rate": 8.517639512508019e-07, "loss": 0.8739, "step": 68775 }, { "epoch": 0.8382386993772318, "grad_norm": 1.844541311264038, "learning_rate": 8.514432328415652e-07, "loss": 0.7311, "step": 68780 }, { "epoch": 0.8382996356013795, "grad_norm": 2.480205535888672, "learning_rate": 8.511225144323285e-07, "loss": 0.7893, "step": 68785 }, { "epoch": 0.8383605718255274, "grad_norm": 1.9865057468414307, "learning_rate": 8.508017960230917e-07, "loss": 0.8177, "step": 68790 }, { "epoch": 0.8384215080496752, "grad_norm": 2.058349132537842, "learning_rate": 8.504810776138552e-07, "loss": 0.8177, "step": 68795 }, { "epoch": 0.838482444273823, "grad_norm": 1.9402486085891724, "learning_rate": 8.501603592046184e-07, "loss": 0.7668, "step": 68800 }, { "epoch": 0.8385433804979708, "grad_norm": 1.7519875764846802, "learning_rate": 8.498396407953817e-07, "loss": 0.7961, "step": 68805 }, { "epoch": 0.8386043167221187, "grad_norm": 2.537325143814087, "learning_rate": 8.49518922386145e-07, "loss": 0.7424, "step": 68810 }, { "epoch": 0.8386652529462665, "grad_norm": 2.6706488132476807, "learning_rate": 8.491982039769084e-07, "loss": 0.8753, "step": 68815 }, { "epoch": 0.8387261891704142, "grad_norm": 1.9461625814437866, "learning_rate": 8.488774855676716e-07, "loss": 0.7855, "step": 68820 }, { "epoch": 0.838787125394562, "grad_norm": 1.9677191972732544, "learning_rate": 8.48556767158435e-07, "loss": 0.7639, "step": 68825 }, { "epoch": 0.8388480616187098, "grad_norm": 1.6953901052474976, "learning_rate": 8.482360487491982e-07, "loss": 0.8111, "step": 68830 }, { "epoch": 0.8389089978428577, "grad_norm": 1.8963772058486938, "learning_rate": 8.479153303399616e-07, "loss": 0.8479, "step": 68835 }, { "epoch": 0.8389699340670055, "grad_norm": 2.0201966762542725, "learning_rate": 8.475946119307249e-07, "loss": 0.7999, "step": 68840 }, { "epoch": 0.8390308702911533, "grad_norm": 1.9677588939666748, "learning_rate": 8.472738935214882e-07, "loss": 0.81, "step": 68845 }, { "epoch": 0.8390918065153011, "grad_norm": 1.7676630020141602, "learning_rate": 8.469531751122514e-07, "loss": 0.7509, "step": 68850 }, { "epoch": 0.8391527427394488, "grad_norm": 1.9452258348464966, "learning_rate": 8.466324567030149e-07, "loss": 0.8588, "step": 68855 }, { "epoch": 0.8392136789635967, "grad_norm": 2.0313756465911865, "learning_rate": 8.463117382937781e-07, "loss": 0.7865, "step": 68860 }, { "epoch": 0.8392746151877445, "grad_norm": 1.8287806510925293, "learning_rate": 8.459910198845414e-07, "loss": 0.7872, "step": 68865 }, { "epoch": 0.8393355514118923, "grad_norm": 1.6686147451400757, "learning_rate": 8.456703014753047e-07, "loss": 0.8335, "step": 68870 }, { "epoch": 0.8393964876360401, "grad_norm": 1.9822765588760376, "learning_rate": 8.453495830660681e-07, "loss": 0.8609, "step": 68875 }, { "epoch": 0.839457423860188, "grad_norm": 2.4574806690216064, "learning_rate": 8.450288646568313e-07, "loss": 0.8559, "step": 68880 }, { "epoch": 0.8395183600843358, "grad_norm": 1.6642659902572632, "learning_rate": 8.447081462475947e-07, "loss": 0.8042, "step": 68885 }, { "epoch": 0.8395792963084835, "grad_norm": 2.1832003593444824, "learning_rate": 8.443874278383581e-07, "loss": 0.8266, "step": 68890 }, { "epoch": 0.8396402325326313, "grad_norm": 2.010403871536255, "learning_rate": 8.440667094291213e-07, "loss": 0.9068, "step": 68895 }, { "epoch": 0.8397011687567791, "grad_norm": 2.0428452491760254, "learning_rate": 8.437459910198846e-07, "loss": 0.8563, "step": 68900 }, { "epoch": 0.839762104980927, "grad_norm": 2.1950340270996094, "learning_rate": 8.434252726106479e-07, "loss": 0.8279, "step": 68905 }, { "epoch": 0.8398230412050748, "grad_norm": 2.150930643081665, "learning_rate": 8.431045542014113e-07, "loss": 0.8733, "step": 68910 }, { "epoch": 0.8398839774292226, "grad_norm": 1.7672641277313232, "learning_rate": 8.427838357921746e-07, "loss": 0.8861, "step": 68915 }, { "epoch": 0.8399449136533704, "grad_norm": 1.8878086805343628, "learning_rate": 8.424631173829379e-07, "loss": 0.7405, "step": 68920 }, { "epoch": 0.8400058498775181, "grad_norm": 2.1931188106536865, "learning_rate": 8.421423989737011e-07, "loss": 0.8719, "step": 68925 }, { "epoch": 0.840066786101666, "grad_norm": 1.763560175895691, "learning_rate": 8.418216805644646e-07, "loss": 0.8145, "step": 68930 }, { "epoch": 0.8401277223258138, "grad_norm": 1.951302170753479, "learning_rate": 8.415009621552278e-07, "loss": 0.8496, "step": 68935 }, { "epoch": 0.8401886585499616, "grad_norm": 2.172358274459839, "learning_rate": 8.411802437459911e-07, "loss": 0.7659, "step": 68940 }, { "epoch": 0.8402495947741094, "grad_norm": 2.655384063720703, "learning_rate": 8.408595253367544e-07, "loss": 0.8499, "step": 68945 }, { "epoch": 0.8403105309982573, "grad_norm": 2.278139591217041, "learning_rate": 8.405388069275178e-07, "loss": 0.8298, "step": 68950 }, { "epoch": 0.8403714672224051, "grad_norm": 1.812608242034912, "learning_rate": 8.40218088518281e-07, "loss": 0.7908, "step": 68955 }, { "epoch": 0.8404324034465528, "grad_norm": 1.7349978685379028, "learning_rate": 8.398973701090444e-07, "loss": 0.8222, "step": 68960 }, { "epoch": 0.8404933396707006, "grad_norm": 1.9006989002227783, "learning_rate": 8.395766516998076e-07, "loss": 0.8182, "step": 68965 }, { "epoch": 0.8405542758948484, "grad_norm": 1.7390393018722534, "learning_rate": 8.39255933290571e-07, "loss": 0.8114, "step": 68970 }, { "epoch": 0.8406152121189963, "grad_norm": 2.0331456661224365, "learning_rate": 8.389352148813343e-07, "loss": 0.8564, "step": 68975 }, { "epoch": 0.8406761483431441, "grad_norm": 1.8382097482681274, "learning_rate": 8.386144964720976e-07, "loss": 0.8075, "step": 68980 }, { "epoch": 0.8407370845672919, "grad_norm": 2.118990659713745, "learning_rate": 8.382937780628608e-07, "loss": 0.7196, "step": 68985 }, { "epoch": 0.8407980207914397, "grad_norm": 2.1850244998931885, "learning_rate": 8.379730596536243e-07, "loss": 0.8195, "step": 68990 }, { "epoch": 0.8408589570155874, "grad_norm": 1.6823817491531372, "learning_rate": 8.376523412443875e-07, "loss": 0.8088, "step": 68995 }, { "epoch": 0.8409198932397353, "grad_norm": 1.7302229404449463, "learning_rate": 8.373316228351508e-07, "loss": 0.8264, "step": 69000 }, { "epoch": 0.8409808294638831, "grad_norm": 2.016085386276245, "learning_rate": 8.37010904425914e-07, "loss": 0.8292, "step": 69005 }, { "epoch": 0.8410417656880309, "grad_norm": 2.04158878326416, "learning_rate": 8.366901860166775e-07, "loss": 0.6921, "step": 69010 }, { "epoch": 0.8411027019121787, "grad_norm": 2.1732423305511475, "learning_rate": 8.363694676074407e-07, "loss": 0.7919, "step": 69015 }, { "epoch": 0.8411636381363266, "grad_norm": 3.1185896396636963, "learning_rate": 8.360487491982041e-07, "loss": 0.8194, "step": 69020 }, { "epoch": 0.8412245743604744, "grad_norm": 1.8716095685958862, "learning_rate": 8.357280307889673e-07, "loss": 0.765, "step": 69025 }, { "epoch": 0.8412855105846221, "grad_norm": 1.7498172521591187, "learning_rate": 8.354073123797307e-07, "loss": 0.8161, "step": 69030 }, { "epoch": 0.8413464468087699, "grad_norm": 2.035975217819214, "learning_rate": 8.35086593970494e-07, "loss": 0.791, "step": 69035 }, { "epoch": 0.8414073830329177, "grad_norm": 2.1433041095733643, "learning_rate": 8.347658755612573e-07, "loss": 0.8629, "step": 69040 }, { "epoch": 0.8414683192570656, "grad_norm": 2.2710819244384766, "learning_rate": 8.344451571520205e-07, "loss": 0.8321, "step": 69045 }, { "epoch": 0.8415292554812134, "grad_norm": 2.24092173576355, "learning_rate": 8.34124438742784e-07, "loss": 0.8075, "step": 69050 }, { "epoch": 0.8415901917053612, "grad_norm": 1.7312332391738892, "learning_rate": 8.338037203335472e-07, "loss": 0.8538, "step": 69055 }, { "epoch": 0.841651127929509, "grad_norm": 1.8449151515960693, "learning_rate": 8.334830019243105e-07, "loss": 0.7336, "step": 69060 }, { "epoch": 0.8417120641536567, "grad_norm": 1.6686666011810303, "learning_rate": 8.331622835150737e-07, "loss": 0.8696, "step": 69065 }, { "epoch": 0.8417730003778046, "grad_norm": 2.179561138153076, "learning_rate": 8.328415651058372e-07, "loss": 0.7978, "step": 69070 }, { "epoch": 0.8418339366019524, "grad_norm": 2.1364212036132812, "learning_rate": 8.325208466966004e-07, "loss": 0.8176, "step": 69075 }, { "epoch": 0.8418948728261002, "grad_norm": 2.248445510864258, "learning_rate": 8.322001282873638e-07, "loss": 0.8158, "step": 69080 }, { "epoch": 0.841955809050248, "grad_norm": 1.8945378065109253, "learning_rate": 8.31879409878127e-07, "loss": 0.7541, "step": 69085 }, { "epoch": 0.8420167452743959, "grad_norm": 1.7084161043167114, "learning_rate": 8.315586914688904e-07, "loss": 0.8407, "step": 69090 }, { "epoch": 0.8420776814985437, "grad_norm": 1.7886501550674438, "learning_rate": 8.312379730596538e-07, "loss": 0.8479, "step": 69095 }, { "epoch": 0.8421386177226914, "grad_norm": 1.9763437509536743, "learning_rate": 8.30917254650417e-07, "loss": 0.8986, "step": 69100 }, { "epoch": 0.8421995539468392, "grad_norm": 2.2094788551330566, "learning_rate": 8.305965362411802e-07, "loss": 0.812, "step": 69105 }, { "epoch": 0.842260490170987, "grad_norm": 1.8934569358825684, "learning_rate": 8.302758178319437e-07, "loss": 0.7729, "step": 69110 }, { "epoch": 0.8423214263951349, "grad_norm": 2.2358717918395996, "learning_rate": 8.29955099422707e-07, "loss": 0.8702, "step": 69115 }, { "epoch": 0.8423823626192827, "grad_norm": 2.1483373641967773, "learning_rate": 8.296343810134702e-07, "loss": 0.8661, "step": 69120 }, { "epoch": 0.8424432988434305, "grad_norm": 2.0195159912109375, "learning_rate": 8.293136626042334e-07, "loss": 0.7784, "step": 69125 }, { "epoch": 0.8425042350675783, "grad_norm": 1.987282633781433, "learning_rate": 8.289929441949969e-07, "loss": 0.8562, "step": 69130 }, { "epoch": 0.842565171291726, "grad_norm": 1.943329095840454, "learning_rate": 8.286722257857602e-07, "loss": 0.818, "step": 69135 }, { "epoch": 0.8426261075158739, "grad_norm": 1.9683781862258911, "learning_rate": 8.283515073765234e-07, "loss": 0.8263, "step": 69140 }, { "epoch": 0.8426870437400217, "grad_norm": 1.6757229566574097, "learning_rate": 8.280307889672867e-07, "loss": 0.8904, "step": 69145 }, { "epoch": 0.8427479799641695, "grad_norm": 1.9474507570266724, "learning_rate": 8.277100705580501e-07, "loss": 0.8001, "step": 69150 }, { "epoch": 0.8428089161883173, "grad_norm": 2.5872395038604736, "learning_rate": 8.273893521488134e-07, "loss": 0.8695, "step": 69155 }, { "epoch": 0.8428698524124651, "grad_norm": 1.748843789100647, "learning_rate": 8.270686337395767e-07, "loss": 0.7926, "step": 69160 }, { "epoch": 0.842930788636613, "grad_norm": 1.8416190147399902, "learning_rate": 8.2674791533034e-07, "loss": 0.7701, "step": 69165 }, { "epoch": 0.8429917248607607, "grad_norm": 1.9866265058517456, "learning_rate": 8.264271969211033e-07, "loss": 0.8068, "step": 69170 }, { "epoch": 0.8430526610849085, "grad_norm": 1.9382821321487427, "learning_rate": 8.261064785118667e-07, "loss": 0.9034, "step": 69175 }, { "epoch": 0.8431135973090563, "grad_norm": 2.4577391147613525, "learning_rate": 8.257857601026299e-07, "loss": 0.8435, "step": 69180 }, { "epoch": 0.8431745335332042, "grad_norm": 2.2199478149414062, "learning_rate": 8.254650416933934e-07, "loss": 0.8103, "step": 69185 }, { "epoch": 0.843235469757352, "grad_norm": 1.8586339950561523, "learning_rate": 8.251443232841566e-07, "loss": 0.8078, "step": 69190 }, { "epoch": 0.8432964059814998, "grad_norm": 1.6465860605239868, "learning_rate": 8.248236048749199e-07, "loss": 0.7462, "step": 69195 }, { "epoch": 0.8433573422056476, "grad_norm": 2.197896718978882, "learning_rate": 8.245028864656831e-07, "loss": 0.8988, "step": 69200 }, { "epoch": 0.8434182784297953, "grad_norm": 2.082690477371216, "learning_rate": 8.241821680564466e-07, "loss": 0.8612, "step": 69205 }, { "epoch": 0.8434792146539432, "grad_norm": 1.6885334253311157, "learning_rate": 8.238614496472098e-07, "loss": 0.7673, "step": 69210 }, { "epoch": 0.843540150878091, "grad_norm": 2.373206615447998, "learning_rate": 8.235407312379731e-07, "loss": 0.807, "step": 69215 }, { "epoch": 0.8436010871022388, "grad_norm": 2.2877745628356934, "learning_rate": 8.232200128287364e-07, "loss": 0.7923, "step": 69220 }, { "epoch": 0.8436620233263866, "grad_norm": 1.6568225622177124, "learning_rate": 8.228992944194998e-07, "loss": 0.7404, "step": 69225 }, { "epoch": 0.8437229595505344, "grad_norm": 2.059537172317505, "learning_rate": 8.22578576010263e-07, "loss": 0.8177, "step": 69230 }, { "epoch": 0.8437838957746823, "grad_norm": 2.18974232673645, "learning_rate": 8.222578576010264e-07, "loss": 0.7968, "step": 69235 }, { "epoch": 0.84384483199883, "grad_norm": 1.9912924766540527, "learning_rate": 8.219371391917896e-07, "loss": 0.8504, "step": 69240 }, { "epoch": 0.8439057682229778, "grad_norm": 1.6590101718902588, "learning_rate": 8.21616420782553e-07, "loss": 0.8043, "step": 69245 }, { "epoch": 0.8439667044471256, "grad_norm": 2.2503225803375244, "learning_rate": 8.212957023733163e-07, "loss": 0.824, "step": 69250 }, { "epoch": 0.8440276406712734, "grad_norm": 2.1713175773620605, "learning_rate": 8.209749839640796e-07, "loss": 0.8561, "step": 69255 }, { "epoch": 0.8440885768954213, "grad_norm": 2.0150091648101807, "learning_rate": 8.206542655548428e-07, "loss": 0.7656, "step": 69260 }, { "epoch": 0.8441495131195691, "grad_norm": 1.9478895664215088, "learning_rate": 8.203335471456063e-07, "loss": 0.8597, "step": 69265 }, { "epoch": 0.8442104493437169, "grad_norm": 2.13454270362854, "learning_rate": 8.200128287363696e-07, "loss": 0.8631, "step": 69270 }, { "epoch": 0.8442713855678646, "grad_norm": 1.7090802192687988, "learning_rate": 8.196921103271328e-07, "loss": 0.8474, "step": 69275 }, { "epoch": 0.8443323217920125, "grad_norm": 2.160569906234741, "learning_rate": 8.193713919178961e-07, "loss": 0.8817, "step": 69280 }, { "epoch": 0.8443932580161603, "grad_norm": 1.943145751953125, "learning_rate": 8.190506735086595e-07, "loss": 0.8734, "step": 69285 }, { "epoch": 0.8444541942403081, "grad_norm": 1.9629271030426025, "learning_rate": 8.187299550994228e-07, "loss": 0.7558, "step": 69290 }, { "epoch": 0.8445151304644559, "grad_norm": 1.6110674142837524, "learning_rate": 8.184092366901861e-07, "loss": 0.7405, "step": 69295 }, { "epoch": 0.8445760666886037, "grad_norm": 1.9631766080856323, "learning_rate": 8.180885182809493e-07, "loss": 0.8496, "step": 69300 }, { "epoch": 0.8446370029127516, "grad_norm": 2.0023751258850098, "learning_rate": 8.177677998717127e-07, "loss": 0.808, "step": 69305 }, { "epoch": 0.8446979391368993, "grad_norm": 2.031158924102783, "learning_rate": 8.174470814624761e-07, "loss": 0.7815, "step": 69310 }, { "epoch": 0.8447588753610471, "grad_norm": 2.168259620666504, "learning_rate": 8.171263630532393e-07, "loss": 0.8499, "step": 69315 }, { "epoch": 0.8448198115851949, "grad_norm": 1.9041409492492676, "learning_rate": 8.168056446440025e-07, "loss": 0.8131, "step": 69320 }, { "epoch": 0.8448807478093427, "grad_norm": 2.537628650665283, "learning_rate": 8.16484926234766e-07, "loss": 0.8155, "step": 69325 }, { "epoch": 0.8449416840334906, "grad_norm": 1.9989101886749268, "learning_rate": 8.161642078255293e-07, "loss": 0.8255, "step": 69330 }, { "epoch": 0.8450026202576384, "grad_norm": 1.739512324333191, "learning_rate": 8.158434894162925e-07, "loss": 0.7814, "step": 69335 }, { "epoch": 0.8450635564817862, "grad_norm": 1.6135270595550537, "learning_rate": 8.155227710070559e-07, "loss": 0.718, "step": 69340 }, { "epoch": 0.8451244927059339, "grad_norm": 2.2914035320281982, "learning_rate": 8.152020525978192e-07, "loss": 0.8971, "step": 69345 }, { "epoch": 0.8451854289300818, "grad_norm": 2.1754090785980225, "learning_rate": 8.148813341885825e-07, "loss": 0.9095, "step": 69350 }, { "epoch": 0.8452463651542296, "grad_norm": 1.8397189378738403, "learning_rate": 8.145606157793458e-07, "loss": 0.8446, "step": 69355 }, { "epoch": 0.8453073013783774, "grad_norm": 2.200188398361206, "learning_rate": 8.142398973701091e-07, "loss": 0.7756, "step": 69360 }, { "epoch": 0.8453682376025252, "grad_norm": 2.3715105056762695, "learning_rate": 8.139191789608724e-07, "loss": 0.7937, "step": 69365 }, { "epoch": 0.845429173826673, "grad_norm": 2.1364777088165283, "learning_rate": 8.135984605516358e-07, "loss": 0.8022, "step": 69370 }, { "epoch": 0.8454901100508209, "grad_norm": 2.2011916637420654, "learning_rate": 8.13277742142399e-07, "loss": 0.7949, "step": 69375 }, { "epoch": 0.8455510462749686, "grad_norm": 1.9193847179412842, "learning_rate": 8.129570237331623e-07, "loss": 0.8709, "step": 69380 }, { "epoch": 0.8456119824991164, "grad_norm": 1.8906168937683105, "learning_rate": 8.126363053239257e-07, "loss": 0.7244, "step": 69385 }, { "epoch": 0.8456729187232642, "grad_norm": 1.7324447631835938, "learning_rate": 8.12315586914689e-07, "loss": 0.8044, "step": 69390 }, { "epoch": 0.845733854947412, "grad_norm": 2.192469358444214, "learning_rate": 8.119948685054522e-07, "loss": 0.8003, "step": 69395 }, { "epoch": 0.8457947911715599, "grad_norm": 2.002108573913574, "learning_rate": 8.116741500962156e-07, "loss": 0.7869, "step": 69400 }, { "epoch": 0.8458557273957077, "grad_norm": 2.0469908714294434, "learning_rate": 8.113534316869789e-07, "loss": 0.8351, "step": 69405 }, { "epoch": 0.8459166636198555, "grad_norm": 1.874598503112793, "learning_rate": 8.110327132777422e-07, "loss": 0.8042, "step": 69410 }, { "epoch": 0.8459775998440032, "grad_norm": 1.7921719551086426, "learning_rate": 8.107119948685055e-07, "loss": 0.7819, "step": 69415 }, { "epoch": 0.846038536068151, "grad_norm": 2.1277637481689453, "learning_rate": 8.103912764592688e-07, "loss": 0.8873, "step": 69420 }, { "epoch": 0.8460994722922989, "grad_norm": 2.0118610858917236, "learning_rate": 8.100705580500321e-07, "loss": 0.7553, "step": 69425 }, { "epoch": 0.8461604085164467, "grad_norm": 2.0228066444396973, "learning_rate": 8.097498396407955e-07, "loss": 0.749, "step": 69430 }, { "epoch": 0.8462213447405945, "grad_norm": 2.390411138534546, "learning_rate": 8.094291212315587e-07, "loss": 0.807, "step": 69435 }, { "epoch": 0.8462822809647423, "grad_norm": 2.2492635250091553, "learning_rate": 8.09108402822322e-07, "loss": 0.8453, "step": 69440 }, { "epoch": 0.8463432171888902, "grad_norm": 1.9960451126098633, "learning_rate": 8.087876844130855e-07, "loss": 0.8148, "step": 69445 }, { "epoch": 0.8464041534130379, "grad_norm": 2.0299088954925537, "learning_rate": 8.084669660038487e-07, "loss": 0.7678, "step": 69450 }, { "epoch": 0.8464650896371857, "grad_norm": 1.7428702116012573, "learning_rate": 8.081462475946119e-07, "loss": 0.7772, "step": 69455 }, { "epoch": 0.8465260258613335, "grad_norm": 1.9556761980056763, "learning_rate": 8.078255291853753e-07, "loss": 0.7933, "step": 69460 }, { "epoch": 0.8465869620854813, "grad_norm": 2.0844082832336426, "learning_rate": 8.075048107761387e-07, "loss": 0.7879, "step": 69465 }, { "epoch": 0.8466478983096292, "grad_norm": 1.8529675006866455, "learning_rate": 8.071840923669019e-07, "loss": 0.8176, "step": 69470 }, { "epoch": 0.846708834533777, "grad_norm": 2.1227314472198486, "learning_rate": 8.068633739576652e-07, "loss": 0.8143, "step": 69475 }, { "epoch": 0.8467697707579248, "grad_norm": 1.9587054252624512, "learning_rate": 8.065426555484285e-07, "loss": 0.8486, "step": 69480 }, { "epoch": 0.8468307069820725, "grad_norm": 2.025899887084961, "learning_rate": 8.062219371391919e-07, "loss": 0.8082, "step": 69485 }, { "epoch": 0.8468916432062203, "grad_norm": 2.037292718887329, "learning_rate": 8.059012187299552e-07, "loss": 0.7652, "step": 69490 }, { "epoch": 0.8469525794303682, "grad_norm": 2.183858633041382, "learning_rate": 8.055805003207184e-07, "loss": 0.7794, "step": 69495 }, { "epoch": 0.847013515654516, "grad_norm": 2.2588915824890137, "learning_rate": 8.052597819114818e-07, "loss": 0.7523, "step": 69500 }, { "epoch": 0.8470744518786638, "grad_norm": 1.7076135873794556, "learning_rate": 8.049390635022452e-07, "loss": 0.7727, "step": 69505 }, { "epoch": 0.8471353881028116, "grad_norm": 2.1401100158691406, "learning_rate": 8.046183450930084e-07, "loss": 0.7479, "step": 69510 }, { "epoch": 0.8471963243269593, "grad_norm": 1.809566617012024, "learning_rate": 8.042976266837717e-07, "loss": 0.7799, "step": 69515 }, { "epoch": 0.8472572605511072, "grad_norm": 2.1479978561401367, "learning_rate": 8.039769082745351e-07, "loss": 0.8403, "step": 69520 }, { "epoch": 0.847318196775255, "grad_norm": 1.9482150077819824, "learning_rate": 8.036561898652984e-07, "loss": 0.8423, "step": 69525 }, { "epoch": 0.8473791329994028, "grad_norm": 1.8268942832946777, "learning_rate": 8.033354714560616e-07, "loss": 0.769, "step": 69530 }, { "epoch": 0.8474400692235506, "grad_norm": 2.0892114639282227, "learning_rate": 8.03014753046825e-07, "loss": 0.7257, "step": 69535 }, { "epoch": 0.8475010054476985, "grad_norm": 1.9568976163864136, "learning_rate": 8.026940346375883e-07, "loss": 0.8064, "step": 69540 }, { "epoch": 0.8475619416718463, "grad_norm": 2.0268449783325195, "learning_rate": 8.023733162283516e-07, "loss": 0.7779, "step": 69545 }, { "epoch": 0.847622877895994, "grad_norm": 1.6748327016830444, "learning_rate": 8.020525978191149e-07, "loss": 0.7818, "step": 69550 }, { "epoch": 0.8476838141201418, "grad_norm": 2.2158546447753906, "learning_rate": 8.017318794098782e-07, "loss": 0.8183, "step": 69555 }, { "epoch": 0.8477447503442896, "grad_norm": 1.896129846572876, "learning_rate": 8.014111610006415e-07, "loss": 0.8021, "step": 69560 }, { "epoch": 0.8478056865684375, "grad_norm": 2.220743179321289, "learning_rate": 8.010904425914049e-07, "loss": 0.8076, "step": 69565 }, { "epoch": 0.8478666227925853, "grad_norm": 2.383537530899048, "learning_rate": 8.007697241821681e-07, "loss": 0.8057, "step": 69570 }, { "epoch": 0.8479275590167331, "grad_norm": 2.1463558673858643, "learning_rate": 8.004490057729314e-07, "loss": 0.7957, "step": 69575 }, { "epoch": 0.8479884952408809, "grad_norm": 2.034860610961914, "learning_rate": 8.001282873636948e-07, "loss": 0.8582, "step": 69580 }, { "epoch": 0.8480494314650286, "grad_norm": 2.5975048542022705, "learning_rate": 7.998075689544581e-07, "loss": 0.7991, "step": 69585 }, { "epoch": 0.8481103676891765, "grad_norm": 2.1883325576782227, "learning_rate": 7.994868505452213e-07, "loss": 0.8232, "step": 69590 }, { "epoch": 0.8481713039133243, "grad_norm": 2.030155897140503, "learning_rate": 7.991661321359847e-07, "loss": 0.7939, "step": 69595 }, { "epoch": 0.8482322401374721, "grad_norm": 2.1058177947998047, "learning_rate": 7.98845413726748e-07, "loss": 0.779, "step": 69600 }, { "epoch": 0.8482931763616199, "grad_norm": 1.9216607809066772, "learning_rate": 7.985246953175113e-07, "loss": 0.8038, "step": 69605 }, { "epoch": 0.8483541125857678, "grad_norm": 2.042811155319214, "learning_rate": 7.982039769082746e-07, "loss": 0.7548, "step": 69610 }, { "epoch": 0.8484150488099156, "grad_norm": 1.9823745489120483, "learning_rate": 7.978832584990379e-07, "loss": 0.7953, "step": 69615 }, { "epoch": 0.8484759850340633, "grad_norm": 1.7372503280639648, "learning_rate": 7.975625400898013e-07, "loss": 0.8663, "step": 69620 }, { "epoch": 0.8485369212582111, "grad_norm": 1.7291983366012573, "learning_rate": 7.972418216805646e-07, "loss": 0.7885, "step": 69625 }, { "epoch": 0.8485978574823589, "grad_norm": 2.039052724838257, "learning_rate": 7.969211032713278e-07, "loss": 0.7899, "step": 69630 }, { "epoch": 0.8486587937065068, "grad_norm": 1.9354382753372192, "learning_rate": 7.966003848620911e-07, "loss": 0.8174, "step": 69635 }, { "epoch": 0.8487197299306546, "grad_norm": 2.264064073562622, "learning_rate": 7.962796664528546e-07, "loss": 0.7992, "step": 69640 }, { "epoch": 0.8487806661548024, "grad_norm": 1.9291926622390747, "learning_rate": 7.959589480436178e-07, "loss": 0.8392, "step": 69645 }, { "epoch": 0.8488416023789502, "grad_norm": 1.9225974082946777, "learning_rate": 7.95638229634381e-07, "loss": 0.7518, "step": 69650 }, { "epoch": 0.848902538603098, "grad_norm": 2.233363628387451, "learning_rate": 7.953175112251443e-07, "loss": 0.8355, "step": 69655 }, { "epoch": 0.8489634748272458, "grad_norm": 1.8548526763916016, "learning_rate": 7.949967928159078e-07, "loss": 0.8681, "step": 69660 }, { "epoch": 0.8490244110513936, "grad_norm": 2.0418450832366943, "learning_rate": 7.94676074406671e-07, "loss": 0.8162, "step": 69665 }, { "epoch": 0.8490853472755414, "grad_norm": 1.9101465940475464, "learning_rate": 7.943553559974342e-07, "loss": 0.8083, "step": 69670 }, { "epoch": 0.8491462834996892, "grad_norm": 2.0199854373931885, "learning_rate": 7.940346375881976e-07, "loss": 0.8172, "step": 69675 }, { "epoch": 0.8492072197238371, "grad_norm": 1.831424593925476, "learning_rate": 7.93713919178961e-07, "loss": 0.7561, "step": 69680 }, { "epoch": 0.8492681559479849, "grad_norm": 1.7283689975738525, "learning_rate": 7.933932007697243e-07, "loss": 0.8924, "step": 69685 }, { "epoch": 0.8493290921721326, "grad_norm": 2.1620566844940186, "learning_rate": 7.930724823604876e-07, "loss": 0.8395, "step": 69690 }, { "epoch": 0.8493900283962804, "grad_norm": 2.0718088150024414, "learning_rate": 7.927517639512508e-07, "loss": 0.7825, "step": 69695 }, { "epoch": 0.8494509646204282, "grad_norm": 1.7915620803833008, "learning_rate": 7.924310455420143e-07, "loss": 0.8499, "step": 69700 }, { "epoch": 0.8495119008445761, "grad_norm": 1.994071364402771, "learning_rate": 7.921103271327775e-07, "loss": 0.8118, "step": 69705 }, { "epoch": 0.8495728370687239, "grad_norm": 3.022152900695801, "learning_rate": 7.917896087235408e-07, "loss": 0.8325, "step": 69710 }, { "epoch": 0.8496337732928717, "grad_norm": 1.908717393875122, "learning_rate": 7.91468890314304e-07, "loss": 0.8292, "step": 69715 }, { "epoch": 0.8496947095170195, "grad_norm": 2.218621015548706, "learning_rate": 7.911481719050675e-07, "loss": 0.7788, "step": 69720 }, { "epoch": 0.8497556457411672, "grad_norm": 1.7259221076965332, "learning_rate": 7.908274534958307e-07, "loss": 0.764, "step": 69725 }, { "epoch": 0.8498165819653151, "grad_norm": 2.1652867794036865, "learning_rate": 7.90506735086594e-07, "loss": 0.8619, "step": 69730 }, { "epoch": 0.8498775181894629, "grad_norm": 2.22121000289917, "learning_rate": 7.901860166773573e-07, "loss": 0.8528, "step": 69735 }, { "epoch": 0.8499384544136107, "grad_norm": 2.043729305267334, "learning_rate": 7.898652982681207e-07, "loss": 0.8107, "step": 69740 }, { "epoch": 0.8499993906377585, "grad_norm": 2.1737220287323, "learning_rate": 7.895445798588839e-07, "loss": 0.8203, "step": 69745 }, { "epoch": 0.8500603268619064, "grad_norm": 2.0840494632720947, "learning_rate": 7.892238614496473e-07, "loss": 0.7861, "step": 69750 }, { "epoch": 0.8501212630860542, "grad_norm": 2.081254005432129, "learning_rate": 7.889031430404105e-07, "loss": 0.8526, "step": 69755 }, { "epoch": 0.8501821993102019, "grad_norm": 1.7865186929702759, "learning_rate": 7.88582424631174e-07, "loss": 0.7728, "step": 69760 }, { "epoch": 0.8502431355343497, "grad_norm": 1.9437085390090942, "learning_rate": 7.882617062219372e-07, "loss": 0.835, "step": 69765 }, { "epoch": 0.8503040717584975, "grad_norm": 1.9660207033157349, "learning_rate": 7.879409878127005e-07, "loss": 0.8135, "step": 69770 }, { "epoch": 0.8503650079826454, "grad_norm": 1.894332766532898, "learning_rate": 7.876202694034637e-07, "loss": 0.8093, "step": 69775 }, { "epoch": 0.8504259442067932, "grad_norm": 2.3208823204040527, "learning_rate": 7.872995509942272e-07, "loss": 0.8138, "step": 69780 }, { "epoch": 0.850486880430941, "grad_norm": 1.9917463064193726, "learning_rate": 7.869788325849904e-07, "loss": 0.7389, "step": 69785 }, { "epoch": 0.8505478166550888, "grad_norm": 2.21392560005188, "learning_rate": 7.866581141757537e-07, "loss": 0.8074, "step": 69790 }, { "epoch": 0.8506087528792365, "grad_norm": 2.0538103580474854, "learning_rate": 7.863373957665171e-07, "loss": 0.8091, "step": 69795 }, { "epoch": 0.8506696891033844, "grad_norm": 1.9974533319473267, "learning_rate": 7.860166773572804e-07, "loss": 0.841, "step": 69800 }, { "epoch": 0.8507306253275322, "grad_norm": 1.8896234035491943, "learning_rate": 7.856959589480436e-07, "loss": 0.8577, "step": 69805 }, { "epoch": 0.85079156155168, "grad_norm": 1.8886750936508179, "learning_rate": 7.85375240538807e-07, "loss": 0.8158, "step": 69810 }, { "epoch": 0.8508524977758278, "grad_norm": 1.9640640020370483, "learning_rate": 7.850545221295704e-07, "loss": 0.7879, "step": 69815 }, { "epoch": 0.8509134339999757, "grad_norm": 1.9390443563461304, "learning_rate": 7.847338037203336e-07, "loss": 0.7881, "step": 69820 }, { "epoch": 0.8509743702241235, "grad_norm": 2.363215446472168, "learning_rate": 7.844130853110969e-07, "loss": 0.7598, "step": 69825 }, { "epoch": 0.8510353064482712, "grad_norm": 2.030623435974121, "learning_rate": 7.840923669018602e-07, "loss": 0.7565, "step": 69830 }, { "epoch": 0.851096242672419, "grad_norm": 1.8064086437225342, "learning_rate": 7.837716484926236e-07, "loss": 0.7662, "step": 69835 }, { "epoch": 0.8511571788965668, "grad_norm": 1.8959709405899048, "learning_rate": 7.834509300833869e-07, "loss": 0.7988, "step": 69840 }, { "epoch": 0.8512181151207147, "grad_norm": 1.8451298475265503, "learning_rate": 7.831302116741501e-07, "loss": 0.877, "step": 69845 }, { "epoch": 0.8512790513448625, "grad_norm": 1.71628999710083, "learning_rate": 7.828094932649134e-07, "loss": 0.7718, "step": 69850 }, { "epoch": 0.8513399875690103, "grad_norm": 2.1803460121154785, "learning_rate": 7.824887748556769e-07, "loss": 0.8381, "step": 69855 }, { "epoch": 0.8514009237931581, "grad_norm": 2.3496010303497314, "learning_rate": 7.821680564464401e-07, "loss": 0.7999, "step": 69860 }, { "epoch": 0.8514618600173058, "grad_norm": 2.0349390506744385, "learning_rate": 7.818473380372034e-07, "loss": 0.8352, "step": 69865 }, { "epoch": 0.8515227962414537, "grad_norm": 1.8787342309951782, "learning_rate": 7.815266196279667e-07, "loss": 0.8259, "step": 69870 }, { "epoch": 0.8515837324656015, "grad_norm": 1.7717164754867554, "learning_rate": 7.812059012187301e-07, "loss": 0.8609, "step": 69875 }, { "epoch": 0.8516446686897493, "grad_norm": 1.7118570804595947, "learning_rate": 7.808851828094933e-07, "loss": 0.6864, "step": 69880 }, { "epoch": 0.8517056049138971, "grad_norm": 1.9007983207702637, "learning_rate": 7.805644644002567e-07, "loss": 0.8082, "step": 69885 }, { "epoch": 0.851766541138045, "grad_norm": 1.9877005815505981, "learning_rate": 7.802437459910199e-07, "loss": 0.863, "step": 69890 }, { "epoch": 0.8518274773621928, "grad_norm": 1.8435848951339722, "learning_rate": 7.799230275817833e-07, "loss": 0.786, "step": 69895 }, { "epoch": 0.8518884135863405, "grad_norm": 2.01755690574646, "learning_rate": 7.796023091725466e-07, "loss": 0.8393, "step": 69900 }, { "epoch": 0.8519493498104883, "grad_norm": 2.4448235034942627, "learning_rate": 7.792815907633099e-07, "loss": 0.8307, "step": 69905 }, { "epoch": 0.8520102860346361, "grad_norm": 1.940831184387207, "learning_rate": 7.789608723540731e-07, "loss": 0.7437, "step": 69910 }, { "epoch": 0.852071222258784, "grad_norm": 1.849534273147583, "learning_rate": 7.786401539448366e-07, "loss": 0.738, "step": 69915 }, { "epoch": 0.8521321584829318, "grad_norm": 2.1775009632110596, "learning_rate": 7.783194355355998e-07, "loss": 0.8062, "step": 69920 }, { "epoch": 0.8521930947070796, "grad_norm": 1.8242051601409912, "learning_rate": 7.779987171263631e-07, "loss": 0.7938, "step": 69925 }, { "epoch": 0.8522540309312274, "grad_norm": 2.0292532444000244, "learning_rate": 7.776779987171264e-07, "loss": 0.8484, "step": 69930 }, { "epoch": 0.8523149671553751, "grad_norm": 2.0142650604248047, "learning_rate": 7.773572803078898e-07, "loss": 0.8085, "step": 69935 }, { "epoch": 0.852375903379523, "grad_norm": 2.028418779373169, "learning_rate": 7.77036561898653e-07, "loss": 0.7398, "step": 69940 }, { "epoch": 0.8524368396036708, "grad_norm": 2.236558675765991, "learning_rate": 7.767158434894164e-07, "loss": 0.7708, "step": 69945 }, { "epoch": 0.8524977758278186, "grad_norm": 2.002354145050049, "learning_rate": 7.763951250801796e-07, "loss": 0.8363, "step": 69950 }, { "epoch": 0.8525587120519664, "grad_norm": 1.9264636039733887, "learning_rate": 7.76074406670943e-07, "loss": 0.8239, "step": 69955 }, { "epoch": 0.8526196482761143, "grad_norm": 1.8835386037826538, "learning_rate": 7.757536882617063e-07, "loss": 0.7603, "step": 69960 }, { "epoch": 0.8526805845002621, "grad_norm": 1.6621743440628052, "learning_rate": 7.754329698524696e-07, "loss": 0.7661, "step": 69965 }, { "epoch": 0.8527415207244098, "grad_norm": 1.8084932565689087, "learning_rate": 7.751122514432328e-07, "loss": 0.7935, "step": 69970 }, { "epoch": 0.8528024569485576, "grad_norm": 2.1990766525268555, "learning_rate": 7.747915330339963e-07, "loss": 0.8169, "step": 69975 }, { "epoch": 0.8528633931727054, "grad_norm": 2.3573672771453857, "learning_rate": 7.744708146247595e-07, "loss": 0.8667, "step": 69980 }, { "epoch": 0.8529243293968533, "grad_norm": 1.7663605213165283, "learning_rate": 7.741500962155228e-07, "loss": 0.7838, "step": 69985 }, { "epoch": 0.8529852656210011, "grad_norm": 2.1414525508880615, "learning_rate": 7.738293778062861e-07, "loss": 0.7894, "step": 69990 }, { "epoch": 0.8530462018451489, "grad_norm": 2.014078140258789, "learning_rate": 7.735086593970495e-07, "loss": 0.7788, "step": 69995 }, { "epoch": 0.8531071380692967, "grad_norm": 1.8891544342041016, "learning_rate": 7.731879409878127e-07, "loss": 0.7011, "step": 70000 }, { "epoch": 0.8531680742934444, "grad_norm": 1.6820056438446045, "learning_rate": 7.728672225785761e-07, "loss": 0.8225, "step": 70005 }, { "epoch": 0.8532290105175923, "grad_norm": 1.7990645170211792, "learning_rate": 7.725465041693393e-07, "loss": 0.7667, "step": 70010 }, { "epoch": 0.8532899467417401, "grad_norm": 1.728169560432434, "learning_rate": 7.722257857601027e-07, "loss": 0.738, "step": 70015 }, { "epoch": 0.8533508829658879, "grad_norm": 1.9682608842849731, "learning_rate": 7.71905067350866e-07, "loss": 0.7645, "step": 70020 }, { "epoch": 0.8534118191900357, "grad_norm": 2.09731125831604, "learning_rate": 7.715843489416293e-07, "loss": 0.8727, "step": 70025 }, { "epoch": 0.8534727554141835, "grad_norm": 2.34059476852417, "learning_rate": 7.712636305323925e-07, "loss": 0.8061, "step": 70030 }, { "epoch": 0.8535336916383314, "grad_norm": 1.8851628303527832, "learning_rate": 7.70942912123156e-07, "loss": 0.7617, "step": 70035 }, { "epoch": 0.8535946278624791, "grad_norm": 1.8989801406860352, "learning_rate": 7.706221937139193e-07, "loss": 0.8287, "step": 70040 }, { "epoch": 0.8536555640866269, "grad_norm": 2.1943790912628174, "learning_rate": 7.703014753046825e-07, "loss": 0.8233, "step": 70045 }, { "epoch": 0.8537165003107747, "grad_norm": 2.077164888381958, "learning_rate": 7.699807568954458e-07, "loss": 0.823, "step": 70050 }, { "epoch": 0.8537774365349226, "grad_norm": 2.0655791759490967, "learning_rate": 7.696600384862092e-07, "loss": 0.7427, "step": 70055 }, { "epoch": 0.8538383727590704, "grad_norm": 2.086717128753662, "learning_rate": 7.693393200769725e-07, "loss": 0.8644, "step": 70060 }, { "epoch": 0.8538993089832182, "grad_norm": 2.1426403522491455, "learning_rate": 7.690186016677358e-07, "loss": 0.8493, "step": 70065 }, { "epoch": 0.853960245207366, "grad_norm": 1.887250542640686, "learning_rate": 7.68697883258499e-07, "loss": 0.8262, "step": 70070 }, { "epoch": 0.8540211814315137, "grad_norm": 2.268444061279297, "learning_rate": 7.683771648492624e-07, "loss": 0.762, "step": 70075 }, { "epoch": 0.8540821176556616, "grad_norm": 1.874137282371521, "learning_rate": 7.680564464400258e-07, "loss": 0.7652, "step": 70080 }, { "epoch": 0.8541430538798094, "grad_norm": 1.919263243675232, "learning_rate": 7.67735728030789e-07, "loss": 0.7516, "step": 70085 }, { "epoch": 0.8542039901039572, "grad_norm": 1.7140778303146362, "learning_rate": 7.674150096215524e-07, "loss": 0.8151, "step": 70090 }, { "epoch": 0.854264926328105, "grad_norm": 2.142364501953125, "learning_rate": 7.670942912123157e-07, "loss": 0.7765, "step": 70095 }, { "epoch": 0.8543258625522528, "grad_norm": 1.9049335718154907, "learning_rate": 7.66773572803079e-07, "loss": 0.8056, "step": 70100 }, { "epoch": 0.8543867987764007, "grad_norm": 2.2416160106658936, "learning_rate": 7.664528543938422e-07, "loss": 0.7956, "step": 70105 }, { "epoch": 0.8544477350005484, "grad_norm": 1.8500494956970215, "learning_rate": 7.661321359846057e-07, "loss": 0.8567, "step": 70110 }, { "epoch": 0.8545086712246962, "grad_norm": 2.572474479675293, "learning_rate": 7.658114175753689e-07, "loss": 0.876, "step": 70115 }, { "epoch": 0.854569607448844, "grad_norm": 1.9820691347122192, "learning_rate": 7.654906991661322e-07, "loss": 0.8871, "step": 70120 }, { "epoch": 0.8546305436729918, "grad_norm": 2.0362050533294678, "learning_rate": 7.651699807568955e-07, "loss": 0.7879, "step": 70125 }, { "epoch": 0.8546914798971397, "grad_norm": 2.1957204341888428, "learning_rate": 7.648492623476589e-07, "loss": 0.8681, "step": 70130 }, { "epoch": 0.8547524161212875, "grad_norm": 1.7030260562896729, "learning_rate": 7.645285439384221e-07, "loss": 0.8488, "step": 70135 }, { "epoch": 0.8548133523454353, "grad_norm": 1.9437230825424194, "learning_rate": 7.642078255291855e-07, "loss": 0.7863, "step": 70140 }, { "epoch": 0.854874288569583, "grad_norm": 2.086254596710205, "learning_rate": 7.638871071199487e-07, "loss": 0.7828, "step": 70145 }, { "epoch": 0.8549352247937309, "grad_norm": 1.8225221633911133, "learning_rate": 7.635663887107121e-07, "loss": 0.8264, "step": 70150 }, { "epoch": 0.8549961610178787, "grad_norm": 1.8703997135162354, "learning_rate": 7.632456703014754e-07, "loss": 0.8423, "step": 70155 }, { "epoch": 0.8550570972420265, "grad_norm": 1.7919172048568726, "learning_rate": 7.629249518922387e-07, "loss": 0.7529, "step": 70160 }, { "epoch": 0.8551180334661743, "grad_norm": 1.7768574953079224, "learning_rate": 7.626042334830019e-07, "loss": 0.7761, "step": 70165 }, { "epoch": 0.8551789696903221, "grad_norm": 2.541780471801758, "learning_rate": 7.622835150737654e-07, "loss": 0.8835, "step": 70170 }, { "epoch": 0.85523990591447, "grad_norm": 2.1401937007904053, "learning_rate": 7.619627966645286e-07, "loss": 0.7963, "step": 70175 }, { "epoch": 0.8553008421386177, "grad_norm": 1.7612570524215698, "learning_rate": 7.616420782552919e-07, "loss": 0.7575, "step": 70180 }, { "epoch": 0.8553617783627655, "grad_norm": 2.1056323051452637, "learning_rate": 7.613213598460552e-07, "loss": 0.8553, "step": 70185 }, { "epoch": 0.8554227145869133, "grad_norm": 1.7373664379119873, "learning_rate": 7.610006414368186e-07, "loss": 0.7468, "step": 70190 }, { "epoch": 0.8554836508110611, "grad_norm": 1.9298663139343262, "learning_rate": 7.606799230275818e-07, "loss": 0.7684, "step": 70195 }, { "epoch": 0.855544587035209, "grad_norm": 1.732299566268921, "learning_rate": 7.603592046183452e-07, "loss": 0.8024, "step": 70200 }, { "epoch": 0.8556055232593568, "grad_norm": 1.7700212001800537, "learning_rate": 7.600384862091084e-07, "loss": 0.8201, "step": 70205 }, { "epoch": 0.8556664594835046, "grad_norm": 2.0412919521331787, "learning_rate": 7.597177677998718e-07, "loss": 0.7916, "step": 70210 }, { "epoch": 0.8557273957076523, "grad_norm": 2.1095938682556152, "learning_rate": 7.593970493906352e-07, "loss": 0.7711, "step": 70215 }, { "epoch": 0.8557883319318002, "grad_norm": 2.017638683319092, "learning_rate": 7.590763309813984e-07, "loss": 0.8516, "step": 70220 }, { "epoch": 0.855849268155948, "grad_norm": 2.0318996906280518, "learning_rate": 7.587556125721616e-07, "loss": 0.8151, "step": 70225 }, { "epoch": 0.8559102043800958, "grad_norm": 1.7674163579940796, "learning_rate": 7.584348941629251e-07, "loss": 0.7071, "step": 70230 }, { "epoch": 0.8559711406042436, "grad_norm": 2.420043468475342, "learning_rate": 7.581141757536884e-07, "loss": 0.815, "step": 70235 }, { "epoch": 0.8560320768283914, "grad_norm": 2.067873239517212, "learning_rate": 7.577934573444516e-07, "loss": 0.8, "step": 70240 }, { "epoch": 0.8560930130525393, "grad_norm": 1.7564163208007812, "learning_rate": 7.574727389352148e-07, "loss": 0.8365, "step": 70245 }, { "epoch": 0.856153949276687, "grad_norm": 1.6514571905136108, "learning_rate": 7.571520205259783e-07, "loss": 0.7388, "step": 70250 }, { "epoch": 0.8562148855008348, "grad_norm": 1.8321137428283691, "learning_rate": 7.568313021167416e-07, "loss": 0.8411, "step": 70255 }, { "epoch": 0.8562758217249826, "grad_norm": 2.0742409229278564, "learning_rate": 7.565105837075048e-07, "loss": 0.8156, "step": 70260 }, { "epoch": 0.8563367579491304, "grad_norm": 2.7492833137512207, "learning_rate": 7.561898652982681e-07, "loss": 0.8096, "step": 70265 }, { "epoch": 0.8563976941732783, "grad_norm": 2.0900328159332275, "learning_rate": 7.558691468890315e-07, "loss": 0.8145, "step": 70270 }, { "epoch": 0.8564586303974261, "grad_norm": 1.8852930068969727, "learning_rate": 7.555484284797949e-07, "loss": 0.8267, "step": 70275 }, { "epoch": 0.8565195666215739, "grad_norm": 2.3154966831207275, "learning_rate": 7.552277100705581e-07, "loss": 0.7684, "step": 70280 }, { "epoch": 0.8565805028457216, "grad_norm": 2.1081619262695312, "learning_rate": 7.549069916613214e-07, "loss": 0.8113, "step": 70285 }, { "epoch": 0.8566414390698694, "grad_norm": 1.7434113025665283, "learning_rate": 7.545862732520847e-07, "loss": 0.8019, "step": 70290 }, { "epoch": 0.8567023752940173, "grad_norm": 1.8681436777114868, "learning_rate": 7.542655548428481e-07, "loss": 0.8301, "step": 70295 }, { "epoch": 0.8567633115181651, "grad_norm": 2.1066441535949707, "learning_rate": 7.539448364336113e-07, "loss": 0.799, "step": 70300 }, { "epoch": 0.8568242477423129, "grad_norm": 2.1185805797576904, "learning_rate": 7.536241180243746e-07, "loss": 0.7533, "step": 70305 }, { "epoch": 0.8568851839664607, "grad_norm": 1.9476484060287476, "learning_rate": 7.53303399615138e-07, "loss": 0.8309, "step": 70310 }, { "epoch": 0.8569461201906086, "grad_norm": 2.4478724002838135, "learning_rate": 7.529826812059013e-07, "loss": 0.8896, "step": 70315 }, { "epoch": 0.8570070564147563, "grad_norm": 1.9200612306594849, "learning_rate": 7.526619627966645e-07, "loss": 0.784, "step": 70320 }, { "epoch": 0.8570679926389041, "grad_norm": 2.494607925415039, "learning_rate": 7.523412443874279e-07, "loss": 0.8068, "step": 70325 }, { "epoch": 0.8571289288630519, "grad_norm": 1.7195050716400146, "learning_rate": 7.520205259781912e-07, "loss": 0.7955, "step": 70330 }, { "epoch": 0.8571898650871997, "grad_norm": 1.9962868690490723, "learning_rate": 7.516998075689545e-07, "loss": 0.7835, "step": 70335 }, { "epoch": 0.8572508013113476, "grad_norm": 1.8306218385696411, "learning_rate": 7.513790891597178e-07, "loss": 0.7341, "step": 70340 }, { "epoch": 0.8573117375354954, "grad_norm": 1.9107742309570312, "learning_rate": 7.510583707504811e-07, "loss": 0.8369, "step": 70345 }, { "epoch": 0.8573726737596432, "grad_norm": 2.175208806991577, "learning_rate": 7.507376523412444e-07, "loss": 0.7883, "step": 70350 }, { "epoch": 0.8574336099837909, "grad_norm": 1.8961596488952637, "learning_rate": 7.504169339320078e-07, "loss": 0.8381, "step": 70355 }, { "epoch": 0.8574945462079387, "grad_norm": 2.0647952556610107, "learning_rate": 7.50096215522771e-07, "loss": 0.7622, "step": 70360 }, { "epoch": 0.8575554824320866, "grad_norm": 1.571718692779541, "learning_rate": 7.497754971135343e-07, "loss": 0.7407, "step": 70365 }, { "epoch": 0.8576164186562344, "grad_norm": 1.8839623928070068, "learning_rate": 7.494547787042977e-07, "loss": 0.7602, "step": 70370 }, { "epoch": 0.8576773548803822, "grad_norm": 1.9915778636932373, "learning_rate": 7.49134060295061e-07, "loss": 0.8891, "step": 70375 }, { "epoch": 0.85773829110453, "grad_norm": 1.94113028049469, "learning_rate": 7.488133418858242e-07, "loss": 0.8793, "step": 70380 }, { "epoch": 0.8577992273286779, "grad_norm": 1.8339481353759766, "learning_rate": 7.484926234765876e-07, "loss": 0.7704, "step": 70385 }, { "epoch": 0.8578601635528256, "grad_norm": 2.065511703491211, "learning_rate": 7.48171905067351e-07, "loss": 0.8548, "step": 70390 }, { "epoch": 0.8579210997769734, "grad_norm": 1.835667610168457, "learning_rate": 7.478511866581142e-07, "loss": 0.8788, "step": 70395 }, { "epoch": 0.8579820360011212, "grad_norm": 1.8499494791030884, "learning_rate": 7.475304682488775e-07, "loss": 0.7751, "step": 70400 }, { "epoch": 0.858042972225269, "grad_norm": 2.414706230163574, "learning_rate": 7.472097498396409e-07, "loss": 0.8261, "step": 70405 }, { "epoch": 0.8581039084494169, "grad_norm": 2.323258876800537, "learning_rate": 7.468890314304042e-07, "loss": 0.8675, "step": 70410 }, { "epoch": 0.8581648446735647, "grad_norm": 1.8760510683059692, "learning_rate": 7.465683130211675e-07, "loss": 0.8458, "step": 70415 }, { "epoch": 0.8582257808977125, "grad_norm": 2.064218044281006, "learning_rate": 7.462475946119307e-07, "loss": 0.7944, "step": 70420 }, { "epoch": 0.8582867171218602, "grad_norm": 2.1656203269958496, "learning_rate": 7.459268762026941e-07, "loss": 0.8805, "step": 70425 }, { "epoch": 0.858347653346008, "grad_norm": 2.166534185409546, "learning_rate": 7.456061577934575e-07, "loss": 0.7666, "step": 70430 }, { "epoch": 0.8584085895701559, "grad_norm": 2.245722770690918, "learning_rate": 7.452854393842207e-07, "loss": 0.8082, "step": 70435 }, { "epoch": 0.8584695257943037, "grad_norm": 1.676995873451233, "learning_rate": 7.449647209749839e-07, "loss": 0.8024, "step": 70440 }, { "epoch": 0.8585304620184515, "grad_norm": 1.9576231241226196, "learning_rate": 7.446440025657474e-07, "loss": 0.8538, "step": 70445 }, { "epoch": 0.8585913982425993, "grad_norm": 1.555679202079773, "learning_rate": 7.443232841565107e-07, "loss": 0.7823, "step": 70450 }, { "epoch": 0.858652334466747, "grad_norm": 1.749032974243164, "learning_rate": 7.440025657472739e-07, "loss": 0.8352, "step": 70455 }, { "epoch": 0.8587132706908949, "grad_norm": 2.1578903198242188, "learning_rate": 7.436818473380373e-07, "loss": 0.8145, "step": 70460 }, { "epoch": 0.8587742069150427, "grad_norm": 2.2119078636169434, "learning_rate": 7.433611289288006e-07, "loss": 0.8663, "step": 70465 }, { "epoch": 0.8588351431391905, "grad_norm": 2.239483118057251, "learning_rate": 7.430404105195639e-07, "loss": 0.7903, "step": 70470 }, { "epoch": 0.8588960793633383, "grad_norm": 2.1976981163024902, "learning_rate": 7.427196921103272e-07, "loss": 0.882, "step": 70475 }, { "epoch": 0.8589570155874862, "grad_norm": 2.2164392471313477, "learning_rate": 7.423989737010905e-07, "loss": 0.842, "step": 70480 }, { "epoch": 0.859017951811634, "grad_norm": 1.9851648807525635, "learning_rate": 7.420782552918538e-07, "loss": 0.7149, "step": 70485 }, { "epoch": 0.8590788880357817, "grad_norm": 2.5546927452087402, "learning_rate": 7.417575368826172e-07, "loss": 0.8379, "step": 70490 }, { "epoch": 0.8591398242599295, "grad_norm": 1.952141284942627, "learning_rate": 7.414368184733804e-07, "loss": 0.7427, "step": 70495 }, { "epoch": 0.8592007604840773, "grad_norm": 1.7906264066696167, "learning_rate": 7.411161000641437e-07, "loss": 0.8083, "step": 70500 }, { "epoch": 0.8592616967082252, "grad_norm": 2.025062322616577, "learning_rate": 7.407953816549071e-07, "loss": 0.8031, "step": 70505 }, { "epoch": 0.859322632932373, "grad_norm": 1.6667003631591797, "learning_rate": 7.404746632456704e-07, "loss": 0.8109, "step": 70510 }, { "epoch": 0.8593835691565208, "grad_norm": 2.7961432933807373, "learning_rate": 7.401539448364336e-07, "loss": 0.8055, "step": 70515 }, { "epoch": 0.8594445053806686, "grad_norm": 1.915589690208435, "learning_rate": 7.39833226427197e-07, "loss": 0.8177, "step": 70520 }, { "epoch": 0.8595054416048163, "grad_norm": 2.350184917449951, "learning_rate": 7.395125080179603e-07, "loss": 0.7543, "step": 70525 }, { "epoch": 0.8595663778289642, "grad_norm": 2.0349042415618896, "learning_rate": 7.391917896087236e-07, "loss": 0.8299, "step": 70530 }, { "epoch": 0.859627314053112, "grad_norm": 2.0577404499053955, "learning_rate": 7.388710711994869e-07, "loss": 0.834, "step": 70535 }, { "epoch": 0.8596882502772598, "grad_norm": 1.7646995782852173, "learning_rate": 7.385503527902502e-07, "loss": 0.8201, "step": 70540 }, { "epoch": 0.8597491865014076, "grad_norm": 2.0503225326538086, "learning_rate": 7.382296343810135e-07, "loss": 0.8866, "step": 70545 }, { "epoch": 0.8598101227255555, "grad_norm": 2.1568377017974854, "learning_rate": 7.379089159717769e-07, "loss": 0.8145, "step": 70550 }, { "epoch": 0.8598710589497033, "grad_norm": 1.8331724405288696, "learning_rate": 7.375881975625401e-07, "loss": 0.7561, "step": 70555 }, { "epoch": 0.859931995173851, "grad_norm": 1.891348123550415, "learning_rate": 7.372674791533034e-07, "loss": 0.8596, "step": 70560 }, { "epoch": 0.8599929313979988, "grad_norm": 1.7203577756881714, "learning_rate": 7.369467607440669e-07, "loss": 0.8249, "step": 70565 }, { "epoch": 0.8600538676221466, "grad_norm": 2.1096603870391846, "learning_rate": 7.366260423348301e-07, "loss": 0.7661, "step": 70570 }, { "epoch": 0.8601148038462945, "grad_norm": 1.9439276456832886, "learning_rate": 7.363053239255933e-07, "loss": 0.7957, "step": 70575 }, { "epoch": 0.8601757400704423, "grad_norm": 1.7599852085113525, "learning_rate": 7.359846055163567e-07, "loss": 0.8334, "step": 70580 }, { "epoch": 0.8602366762945901, "grad_norm": 2.0728888511657715, "learning_rate": 7.356638871071201e-07, "loss": 0.7296, "step": 70585 }, { "epoch": 0.8602976125187379, "grad_norm": 2.631971836090088, "learning_rate": 7.353431686978833e-07, "loss": 0.858, "step": 70590 }, { "epoch": 0.8603585487428856, "grad_norm": 1.889606237411499, "learning_rate": 7.350224502886466e-07, "loss": 0.8392, "step": 70595 }, { "epoch": 0.8604194849670335, "grad_norm": 2.1776139736175537, "learning_rate": 7.347017318794099e-07, "loss": 0.8338, "step": 70600 }, { "epoch": 0.8604804211911813, "grad_norm": 2.0330970287323, "learning_rate": 7.343810134701733e-07, "loss": 0.8173, "step": 70605 }, { "epoch": 0.8605413574153291, "grad_norm": 2.178025722503662, "learning_rate": 7.340602950609366e-07, "loss": 0.7936, "step": 70610 }, { "epoch": 0.8606022936394769, "grad_norm": 1.9601625204086304, "learning_rate": 7.337395766516998e-07, "loss": 0.8149, "step": 70615 }, { "epoch": 0.8606632298636248, "grad_norm": 1.6745545864105225, "learning_rate": 7.334188582424631e-07, "loss": 0.8313, "step": 70620 }, { "epoch": 0.8607241660877726, "grad_norm": 1.8419814109802246, "learning_rate": 7.330981398332266e-07, "loss": 0.7256, "step": 70625 }, { "epoch": 0.8607851023119203, "grad_norm": 2.0108611583709717, "learning_rate": 7.327774214239898e-07, "loss": 0.7552, "step": 70630 }, { "epoch": 0.8608460385360681, "grad_norm": 2.1829702854156494, "learning_rate": 7.324567030147531e-07, "loss": 0.8509, "step": 70635 }, { "epoch": 0.8609069747602159, "grad_norm": 2.001389265060425, "learning_rate": 7.321359846055164e-07, "loss": 0.8978, "step": 70640 }, { "epoch": 0.8609679109843638, "grad_norm": 1.8811196088790894, "learning_rate": 7.318152661962798e-07, "loss": 0.8227, "step": 70645 }, { "epoch": 0.8610288472085116, "grad_norm": 1.8927079439163208, "learning_rate": 7.31494547787043e-07, "loss": 0.842, "step": 70650 }, { "epoch": 0.8610897834326594, "grad_norm": 1.7973095178604126, "learning_rate": 7.311738293778064e-07, "loss": 0.8947, "step": 70655 }, { "epoch": 0.8611507196568072, "grad_norm": 1.9454225301742554, "learning_rate": 7.308531109685696e-07, "loss": 0.8188, "step": 70660 }, { "epoch": 0.8612116558809549, "grad_norm": 2.494520664215088, "learning_rate": 7.30532392559333e-07, "loss": 0.7497, "step": 70665 }, { "epoch": 0.8612725921051028, "grad_norm": 2.19750714302063, "learning_rate": 7.302116741500963e-07, "loss": 0.807, "step": 70670 }, { "epoch": 0.8613335283292506, "grad_norm": 2.107245445251465, "learning_rate": 7.298909557408596e-07, "loss": 0.7737, "step": 70675 }, { "epoch": 0.8613944645533984, "grad_norm": 1.8878147602081299, "learning_rate": 7.295702373316228e-07, "loss": 0.7286, "step": 70680 }, { "epoch": 0.8614554007775462, "grad_norm": 2.05119252204895, "learning_rate": 7.292495189223863e-07, "loss": 0.7894, "step": 70685 }, { "epoch": 0.861516337001694, "grad_norm": 1.9070971012115479, "learning_rate": 7.289288005131495e-07, "loss": 0.8197, "step": 70690 }, { "epoch": 0.8615772732258419, "grad_norm": 1.7018247842788696, "learning_rate": 7.286080821039128e-07, "loss": 0.759, "step": 70695 }, { "epoch": 0.8616382094499896, "grad_norm": 1.992620587348938, "learning_rate": 7.282873636946762e-07, "loss": 0.7286, "step": 70700 }, { "epoch": 0.8616991456741374, "grad_norm": 1.8814443349838257, "learning_rate": 7.279666452854395e-07, "loss": 0.7814, "step": 70705 }, { "epoch": 0.8617600818982852, "grad_norm": 2.8199026584625244, "learning_rate": 7.276459268762027e-07, "loss": 0.8159, "step": 70710 }, { "epoch": 0.8618210181224331, "grad_norm": 1.7089111804962158, "learning_rate": 7.273252084669661e-07, "loss": 0.8943, "step": 70715 }, { "epoch": 0.8618819543465809, "grad_norm": 2.0652599334716797, "learning_rate": 7.270044900577294e-07, "loss": 0.7979, "step": 70720 }, { "epoch": 0.8619428905707287, "grad_norm": 1.783717155456543, "learning_rate": 7.266837716484927e-07, "loss": 0.8207, "step": 70725 }, { "epoch": 0.8620038267948765, "grad_norm": 1.8452359437942505, "learning_rate": 7.26363053239256e-07, "loss": 0.768, "step": 70730 }, { "epoch": 0.8620647630190242, "grad_norm": 1.9692938327789307, "learning_rate": 7.260423348300193e-07, "loss": 0.8689, "step": 70735 }, { "epoch": 0.8621256992431721, "grad_norm": 1.9743680953979492, "learning_rate": 7.257216164207826e-07, "loss": 0.8652, "step": 70740 }, { "epoch": 0.8621866354673199, "grad_norm": 1.994480013847351, "learning_rate": 7.25400898011546e-07, "loss": 0.8001, "step": 70745 }, { "epoch": 0.8622475716914677, "grad_norm": 1.7803987264633179, "learning_rate": 7.250801796023092e-07, "loss": 0.6447, "step": 70750 }, { "epoch": 0.8623085079156155, "grad_norm": 2.1394975185394287, "learning_rate": 7.247594611930725e-07, "loss": 0.7975, "step": 70755 }, { "epoch": 0.8623694441397634, "grad_norm": 1.9841350317001343, "learning_rate": 7.24438742783836e-07, "loss": 0.8247, "step": 70760 }, { "epoch": 0.8624303803639112, "grad_norm": 1.6317217350006104, "learning_rate": 7.241180243745992e-07, "loss": 0.8605, "step": 70765 }, { "epoch": 0.8624913165880589, "grad_norm": 2.0467050075531006, "learning_rate": 7.237973059653624e-07, "loss": 0.7933, "step": 70770 }, { "epoch": 0.8625522528122067, "grad_norm": 1.758987545967102, "learning_rate": 7.234765875561258e-07, "loss": 0.7034, "step": 70775 }, { "epoch": 0.8626131890363545, "grad_norm": 2.034043788909912, "learning_rate": 7.231558691468892e-07, "loss": 0.8, "step": 70780 }, { "epoch": 0.8626741252605024, "grad_norm": 1.7962247133255005, "learning_rate": 7.228351507376524e-07, "loss": 0.8987, "step": 70785 }, { "epoch": 0.8627350614846502, "grad_norm": 1.8471996784210205, "learning_rate": 7.225144323284157e-07, "loss": 0.7654, "step": 70790 }, { "epoch": 0.862795997708798, "grad_norm": 2.5702662467956543, "learning_rate": 7.22193713919179e-07, "loss": 0.8147, "step": 70795 }, { "epoch": 0.8628569339329458, "grad_norm": 1.8656373023986816, "learning_rate": 7.218729955099424e-07, "loss": 0.7793, "step": 70800 }, { "epoch": 0.8629178701570935, "grad_norm": 2.0936388969421387, "learning_rate": 7.215522771007057e-07, "loss": 0.762, "step": 70805 }, { "epoch": 0.8629788063812414, "grad_norm": 2.1238155364990234, "learning_rate": 7.21231558691469e-07, "loss": 0.8242, "step": 70810 }, { "epoch": 0.8630397426053892, "grad_norm": 1.7561010122299194, "learning_rate": 7.209108402822322e-07, "loss": 0.823, "step": 70815 }, { "epoch": 0.863100678829537, "grad_norm": 2.2325868606567383, "learning_rate": 7.205901218729957e-07, "loss": 0.8949, "step": 70820 }, { "epoch": 0.8631616150536848, "grad_norm": 1.947174310684204, "learning_rate": 7.202694034637589e-07, "loss": 0.8289, "step": 70825 }, { "epoch": 0.8632225512778327, "grad_norm": 2.266702651977539, "learning_rate": 7.199486850545222e-07, "loss": 0.8417, "step": 70830 }, { "epoch": 0.8632834875019805, "grad_norm": 1.8961446285247803, "learning_rate": 7.196279666452854e-07, "loss": 0.8157, "step": 70835 }, { "epoch": 0.8633444237261282, "grad_norm": 2.1389992237091064, "learning_rate": 7.193072482360489e-07, "loss": 0.8164, "step": 70840 }, { "epoch": 0.863405359950276, "grad_norm": 1.7368522882461548, "learning_rate": 7.189865298268121e-07, "loss": 0.7451, "step": 70845 }, { "epoch": 0.8634662961744238, "grad_norm": 1.8477303981781006, "learning_rate": 7.186658114175755e-07, "loss": 0.8289, "step": 70850 }, { "epoch": 0.8635272323985717, "grad_norm": 2.2531816959381104, "learning_rate": 7.183450930083387e-07, "loss": 0.7852, "step": 70855 }, { "epoch": 0.8635881686227195, "grad_norm": 2.024812936782837, "learning_rate": 7.180243745991021e-07, "loss": 0.8439, "step": 70860 }, { "epoch": 0.8636491048468673, "grad_norm": 1.8892313241958618, "learning_rate": 7.177036561898653e-07, "loss": 0.8238, "step": 70865 }, { "epoch": 0.8637100410710151, "grad_norm": 2.087918996810913, "learning_rate": 7.173829377806287e-07, "loss": 0.846, "step": 70870 }, { "epoch": 0.8637709772951628, "grad_norm": 1.8067747354507446, "learning_rate": 7.170622193713919e-07, "loss": 0.7778, "step": 70875 }, { "epoch": 0.8638319135193107, "grad_norm": 2.2249691486358643, "learning_rate": 7.167415009621554e-07, "loss": 0.7856, "step": 70880 }, { "epoch": 0.8638928497434585, "grad_norm": 1.9080876111984253, "learning_rate": 7.164207825529186e-07, "loss": 0.82, "step": 70885 }, { "epoch": 0.8639537859676063, "grad_norm": 2.031740427017212, "learning_rate": 7.161000641436819e-07, "loss": 0.7916, "step": 70890 }, { "epoch": 0.8640147221917541, "grad_norm": 2.1081430912017822, "learning_rate": 7.157793457344451e-07, "loss": 0.8408, "step": 70895 }, { "epoch": 0.864075658415902, "grad_norm": 1.9026962518692017, "learning_rate": 7.154586273252086e-07, "loss": 0.7787, "step": 70900 }, { "epoch": 0.8641365946400498, "grad_norm": 2.08203125, "learning_rate": 7.151379089159718e-07, "loss": 0.8293, "step": 70905 }, { "epoch": 0.8641975308641975, "grad_norm": 2.08620548248291, "learning_rate": 7.148171905067351e-07, "loss": 0.841, "step": 70910 }, { "epoch": 0.8642584670883453, "grad_norm": 2.153424024581909, "learning_rate": 7.144964720974984e-07, "loss": 0.7962, "step": 70915 }, { "epoch": 0.8643194033124931, "grad_norm": 1.865575909614563, "learning_rate": 7.141757536882618e-07, "loss": 0.772, "step": 70920 }, { "epoch": 0.864380339536641, "grad_norm": 1.9189071655273438, "learning_rate": 7.13855035279025e-07, "loss": 0.8159, "step": 70925 }, { "epoch": 0.8644412757607888, "grad_norm": 2.1859290599823, "learning_rate": 7.135343168697884e-07, "loss": 0.7951, "step": 70930 }, { "epoch": 0.8645022119849366, "grad_norm": 2.0486342906951904, "learning_rate": 7.132135984605516e-07, "loss": 0.7987, "step": 70935 }, { "epoch": 0.8645631482090844, "grad_norm": 1.799006700515747, "learning_rate": 7.12892880051315e-07, "loss": 0.7451, "step": 70940 }, { "epoch": 0.8646240844332321, "grad_norm": 2.217620849609375, "learning_rate": 7.125721616420783e-07, "loss": 0.7914, "step": 70945 }, { "epoch": 0.86468502065738, "grad_norm": 2.166226625442505, "learning_rate": 7.122514432328416e-07, "loss": 0.7587, "step": 70950 }, { "epoch": 0.8647459568815278, "grad_norm": 2.2139055728912354, "learning_rate": 7.119307248236048e-07, "loss": 0.7503, "step": 70955 }, { "epoch": 0.8648068931056756, "grad_norm": 1.6505467891693115, "learning_rate": 7.116100064143683e-07, "loss": 0.7773, "step": 70960 }, { "epoch": 0.8648678293298234, "grad_norm": 2.0759663581848145, "learning_rate": 7.112892880051315e-07, "loss": 0.7951, "step": 70965 }, { "epoch": 0.8649287655539712, "grad_norm": 2.0964598655700684, "learning_rate": 7.109685695958948e-07, "loss": 0.8347, "step": 70970 }, { "epoch": 0.8649897017781191, "grad_norm": 1.8427464962005615, "learning_rate": 7.106478511866581e-07, "loss": 0.7698, "step": 70975 }, { "epoch": 0.8650506380022668, "grad_norm": 2.502978801727295, "learning_rate": 7.103271327774215e-07, "loss": 0.8204, "step": 70980 }, { "epoch": 0.8651115742264146, "grad_norm": 2.2077932357788086, "learning_rate": 7.100064143681848e-07, "loss": 0.7819, "step": 70985 }, { "epoch": 0.8651725104505624, "grad_norm": 1.7244858741760254, "learning_rate": 7.096856959589481e-07, "loss": 0.8342, "step": 70990 }, { "epoch": 0.8652334466747102, "grad_norm": 1.8060733079910278, "learning_rate": 7.093649775497113e-07, "loss": 0.7911, "step": 70995 }, { "epoch": 0.8652943828988581, "grad_norm": 2.113840341567993, "learning_rate": 7.090442591404747e-07, "loss": 0.7525, "step": 71000 }, { "epoch": 0.8653553191230059, "grad_norm": 2.186879873275757, "learning_rate": 7.087235407312381e-07, "loss": 0.7941, "step": 71005 }, { "epoch": 0.8654162553471537, "grad_norm": 1.974298357963562, "learning_rate": 7.084028223220013e-07, "loss": 0.8437, "step": 71010 }, { "epoch": 0.8654771915713014, "grad_norm": 2.4682891368865967, "learning_rate": 7.080821039127647e-07, "loss": 0.8103, "step": 71015 }, { "epoch": 0.8655381277954493, "grad_norm": 1.827484369277954, "learning_rate": 7.07761385503528e-07, "loss": 0.8067, "step": 71020 }, { "epoch": 0.8655990640195971, "grad_norm": 2.011186361312866, "learning_rate": 7.074406670942913e-07, "loss": 0.8419, "step": 71025 }, { "epoch": 0.8656600002437449, "grad_norm": 1.7079309225082397, "learning_rate": 7.071199486850545e-07, "loss": 0.7635, "step": 71030 }, { "epoch": 0.8657209364678927, "grad_norm": 2.3769543170928955, "learning_rate": 7.06799230275818e-07, "loss": 0.8186, "step": 71035 }, { "epoch": 0.8657818726920405, "grad_norm": 2.2388410568237305, "learning_rate": 7.064785118665812e-07, "loss": 0.797, "step": 71040 }, { "epoch": 0.8658428089161884, "grad_norm": 2.1562068462371826, "learning_rate": 7.061577934573445e-07, "loss": 0.8248, "step": 71045 }, { "epoch": 0.8659037451403361, "grad_norm": 2.0103225708007812, "learning_rate": 7.058370750481078e-07, "loss": 0.7662, "step": 71050 }, { "epoch": 0.8659646813644839, "grad_norm": 1.8920881748199463, "learning_rate": 7.055163566388712e-07, "loss": 0.818, "step": 71055 }, { "epoch": 0.8660256175886317, "grad_norm": 2.0327816009521484, "learning_rate": 7.051956382296344e-07, "loss": 0.8038, "step": 71060 }, { "epoch": 0.8660865538127795, "grad_norm": 1.9424118995666504, "learning_rate": 7.048749198203978e-07, "loss": 0.8266, "step": 71065 }, { "epoch": 0.8661474900369274, "grad_norm": 2.680391550064087, "learning_rate": 7.04554201411161e-07, "loss": 0.7862, "step": 71070 }, { "epoch": 0.8662084262610752, "grad_norm": 1.7177807092666626, "learning_rate": 7.042334830019244e-07, "loss": 0.7665, "step": 71075 }, { "epoch": 0.866269362485223, "grad_norm": 1.7992568016052246, "learning_rate": 7.039127645926877e-07, "loss": 0.8286, "step": 71080 }, { "epoch": 0.8663302987093707, "grad_norm": 2.1843597888946533, "learning_rate": 7.03592046183451e-07, "loss": 0.7942, "step": 71085 }, { "epoch": 0.8663912349335186, "grad_norm": 2.5068020820617676, "learning_rate": 7.032713277742142e-07, "loss": 0.8134, "step": 71090 }, { "epoch": 0.8664521711576664, "grad_norm": 1.6637498140335083, "learning_rate": 7.029506093649777e-07, "loss": 0.8619, "step": 71095 }, { "epoch": 0.8665131073818142, "grad_norm": 1.9550281763076782, "learning_rate": 7.026298909557409e-07, "loss": 0.8121, "step": 71100 }, { "epoch": 0.866574043605962, "grad_norm": 1.7949578762054443, "learning_rate": 7.023091725465042e-07, "loss": 0.8227, "step": 71105 }, { "epoch": 0.8666349798301098, "grad_norm": 2.017909288406372, "learning_rate": 7.019884541372675e-07, "loss": 0.7334, "step": 71110 }, { "epoch": 0.8666959160542577, "grad_norm": 1.8546042442321777, "learning_rate": 7.016677357280309e-07, "loss": 0.7907, "step": 71115 }, { "epoch": 0.8667568522784054, "grad_norm": 2.2737224102020264, "learning_rate": 7.013470173187941e-07, "loss": 0.8224, "step": 71120 }, { "epoch": 0.8668177885025532, "grad_norm": 2.2056422233581543, "learning_rate": 7.010262989095575e-07, "loss": 0.8063, "step": 71125 }, { "epoch": 0.866878724726701, "grad_norm": 2.1913228034973145, "learning_rate": 7.007055805003207e-07, "loss": 0.8299, "step": 71130 }, { "epoch": 0.8669396609508488, "grad_norm": 1.5930196046829224, "learning_rate": 7.003848620910841e-07, "loss": 0.7609, "step": 71135 }, { "epoch": 0.8670005971749967, "grad_norm": 1.7973047494888306, "learning_rate": 7.000641436818474e-07, "loss": 0.7667, "step": 71140 }, { "epoch": 0.8670615333991445, "grad_norm": 1.942455530166626, "learning_rate": 6.997434252726107e-07, "loss": 0.8257, "step": 71145 }, { "epoch": 0.8671224696232923, "grad_norm": 2.3835957050323486, "learning_rate": 6.994227068633739e-07, "loss": 0.8156, "step": 71150 }, { "epoch": 0.86718340584744, "grad_norm": 2.345191240310669, "learning_rate": 6.991019884541374e-07, "loss": 0.7244, "step": 71155 }, { "epoch": 0.8672443420715878, "grad_norm": 2.386852741241455, "learning_rate": 6.987812700449007e-07, "loss": 0.8102, "step": 71160 }, { "epoch": 0.8673052782957357, "grad_norm": 1.7816725969314575, "learning_rate": 6.984605516356639e-07, "loss": 0.8024, "step": 71165 }, { "epoch": 0.8673662145198835, "grad_norm": 2.120985507965088, "learning_rate": 6.981398332264272e-07, "loss": 0.8033, "step": 71170 }, { "epoch": 0.8674271507440313, "grad_norm": 1.9519654512405396, "learning_rate": 6.978191148171906e-07, "loss": 0.8968, "step": 71175 }, { "epoch": 0.8674880869681791, "grad_norm": 1.5588666200637817, "learning_rate": 6.974983964079539e-07, "loss": 0.7677, "step": 71180 }, { "epoch": 0.867549023192327, "grad_norm": 2.065495491027832, "learning_rate": 6.971776779987172e-07, "loss": 0.7566, "step": 71185 }, { "epoch": 0.8676099594164747, "grad_norm": 2.226557731628418, "learning_rate": 6.968569595894804e-07, "loss": 0.7873, "step": 71190 }, { "epoch": 0.8676708956406225, "grad_norm": 2.144517421722412, "learning_rate": 6.965362411802438e-07, "loss": 0.8163, "step": 71195 }, { "epoch": 0.8677318318647703, "grad_norm": 2.0307650566101074, "learning_rate": 6.962155227710072e-07, "loss": 0.7793, "step": 71200 }, { "epoch": 0.8677927680889181, "grad_norm": 1.893210530281067, "learning_rate": 6.958948043617704e-07, "loss": 0.8415, "step": 71205 }, { "epoch": 0.867853704313066, "grad_norm": 2.281102418899536, "learning_rate": 6.955740859525336e-07, "loss": 0.827, "step": 71210 }, { "epoch": 0.8679146405372138, "grad_norm": 1.8456851243972778, "learning_rate": 6.952533675432971e-07, "loss": 0.7779, "step": 71215 }, { "epoch": 0.8679755767613616, "grad_norm": 1.8861068487167358, "learning_rate": 6.949326491340604e-07, "loss": 0.749, "step": 71220 }, { "epoch": 0.8680365129855093, "grad_norm": 2.867901563644409, "learning_rate": 6.946119307248236e-07, "loss": 0.8927, "step": 71225 }, { "epoch": 0.8680974492096571, "grad_norm": 1.7538310289382935, "learning_rate": 6.94291212315587e-07, "loss": 0.759, "step": 71230 }, { "epoch": 0.868158385433805, "grad_norm": 2.200620174407959, "learning_rate": 6.939704939063503e-07, "loss": 0.7938, "step": 71235 }, { "epoch": 0.8682193216579528, "grad_norm": 1.6956284046173096, "learning_rate": 6.936497754971136e-07, "loss": 0.8295, "step": 71240 }, { "epoch": 0.8682802578821006, "grad_norm": 2.0418989658355713, "learning_rate": 6.933290570878769e-07, "loss": 0.7007, "step": 71245 }, { "epoch": 0.8683411941062484, "grad_norm": 1.6186078786849976, "learning_rate": 6.930083386786402e-07, "loss": 0.8036, "step": 71250 }, { "epoch": 0.8684021303303963, "grad_norm": 2.2740650177001953, "learning_rate": 6.926876202694035e-07, "loss": 0.8318, "step": 71255 }, { "epoch": 0.868463066554544, "grad_norm": 1.5630269050598145, "learning_rate": 6.923669018601669e-07, "loss": 0.8705, "step": 71260 }, { "epoch": 0.8685240027786918, "grad_norm": 2.3562533855438232, "learning_rate": 6.920461834509301e-07, "loss": 0.761, "step": 71265 }, { "epoch": 0.8685849390028396, "grad_norm": 2.13140869140625, "learning_rate": 6.917254650416934e-07, "loss": 0.8787, "step": 71270 }, { "epoch": 0.8686458752269874, "grad_norm": 1.8849787712097168, "learning_rate": 6.914047466324568e-07, "loss": 0.831, "step": 71275 }, { "epoch": 0.8687068114511353, "grad_norm": 2.095686674118042, "learning_rate": 6.910840282232201e-07, "loss": 0.857, "step": 71280 }, { "epoch": 0.8687677476752831, "grad_norm": 2.0969398021698, "learning_rate": 6.907633098139833e-07, "loss": 0.8022, "step": 71285 }, { "epoch": 0.8688286838994309, "grad_norm": 2.0436487197875977, "learning_rate": 6.904425914047467e-07, "loss": 0.7376, "step": 71290 }, { "epoch": 0.8688896201235786, "grad_norm": 2.7064688205718994, "learning_rate": 6.9012187299551e-07, "loss": 0.8178, "step": 71295 }, { "epoch": 0.8689505563477264, "grad_norm": 1.9280873537063599, "learning_rate": 6.898011545862733e-07, "loss": 0.8199, "step": 71300 }, { "epoch": 0.8690114925718743, "grad_norm": 2.0611555576324463, "learning_rate": 6.894804361770366e-07, "loss": 0.8268, "step": 71305 }, { "epoch": 0.8690724287960221, "grad_norm": 2.0557734966278076, "learning_rate": 6.891597177678e-07, "loss": 0.7997, "step": 71310 }, { "epoch": 0.8691333650201699, "grad_norm": 2.033822536468506, "learning_rate": 6.888389993585632e-07, "loss": 0.8391, "step": 71315 }, { "epoch": 0.8691943012443177, "grad_norm": 1.7737202644348145, "learning_rate": 6.885182809493266e-07, "loss": 0.7933, "step": 71320 }, { "epoch": 0.8692552374684656, "grad_norm": 1.913062334060669, "learning_rate": 6.881975625400898e-07, "loss": 0.828, "step": 71325 }, { "epoch": 0.8693161736926133, "grad_norm": 2.44545316696167, "learning_rate": 6.878768441308532e-07, "loss": 0.8487, "step": 71330 }, { "epoch": 0.8693771099167611, "grad_norm": 1.758527159690857, "learning_rate": 6.875561257216166e-07, "loss": 0.7997, "step": 71335 }, { "epoch": 0.8694380461409089, "grad_norm": 2.039684295654297, "learning_rate": 6.872354073123798e-07, "loss": 0.8286, "step": 71340 }, { "epoch": 0.8694989823650567, "grad_norm": 1.9576083421707153, "learning_rate": 6.86914688903143e-07, "loss": 0.8249, "step": 71345 }, { "epoch": 0.8695599185892046, "grad_norm": 1.9700603485107422, "learning_rate": 6.865939704939065e-07, "loss": 0.7655, "step": 71350 }, { "epoch": 0.8696208548133524, "grad_norm": 1.8862180709838867, "learning_rate": 6.862732520846698e-07, "loss": 0.8018, "step": 71355 }, { "epoch": 0.8696817910375002, "grad_norm": 2.175936698913574, "learning_rate": 6.85952533675433e-07, "loss": 0.861, "step": 71360 }, { "epoch": 0.8697427272616479, "grad_norm": 1.8382468223571777, "learning_rate": 6.856318152661962e-07, "loss": 0.7903, "step": 71365 }, { "epoch": 0.8698036634857957, "grad_norm": 2.080622911453247, "learning_rate": 6.853110968569597e-07, "loss": 0.8024, "step": 71370 }, { "epoch": 0.8698645997099436, "grad_norm": 2.163198709487915, "learning_rate": 6.84990378447723e-07, "loss": 0.8999, "step": 71375 }, { "epoch": 0.8699255359340914, "grad_norm": 2.0789949893951416, "learning_rate": 6.846696600384863e-07, "loss": 0.7535, "step": 71380 }, { "epoch": 0.8699864721582392, "grad_norm": 2.194931745529175, "learning_rate": 6.843489416292495e-07, "loss": 0.8353, "step": 71385 }, { "epoch": 0.870047408382387, "grad_norm": 2.179741382598877, "learning_rate": 6.840282232200129e-07, "loss": 0.7803, "step": 71390 }, { "epoch": 0.8701083446065349, "grad_norm": 2.0552186965942383, "learning_rate": 6.837075048107763e-07, "loss": 0.8782, "step": 71395 }, { "epoch": 0.8701692808306826, "grad_norm": 2.1894423961639404, "learning_rate": 6.833867864015395e-07, "loss": 0.7939, "step": 71400 }, { "epoch": 0.8702302170548304, "grad_norm": 1.8300093412399292, "learning_rate": 6.830660679923028e-07, "loss": 0.7781, "step": 71405 }, { "epoch": 0.8702911532789782, "grad_norm": 2.316291332244873, "learning_rate": 6.827453495830662e-07, "loss": 0.736, "step": 71410 }, { "epoch": 0.870352089503126, "grad_norm": 2.2137856483459473, "learning_rate": 6.824246311738295e-07, "loss": 0.8473, "step": 71415 }, { "epoch": 0.8704130257272739, "grad_norm": 2.8253281116485596, "learning_rate": 6.821039127645927e-07, "loss": 0.8141, "step": 71420 }, { "epoch": 0.8704739619514217, "grad_norm": 2.350191593170166, "learning_rate": 6.81783194355356e-07, "loss": 0.8232, "step": 71425 }, { "epoch": 0.8705348981755694, "grad_norm": 1.7557225227355957, "learning_rate": 6.814624759461194e-07, "loss": 0.8326, "step": 71430 }, { "epoch": 0.8705958343997172, "grad_norm": 2.0152196884155273, "learning_rate": 6.811417575368827e-07, "loss": 0.8063, "step": 71435 }, { "epoch": 0.870656770623865, "grad_norm": 1.9915484189987183, "learning_rate": 6.80821039127646e-07, "loss": 0.8133, "step": 71440 }, { "epoch": 0.8707177068480129, "grad_norm": 2.045936107635498, "learning_rate": 6.805003207184093e-07, "loss": 0.85, "step": 71445 }, { "epoch": 0.8707786430721607, "grad_norm": 2.032940626144409, "learning_rate": 6.801796023091726e-07, "loss": 0.848, "step": 71450 }, { "epoch": 0.8708395792963085, "grad_norm": 1.8087339401245117, "learning_rate": 6.79858883899936e-07, "loss": 0.8823, "step": 71455 }, { "epoch": 0.8709005155204563, "grad_norm": 2.201186418533325, "learning_rate": 6.795381654906992e-07, "loss": 0.8163, "step": 71460 }, { "epoch": 0.870961451744604, "grad_norm": 1.9723302125930786, "learning_rate": 6.792174470814625e-07, "loss": 0.899, "step": 71465 }, { "epoch": 0.8710223879687519, "grad_norm": 1.9368139505386353, "learning_rate": 6.788967286722258e-07, "loss": 0.8294, "step": 71470 }, { "epoch": 0.8710833241928997, "grad_norm": 1.8289557695388794, "learning_rate": 6.785760102629892e-07, "loss": 0.7554, "step": 71475 }, { "epoch": 0.8711442604170475, "grad_norm": 2.0865941047668457, "learning_rate": 6.782552918537524e-07, "loss": 0.8779, "step": 71480 }, { "epoch": 0.8712051966411953, "grad_norm": 2.0811877250671387, "learning_rate": 6.779345734445157e-07, "loss": 0.7559, "step": 71485 }, { "epoch": 0.8712661328653432, "grad_norm": 1.9999653100967407, "learning_rate": 6.776138550352791e-07, "loss": 0.767, "step": 71490 }, { "epoch": 0.871327069089491, "grad_norm": 2.8338444232940674, "learning_rate": 6.772931366260424e-07, "loss": 0.8332, "step": 71495 }, { "epoch": 0.8713880053136387, "grad_norm": 1.7181187868118286, "learning_rate": 6.769724182168056e-07, "loss": 0.7668, "step": 71500 }, { "epoch": 0.8714489415377865, "grad_norm": 1.7954199314117432, "learning_rate": 6.76651699807569e-07, "loss": 0.7709, "step": 71505 }, { "epoch": 0.8715098777619343, "grad_norm": 1.902867078781128, "learning_rate": 6.763309813983324e-07, "loss": 0.9214, "step": 71510 }, { "epoch": 0.8715708139860822, "grad_norm": 1.945737600326538, "learning_rate": 6.760102629890956e-07, "loss": 0.7685, "step": 71515 }, { "epoch": 0.87163175021023, "grad_norm": 2.1488795280456543, "learning_rate": 6.756895445798589e-07, "loss": 0.7679, "step": 71520 }, { "epoch": 0.8716926864343778, "grad_norm": 2.1427903175354004, "learning_rate": 6.753688261706222e-07, "loss": 0.8688, "step": 71525 }, { "epoch": 0.8717536226585256, "grad_norm": 2.0919957160949707, "learning_rate": 6.750481077613856e-07, "loss": 0.7952, "step": 71530 }, { "epoch": 0.8718145588826733, "grad_norm": 2.148214101791382, "learning_rate": 6.747273893521489e-07, "loss": 0.8419, "step": 71535 }, { "epoch": 0.8718754951068212, "grad_norm": 1.868070363998413, "learning_rate": 6.744066709429121e-07, "loss": 0.8049, "step": 71540 }, { "epoch": 0.871936431330969, "grad_norm": 2.2393290996551514, "learning_rate": 6.740859525336754e-07, "loss": 0.8143, "step": 71545 }, { "epoch": 0.8719973675551168, "grad_norm": 2.0003750324249268, "learning_rate": 6.737652341244389e-07, "loss": 0.7831, "step": 71550 }, { "epoch": 0.8720583037792646, "grad_norm": 2.1223928928375244, "learning_rate": 6.734445157152021e-07, "loss": 0.809, "step": 71555 }, { "epoch": 0.8721192400034125, "grad_norm": 1.8601226806640625, "learning_rate": 6.731237973059653e-07, "loss": 0.8384, "step": 71560 }, { "epoch": 0.8721801762275603, "grad_norm": 1.873889684677124, "learning_rate": 6.728030788967287e-07, "loss": 0.843, "step": 71565 }, { "epoch": 0.872241112451708, "grad_norm": 2.0737380981445312, "learning_rate": 6.724823604874921e-07, "loss": 0.8268, "step": 71570 }, { "epoch": 0.8723020486758558, "grad_norm": 1.6050035953521729, "learning_rate": 6.721616420782553e-07, "loss": 0.7578, "step": 71575 }, { "epoch": 0.8723629849000036, "grad_norm": 1.7658042907714844, "learning_rate": 6.718409236690187e-07, "loss": 0.8002, "step": 71580 }, { "epoch": 0.8724239211241515, "grad_norm": 1.9594677686691284, "learning_rate": 6.715202052597819e-07, "loss": 0.7659, "step": 71585 }, { "epoch": 0.8724848573482993, "grad_norm": 2.024847984313965, "learning_rate": 6.711994868505453e-07, "loss": 0.8064, "step": 71590 }, { "epoch": 0.8725457935724471, "grad_norm": 1.968186855316162, "learning_rate": 6.708787684413086e-07, "loss": 0.7357, "step": 71595 }, { "epoch": 0.8726067297965949, "grad_norm": 1.7351014614105225, "learning_rate": 6.705580500320719e-07, "loss": 0.7788, "step": 71600 }, { "epoch": 0.8726676660207426, "grad_norm": 1.9431347846984863, "learning_rate": 6.702373316228352e-07, "loss": 0.7567, "step": 71605 }, { "epoch": 0.8727286022448905, "grad_norm": 1.9218502044677734, "learning_rate": 6.699166132135986e-07, "loss": 0.7564, "step": 71610 }, { "epoch": 0.8727895384690383, "grad_norm": 1.8674077987670898, "learning_rate": 6.695958948043618e-07, "loss": 0.7779, "step": 71615 }, { "epoch": 0.8728504746931861, "grad_norm": 2.0219814777374268, "learning_rate": 6.692751763951251e-07, "loss": 0.8151, "step": 71620 }, { "epoch": 0.8729114109173339, "grad_norm": 2.021202564239502, "learning_rate": 6.689544579858885e-07, "loss": 0.7809, "step": 71625 }, { "epoch": 0.8729723471414818, "grad_norm": 1.857548475265503, "learning_rate": 6.686337395766518e-07, "loss": 0.839, "step": 71630 }, { "epoch": 0.8730332833656296, "grad_norm": 1.8309905529022217, "learning_rate": 6.68313021167415e-07, "loss": 0.7636, "step": 71635 }, { "epoch": 0.8730942195897773, "grad_norm": 1.8100610971450806, "learning_rate": 6.679923027581784e-07, "loss": 0.7998, "step": 71640 }, { "epoch": 0.8731551558139251, "grad_norm": 1.7422014474868774, "learning_rate": 6.676715843489417e-07, "loss": 0.7882, "step": 71645 }, { "epoch": 0.8732160920380729, "grad_norm": 1.8992255926132202, "learning_rate": 6.67350865939705e-07, "loss": 0.8019, "step": 71650 }, { "epoch": 0.8732770282622208, "grad_norm": 1.8384754657745361, "learning_rate": 6.670301475304683e-07, "loss": 0.796, "step": 71655 }, { "epoch": 0.8733379644863686, "grad_norm": 2.2897393703460693, "learning_rate": 6.667094291212316e-07, "loss": 0.8136, "step": 71660 }, { "epoch": 0.8733989007105164, "grad_norm": 1.6980377435684204, "learning_rate": 6.663887107119949e-07, "loss": 0.8165, "step": 71665 }, { "epoch": 0.8734598369346642, "grad_norm": 2.082998275756836, "learning_rate": 6.660679923027583e-07, "loss": 0.8376, "step": 71670 }, { "epoch": 0.8735207731588119, "grad_norm": 1.8454320430755615, "learning_rate": 6.657472738935215e-07, "loss": 0.8167, "step": 71675 }, { "epoch": 0.8735817093829598, "grad_norm": 1.690165400505066, "learning_rate": 6.654265554842848e-07, "loss": 0.81, "step": 71680 }, { "epoch": 0.8736426456071076, "grad_norm": 1.958492398262024, "learning_rate": 6.651058370750482e-07, "loss": 0.7806, "step": 71685 }, { "epoch": 0.8737035818312554, "grad_norm": 2.0083634853363037, "learning_rate": 6.647851186658115e-07, "loss": 0.7657, "step": 71690 }, { "epoch": 0.8737645180554032, "grad_norm": 2.106034517288208, "learning_rate": 6.644644002565747e-07, "loss": 0.804, "step": 71695 }, { "epoch": 0.873825454279551, "grad_norm": 3.2380876541137695, "learning_rate": 6.641436818473381e-07, "loss": 0.8086, "step": 71700 }, { "epoch": 0.8738863905036989, "grad_norm": 1.9802881479263306, "learning_rate": 6.638229634381015e-07, "loss": 0.789, "step": 71705 }, { "epoch": 0.8739473267278466, "grad_norm": 1.7285025119781494, "learning_rate": 6.635022450288647e-07, "loss": 0.8381, "step": 71710 }, { "epoch": 0.8740082629519944, "grad_norm": 1.9308815002441406, "learning_rate": 6.63181526619628e-07, "loss": 0.7885, "step": 71715 }, { "epoch": 0.8740691991761422, "grad_norm": 2.165529727935791, "learning_rate": 6.628608082103913e-07, "loss": 0.7882, "step": 71720 }, { "epoch": 0.87413013540029, "grad_norm": 2.4590871334075928, "learning_rate": 6.625400898011547e-07, "loss": 0.8224, "step": 71725 }, { "epoch": 0.8741910716244379, "grad_norm": 2.157233715057373, "learning_rate": 6.62219371391918e-07, "loss": 0.7728, "step": 71730 }, { "epoch": 0.8742520078485857, "grad_norm": 1.9177113771438599, "learning_rate": 6.618986529826812e-07, "loss": 0.8178, "step": 71735 }, { "epoch": 0.8743129440727335, "grad_norm": 1.8657795190811157, "learning_rate": 6.615779345734445e-07, "loss": 0.8086, "step": 71740 }, { "epoch": 0.8743738802968812, "grad_norm": 1.893441081047058, "learning_rate": 6.61257216164208e-07, "loss": 0.8306, "step": 71745 }, { "epoch": 0.8744348165210291, "grad_norm": 1.8601092100143433, "learning_rate": 6.609364977549712e-07, "loss": 0.7842, "step": 71750 }, { "epoch": 0.8744957527451769, "grad_norm": 1.8520212173461914, "learning_rate": 6.606157793457345e-07, "loss": 0.8501, "step": 71755 }, { "epoch": 0.8745566889693247, "grad_norm": 1.874127984046936, "learning_rate": 6.602950609364978e-07, "loss": 0.8614, "step": 71760 }, { "epoch": 0.8746176251934725, "grad_norm": 1.9340035915374756, "learning_rate": 6.599743425272612e-07, "loss": 0.8317, "step": 71765 }, { "epoch": 0.8746785614176203, "grad_norm": 1.8993544578552246, "learning_rate": 6.596536241180244e-07, "loss": 0.8355, "step": 71770 }, { "epoch": 0.8747394976417682, "grad_norm": 2.2010598182678223, "learning_rate": 6.593329057087878e-07, "loss": 0.8148, "step": 71775 }, { "epoch": 0.8748004338659159, "grad_norm": 2.1887123584747314, "learning_rate": 6.59012187299551e-07, "loss": 0.8588, "step": 71780 }, { "epoch": 0.8748613700900637, "grad_norm": 2.069200038909912, "learning_rate": 6.586914688903144e-07, "loss": 0.7177, "step": 71785 }, { "epoch": 0.8749223063142115, "grad_norm": 1.939088225364685, "learning_rate": 6.583707504810777e-07, "loss": 0.9169, "step": 71790 }, { "epoch": 0.8749832425383594, "grad_norm": 1.8433563709259033, "learning_rate": 6.58050032071841e-07, "loss": 0.8344, "step": 71795 }, { "epoch": 0.8750441787625072, "grad_norm": 1.8438314199447632, "learning_rate": 6.577293136626042e-07, "loss": 0.851, "step": 71800 }, { "epoch": 0.875105114986655, "grad_norm": 1.7760822772979736, "learning_rate": 6.574085952533677e-07, "loss": 0.8365, "step": 71805 }, { "epoch": 0.8751660512108028, "grad_norm": 2.340843915939331, "learning_rate": 6.570878768441309e-07, "loss": 0.77, "step": 71810 }, { "epoch": 0.8752269874349505, "grad_norm": 2.249772548675537, "learning_rate": 6.567671584348942e-07, "loss": 0.8254, "step": 71815 }, { "epoch": 0.8752879236590984, "grad_norm": 2.126896381378174, "learning_rate": 6.564464400256575e-07, "loss": 0.8358, "step": 71820 }, { "epoch": 0.8753488598832462, "grad_norm": 2.0225493907928467, "learning_rate": 6.561257216164209e-07, "loss": 0.7687, "step": 71825 }, { "epoch": 0.875409796107394, "grad_norm": 2.132838487625122, "learning_rate": 6.558050032071841e-07, "loss": 0.8253, "step": 71830 }, { "epoch": 0.8754707323315418, "grad_norm": 2.414767026901245, "learning_rate": 6.554842847979475e-07, "loss": 0.8173, "step": 71835 }, { "epoch": 0.8755316685556896, "grad_norm": 1.8747217655181885, "learning_rate": 6.551635663887107e-07, "loss": 0.7926, "step": 71840 }, { "epoch": 0.8755926047798375, "grad_norm": 2.177701234817505, "learning_rate": 6.548428479794741e-07, "loss": 0.8973, "step": 71845 }, { "epoch": 0.8756535410039852, "grad_norm": 1.899600625038147, "learning_rate": 6.545221295702374e-07, "loss": 0.8264, "step": 71850 }, { "epoch": 0.875714477228133, "grad_norm": 2.20105242729187, "learning_rate": 6.542014111610007e-07, "loss": 0.8781, "step": 71855 }, { "epoch": 0.8757754134522808, "grad_norm": 1.8441073894500732, "learning_rate": 6.538806927517639e-07, "loss": 0.6932, "step": 71860 }, { "epoch": 0.8758363496764286, "grad_norm": 2.3613951206207275, "learning_rate": 6.535599743425274e-07, "loss": 0.7712, "step": 71865 }, { "epoch": 0.8758972859005765, "grad_norm": 2.0714528560638428, "learning_rate": 6.532392559332906e-07, "loss": 0.7538, "step": 71870 }, { "epoch": 0.8759582221247243, "grad_norm": 2.182844877243042, "learning_rate": 6.529185375240539e-07, "loss": 0.8423, "step": 71875 }, { "epoch": 0.8760191583488721, "grad_norm": 1.884507179260254, "learning_rate": 6.525978191148172e-07, "loss": 0.8446, "step": 71880 }, { "epoch": 0.8760800945730198, "grad_norm": 1.9591543674468994, "learning_rate": 6.522771007055806e-07, "loss": 0.7933, "step": 71885 }, { "epoch": 0.8761410307971677, "grad_norm": 2.2321741580963135, "learning_rate": 6.519563822963438e-07, "loss": 0.831, "step": 71890 }, { "epoch": 0.8762019670213155, "grad_norm": 2.2612199783325195, "learning_rate": 6.516356638871072e-07, "loss": 0.8194, "step": 71895 }, { "epoch": 0.8762629032454633, "grad_norm": 2.1176884174346924, "learning_rate": 6.513149454778704e-07, "loss": 0.746, "step": 71900 }, { "epoch": 0.8763238394696111, "grad_norm": 1.874550461769104, "learning_rate": 6.509942270686338e-07, "loss": 0.8216, "step": 71905 }, { "epoch": 0.8763847756937589, "grad_norm": 2.2868740558624268, "learning_rate": 6.506735086593971e-07, "loss": 0.8227, "step": 71910 }, { "epoch": 0.8764457119179068, "grad_norm": 1.8347303867340088, "learning_rate": 6.503527902501604e-07, "loss": 0.8848, "step": 71915 }, { "epoch": 0.8765066481420545, "grad_norm": 1.5828847885131836, "learning_rate": 6.500320718409238e-07, "loss": 0.8695, "step": 71920 }, { "epoch": 0.8765675843662023, "grad_norm": 1.903888463973999, "learning_rate": 6.497113534316871e-07, "loss": 0.8149, "step": 71925 }, { "epoch": 0.8766285205903501, "grad_norm": 2.451089382171631, "learning_rate": 6.493906350224504e-07, "loss": 0.799, "step": 71930 }, { "epoch": 0.876689456814498, "grad_norm": 1.887791395187378, "learning_rate": 6.490699166132136e-07, "loss": 0.7895, "step": 71935 }, { "epoch": 0.8767503930386458, "grad_norm": 2.326188087463379, "learning_rate": 6.487491982039771e-07, "loss": 0.8399, "step": 71940 }, { "epoch": 0.8768113292627936, "grad_norm": 2.208470582962036, "learning_rate": 6.484284797947403e-07, "loss": 0.7755, "step": 71945 }, { "epoch": 0.8768722654869414, "grad_norm": 1.928194522857666, "learning_rate": 6.481077613855036e-07, "loss": 0.7111, "step": 71950 }, { "epoch": 0.8769332017110891, "grad_norm": 2.1474380493164062, "learning_rate": 6.477870429762669e-07, "loss": 0.7555, "step": 71955 }, { "epoch": 0.876994137935237, "grad_norm": 2.1709365844726562, "learning_rate": 6.474663245670303e-07, "loss": 0.8293, "step": 71960 }, { "epoch": 0.8770550741593848, "grad_norm": 2.379680871963501, "learning_rate": 6.471456061577935e-07, "loss": 0.8909, "step": 71965 }, { "epoch": 0.8771160103835326, "grad_norm": 2.1327757835388184, "learning_rate": 6.468248877485569e-07, "loss": 0.8075, "step": 71970 }, { "epoch": 0.8771769466076804, "grad_norm": 1.9195915460586548, "learning_rate": 6.465041693393201e-07, "loss": 0.7671, "step": 71975 }, { "epoch": 0.8772378828318282, "grad_norm": 1.9816844463348389, "learning_rate": 6.461834509300835e-07, "loss": 0.8411, "step": 71980 }, { "epoch": 0.8772988190559761, "grad_norm": 2.190575361251831, "learning_rate": 6.458627325208468e-07, "loss": 0.8736, "step": 71985 }, { "epoch": 0.8773597552801238, "grad_norm": 2.2196550369262695, "learning_rate": 6.455420141116101e-07, "loss": 0.8211, "step": 71990 }, { "epoch": 0.8774206915042716, "grad_norm": 2.0194554328918457, "learning_rate": 6.452212957023733e-07, "loss": 0.8504, "step": 71995 }, { "epoch": 0.8774816277284194, "grad_norm": 1.8843666315078735, "learning_rate": 6.449005772931368e-07, "loss": 0.8656, "step": 72000 }, { "epoch": 0.8775425639525672, "grad_norm": 1.8883733749389648, "learning_rate": 6.445798588839e-07, "loss": 0.7551, "step": 72005 }, { "epoch": 0.8776035001767151, "grad_norm": 1.9770156145095825, "learning_rate": 6.442591404746633e-07, "loss": 0.8183, "step": 72010 }, { "epoch": 0.8776644364008629, "grad_norm": 1.7371752262115479, "learning_rate": 6.439384220654265e-07, "loss": 0.7887, "step": 72015 }, { "epoch": 0.8777253726250107, "grad_norm": 2.1997761726379395, "learning_rate": 6.4361770365619e-07, "loss": 0.8906, "step": 72020 }, { "epoch": 0.8777863088491584, "grad_norm": 2.150641918182373, "learning_rate": 6.432969852469532e-07, "loss": 0.7965, "step": 72025 }, { "epoch": 0.8778472450733062, "grad_norm": 1.8001710176467896, "learning_rate": 6.429762668377165e-07, "loss": 0.8565, "step": 72030 }, { "epoch": 0.8779081812974541, "grad_norm": 2.2626163959503174, "learning_rate": 6.426555484284798e-07, "loss": 0.807, "step": 72035 }, { "epoch": 0.8779691175216019, "grad_norm": 1.7396647930145264, "learning_rate": 6.423348300192432e-07, "loss": 0.7781, "step": 72040 }, { "epoch": 0.8780300537457497, "grad_norm": 1.7071683406829834, "learning_rate": 6.420141116100064e-07, "loss": 0.8069, "step": 72045 }, { "epoch": 0.8780909899698975, "grad_norm": 2.1729238033294678, "learning_rate": 6.416933932007698e-07, "loss": 0.805, "step": 72050 }, { "epoch": 0.8781519261940454, "grad_norm": 1.9709213972091675, "learning_rate": 6.41372674791533e-07, "loss": 0.8542, "step": 72055 }, { "epoch": 0.8782128624181931, "grad_norm": 2.071624517440796, "learning_rate": 6.410519563822965e-07, "loss": 0.8053, "step": 72060 }, { "epoch": 0.8782737986423409, "grad_norm": 1.7191054821014404, "learning_rate": 6.407312379730597e-07, "loss": 0.7996, "step": 72065 }, { "epoch": 0.8783347348664887, "grad_norm": 1.9841691255569458, "learning_rate": 6.40410519563823e-07, "loss": 0.7643, "step": 72070 }, { "epoch": 0.8783956710906365, "grad_norm": 1.990890383720398, "learning_rate": 6.400898011545862e-07, "loss": 0.8262, "step": 72075 }, { "epoch": 0.8784566073147844, "grad_norm": 1.8581953048706055, "learning_rate": 6.397690827453497e-07, "loss": 0.8051, "step": 72080 }, { "epoch": 0.8785175435389322, "grad_norm": 2.0524024963378906, "learning_rate": 6.394483643361129e-07, "loss": 0.8284, "step": 72085 }, { "epoch": 0.87857847976308, "grad_norm": 1.7173625230789185, "learning_rate": 6.391276459268762e-07, "loss": 0.7907, "step": 72090 }, { "epoch": 0.8786394159872277, "grad_norm": 1.764520525932312, "learning_rate": 6.388069275176395e-07, "loss": 0.7853, "step": 72095 }, { "epoch": 0.8787003522113755, "grad_norm": 2.0317463874816895, "learning_rate": 6.384862091084029e-07, "loss": 0.7925, "step": 72100 }, { "epoch": 0.8787612884355234, "grad_norm": 2.036404848098755, "learning_rate": 6.381654906991662e-07, "loss": 0.8154, "step": 72105 }, { "epoch": 0.8788222246596712, "grad_norm": 1.7228951454162598, "learning_rate": 6.378447722899295e-07, "loss": 0.8178, "step": 72110 }, { "epoch": 0.878883160883819, "grad_norm": 2.075524091720581, "learning_rate": 6.375240538806927e-07, "loss": 0.7711, "step": 72115 }, { "epoch": 0.8789440971079668, "grad_norm": 1.9594855308532715, "learning_rate": 6.372033354714561e-07, "loss": 0.802, "step": 72120 }, { "epoch": 0.8790050333321147, "grad_norm": 2.092029333114624, "learning_rate": 6.368826170622195e-07, "loss": 0.7562, "step": 72125 }, { "epoch": 0.8790659695562624, "grad_norm": 2.0918819904327393, "learning_rate": 6.365618986529827e-07, "loss": 0.8099, "step": 72130 }, { "epoch": 0.8791269057804102, "grad_norm": 1.8450415134429932, "learning_rate": 6.362411802437459e-07, "loss": 0.8133, "step": 72135 }, { "epoch": 0.879187842004558, "grad_norm": 1.8400733470916748, "learning_rate": 6.359204618345094e-07, "loss": 0.7844, "step": 72140 }, { "epoch": 0.8792487782287058, "grad_norm": 2.2122159004211426, "learning_rate": 6.355997434252727e-07, "loss": 0.8945, "step": 72145 }, { "epoch": 0.8793097144528537, "grad_norm": 2.0092320442199707, "learning_rate": 6.352790250160359e-07, "loss": 0.8505, "step": 72150 }, { "epoch": 0.8793706506770015, "grad_norm": 2.028690814971924, "learning_rate": 6.349583066067992e-07, "loss": 0.8333, "step": 72155 }, { "epoch": 0.8794315869011493, "grad_norm": 1.6939353942871094, "learning_rate": 6.346375881975626e-07, "loss": 0.7598, "step": 72160 }, { "epoch": 0.879492523125297, "grad_norm": 2.3598411083221436, "learning_rate": 6.343168697883259e-07, "loss": 0.755, "step": 72165 }, { "epoch": 0.8795534593494448, "grad_norm": 2.0734920501708984, "learning_rate": 6.339961513790892e-07, "loss": 0.7936, "step": 72170 }, { "epoch": 0.8796143955735927, "grad_norm": 1.978017807006836, "learning_rate": 6.336754329698525e-07, "loss": 0.7747, "step": 72175 }, { "epoch": 0.8796753317977405, "grad_norm": 1.7930750846862793, "learning_rate": 6.333547145606158e-07, "loss": 0.774, "step": 72180 }, { "epoch": 0.8797362680218883, "grad_norm": 2.147808790206909, "learning_rate": 6.330339961513792e-07, "loss": 0.8196, "step": 72185 }, { "epoch": 0.8797972042460361, "grad_norm": 2.3593459129333496, "learning_rate": 6.327132777421424e-07, "loss": 0.8678, "step": 72190 }, { "epoch": 0.879858140470184, "grad_norm": 2.1669373512268066, "learning_rate": 6.323925593329057e-07, "loss": 0.7678, "step": 72195 }, { "epoch": 0.8799190766943317, "grad_norm": 2.0854570865631104, "learning_rate": 6.320718409236691e-07, "loss": 0.8966, "step": 72200 }, { "epoch": 0.8799800129184795, "grad_norm": 2.0190820693969727, "learning_rate": 6.317511225144324e-07, "loss": 0.8375, "step": 72205 }, { "epoch": 0.8800409491426273, "grad_norm": 2.247119188308716, "learning_rate": 6.314304041051956e-07, "loss": 0.8854, "step": 72210 }, { "epoch": 0.8801018853667751, "grad_norm": 2.2241218090057373, "learning_rate": 6.311096856959591e-07, "loss": 0.8456, "step": 72215 }, { "epoch": 0.880162821590923, "grad_norm": 2.216804265975952, "learning_rate": 6.307889672867223e-07, "loss": 0.8314, "step": 72220 }, { "epoch": 0.8802237578150708, "grad_norm": 2.5677103996276855, "learning_rate": 6.304682488774856e-07, "loss": 0.8097, "step": 72225 }, { "epoch": 0.8802846940392186, "grad_norm": 1.8410309553146362, "learning_rate": 6.301475304682489e-07, "loss": 0.7096, "step": 72230 }, { "epoch": 0.8803456302633663, "grad_norm": 1.903986930847168, "learning_rate": 6.298268120590123e-07, "loss": 0.7175, "step": 72235 }, { "epoch": 0.8804065664875141, "grad_norm": 1.955086350440979, "learning_rate": 6.295060936497755e-07, "loss": 0.7135, "step": 72240 }, { "epoch": 0.880467502711662, "grad_norm": 2.393357038497925, "learning_rate": 6.291853752405389e-07, "loss": 0.8265, "step": 72245 }, { "epoch": 0.8805284389358098, "grad_norm": 1.775400996208191, "learning_rate": 6.288646568313021e-07, "loss": 0.752, "step": 72250 }, { "epoch": 0.8805893751599576, "grad_norm": 1.985199213027954, "learning_rate": 6.285439384220655e-07, "loss": 0.8621, "step": 72255 }, { "epoch": 0.8806503113841054, "grad_norm": 2.1980884075164795, "learning_rate": 6.282232200128288e-07, "loss": 0.8121, "step": 72260 }, { "epoch": 0.8807112476082533, "grad_norm": 1.925275444984436, "learning_rate": 6.279025016035921e-07, "loss": 0.7519, "step": 72265 }, { "epoch": 0.880772183832401, "grad_norm": 1.9880588054656982, "learning_rate": 6.275817831943553e-07, "loss": 0.8091, "step": 72270 }, { "epoch": 0.8808331200565488, "grad_norm": 1.8180278539657593, "learning_rate": 6.272610647851188e-07, "loss": 0.7671, "step": 72275 }, { "epoch": 0.8808940562806966, "grad_norm": 2.1355321407318115, "learning_rate": 6.269403463758821e-07, "loss": 0.8153, "step": 72280 }, { "epoch": 0.8809549925048444, "grad_norm": 1.7217907905578613, "learning_rate": 6.266196279666453e-07, "loss": 0.8263, "step": 72285 }, { "epoch": 0.8810159287289923, "grad_norm": 1.7270004749298096, "learning_rate": 6.262989095574086e-07, "loss": 0.771, "step": 72290 }, { "epoch": 0.8810768649531401, "grad_norm": 2.3801393508911133, "learning_rate": 6.25978191148172e-07, "loss": 0.7782, "step": 72295 }, { "epoch": 0.8811378011772879, "grad_norm": 1.7958765029907227, "learning_rate": 6.256574727389353e-07, "loss": 0.7397, "step": 72300 }, { "epoch": 0.8811987374014356, "grad_norm": 2.463974714279175, "learning_rate": 6.253367543296986e-07, "loss": 0.8835, "step": 72305 }, { "epoch": 0.8812596736255834, "grad_norm": 1.8450837135314941, "learning_rate": 6.250160359204618e-07, "loss": 0.7919, "step": 72310 }, { "epoch": 0.8813206098497313, "grad_norm": 1.9897985458374023, "learning_rate": 6.246953175112252e-07, "loss": 0.8036, "step": 72315 }, { "epoch": 0.8813815460738791, "grad_norm": 2.230206251144409, "learning_rate": 6.243745991019886e-07, "loss": 0.7975, "step": 72320 }, { "epoch": 0.8814424822980269, "grad_norm": 2.123939275741577, "learning_rate": 6.240538806927518e-07, "loss": 0.8267, "step": 72325 }, { "epoch": 0.8815034185221747, "grad_norm": 1.9486392736434937, "learning_rate": 6.237331622835151e-07, "loss": 0.8665, "step": 72330 }, { "epoch": 0.8815643547463226, "grad_norm": 2.031862258911133, "learning_rate": 6.234124438742785e-07, "loss": 0.7963, "step": 72335 }, { "epoch": 0.8816252909704703, "grad_norm": 2.004692316055298, "learning_rate": 6.230917254650418e-07, "loss": 0.847, "step": 72340 }, { "epoch": 0.8816862271946181, "grad_norm": 2.2365269660949707, "learning_rate": 6.22771007055805e-07, "loss": 0.8432, "step": 72345 }, { "epoch": 0.8817471634187659, "grad_norm": 1.9158883094787598, "learning_rate": 6.224502886465684e-07, "loss": 0.805, "step": 72350 }, { "epoch": 0.8818080996429137, "grad_norm": 1.9937132596969604, "learning_rate": 6.221295702373317e-07, "loss": 0.8231, "step": 72355 }, { "epoch": 0.8818690358670616, "grad_norm": 2.971313238143921, "learning_rate": 6.21808851828095e-07, "loss": 0.8437, "step": 72360 }, { "epoch": 0.8819299720912094, "grad_norm": 1.725123405456543, "learning_rate": 6.214881334188583e-07, "loss": 0.8597, "step": 72365 }, { "epoch": 0.8819909083153572, "grad_norm": 2.0582306385040283, "learning_rate": 6.211674150096216e-07, "loss": 0.6952, "step": 72370 }, { "epoch": 0.8820518445395049, "grad_norm": 2.2669224739074707, "learning_rate": 6.208466966003849e-07, "loss": 0.875, "step": 72375 }, { "epoch": 0.8821127807636527, "grad_norm": 1.8537157773971558, "learning_rate": 6.205259781911483e-07, "loss": 0.8206, "step": 72380 }, { "epoch": 0.8821737169878006, "grad_norm": 1.6611170768737793, "learning_rate": 6.202052597819115e-07, "loss": 0.8408, "step": 72385 }, { "epoch": 0.8822346532119484, "grad_norm": 2.025634288787842, "learning_rate": 6.198845413726748e-07, "loss": 0.7803, "step": 72390 }, { "epoch": 0.8822955894360962, "grad_norm": 1.936163067817688, "learning_rate": 6.195638229634382e-07, "loss": 0.8096, "step": 72395 }, { "epoch": 0.882356525660244, "grad_norm": 2.084477663040161, "learning_rate": 6.192431045542015e-07, "loss": 0.7596, "step": 72400 }, { "epoch": 0.8824174618843917, "grad_norm": 1.9561998844146729, "learning_rate": 6.189223861449647e-07, "loss": 0.7698, "step": 72405 }, { "epoch": 0.8824783981085396, "grad_norm": 1.8311493396759033, "learning_rate": 6.186016677357281e-07, "loss": 0.8172, "step": 72410 }, { "epoch": 0.8825393343326874, "grad_norm": 1.9002101421356201, "learning_rate": 6.182809493264914e-07, "loss": 0.7989, "step": 72415 }, { "epoch": 0.8826002705568352, "grad_norm": 1.840410828590393, "learning_rate": 6.179602309172547e-07, "loss": 0.8668, "step": 72420 }, { "epoch": 0.882661206780983, "grad_norm": 2.035933017730713, "learning_rate": 6.17639512508018e-07, "loss": 0.7956, "step": 72425 }, { "epoch": 0.8827221430051309, "grad_norm": 1.6316372156143188, "learning_rate": 6.173187940987813e-07, "loss": 0.7561, "step": 72430 }, { "epoch": 0.8827830792292787, "grad_norm": 2.0323493480682373, "learning_rate": 6.169980756895446e-07, "loss": 0.7527, "step": 72435 }, { "epoch": 0.8828440154534264, "grad_norm": 1.7110079526901245, "learning_rate": 6.16677357280308e-07, "loss": 0.73, "step": 72440 }, { "epoch": 0.8829049516775742, "grad_norm": 1.7918522357940674, "learning_rate": 6.163566388710712e-07, "loss": 0.7631, "step": 72445 }, { "epoch": 0.882965887901722, "grad_norm": 2.311934471130371, "learning_rate": 6.160359204618345e-07, "loss": 0.8455, "step": 72450 }, { "epoch": 0.8830268241258699, "grad_norm": 2.3293442726135254, "learning_rate": 6.157152020525979e-07, "loss": 0.7654, "step": 72455 }, { "epoch": 0.8830877603500177, "grad_norm": 2.191208600997925, "learning_rate": 6.153944836433612e-07, "loss": 0.8413, "step": 72460 }, { "epoch": 0.8831486965741655, "grad_norm": 2.106126308441162, "learning_rate": 6.150737652341244e-07, "loss": 0.756, "step": 72465 }, { "epoch": 0.8832096327983133, "grad_norm": 1.7146947383880615, "learning_rate": 6.147530468248878e-07, "loss": 0.8101, "step": 72470 }, { "epoch": 0.883270569022461, "grad_norm": 2.3079464435577393, "learning_rate": 6.144323284156511e-07, "loss": 0.8399, "step": 72475 }, { "epoch": 0.8833315052466089, "grad_norm": 2.1083552837371826, "learning_rate": 6.141116100064144e-07, "loss": 0.8379, "step": 72480 }, { "epoch": 0.8833924414707567, "grad_norm": 1.9173144102096558, "learning_rate": 6.137908915971777e-07, "loss": 0.808, "step": 72485 }, { "epoch": 0.8834533776949045, "grad_norm": 1.9622243642807007, "learning_rate": 6.134701731879411e-07, "loss": 0.812, "step": 72490 }, { "epoch": 0.8835143139190523, "grad_norm": 2.0714001655578613, "learning_rate": 6.131494547787043e-07, "loss": 0.7881, "step": 72495 }, { "epoch": 0.8835752501432002, "grad_norm": 1.7979246377944946, "learning_rate": 6.128287363694677e-07, "loss": 0.8497, "step": 72500 }, { "epoch": 0.883636186367348, "grad_norm": 2.347532033920288, "learning_rate": 6.125080179602309e-07, "loss": 0.7353, "step": 72505 }, { "epoch": 0.8836971225914957, "grad_norm": 2.338243007659912, "learning_rate": 6.121872995509943e-07, "loss": 0.8466, "step": 72510 }, { "epoch": 0.8837580588156435, "grad_norm": 1.861939549446106, "learning_rate": 6.118665811417577e-07, "loss": 0.829, "step": 72515 }, { "epoch": 0.8838189950397913, "grad_norm": 2.072418689727783, "learning_rate": 6.115458627325209e-07, "loss": 0.7478, "step": 72520 }, { "epoch": 0.8838799312639392, "grad_norm": 2.5752806663513184, "learning_rate": 6.112251443232842e-07, "loss": 0.7966, "step": 72525 }, { "epoch": 0.883940867488087, "grad_norm": 2.1002533435821533, "learning_rate": 6.109044259140476e-07, "loss": 0.7773, "step": 72530 }, { "epoch": 0.8840018037122348, "grad_norm": 2.0546064376831055, "learning_rate": 6.105837075048109e-07, "loss": 0.7861, "step": 72535 }, { "epoch": 0.8840627399363826, "grad_norm": 2.46355938911438, "learning_rate": 6.102629890955741e-07, "loss": 0.829, "step": 72540 }, { "epoch": 0.8841236761605303, "grad_norm": 1.7122807502746582, "learning_rate": 6.099422706863375e-07, "loss": 0.7928, "step": 72545 }, { "epoch": 0.8841846123846782, "grad_norm": 3.103745222091675, "learning_rate": 6.096215522771008e-07, "loss": 0.7817, "step": 72550 }, { "epoch": 0.884245548608826, "grad_norm": 1.7584466934204102, "learning_rate": 6.093008338678641e-07, "loss": 0.8621, "step": 72555 }, { "epoch": 0.8843064848329738, "grad_norm": 2.789133310317993, "learning_rate": 6.089801154586274e-07, "loss": 0.7908, "step": 72560 }, { "epoch": 0.8843674210571216, "grad_norm": 2.030958652496338, "learning_rate": 6.086593970493907e-07, "loss": 0.8078, "step": 72565 }, { "epoch": 0.8844283572812695, "grad_norm": 1.8782892227172852, "learning_rate": 6.08338678640154e-07, "loss": 0.7743, "step": 72570 }, { "epoch": 0.8844892935054173, "grad_norm": 2.147045612335205, "learning_rate": 6.080179602309174e-07, "loss": 0.8406, "step": 72575 }, { "epoch": 0.884550229729565, "grad_norm": 1.7930474281311035, "learning_rate": 6.076972418216806e-07, "loss": 0.8345, "step": 72580 }, { "epoch": 0.8846111659537128, "grad_norm": 1.7544255256652832, "learning_rate": 6.073765234124439e-07, "loss": 0.8127, "step": 72585 }, { "epoch": 0.8846721021778606, "grad_norm": 1.963099718093872, "learning_rate": 6.070558050032073e-07, "loss": 0.8099, "step": 72590 }, { "epoch": 0.8847330384020085, "grad_norm": 2.4212379455566406, "learning_rate": 6.067350865939706e-07, "loss": 0.838, "step": 72595 }, { "epoch": 0.8847939746261563, "grad_norm": 1.7018609046936035, "learning_rate": 6.064143681847338e-07, "loss": 0.8377, "step": 72600 }, { "epoch": 0.8848549108503041, "grad_norm": 1.8228713274002075, "learning_rate": 6.060936497754971e-07, "loss": 0.7269, "step": 72605 }, { "epoch": 0.8849158470744519, "grad_norm": 1.793880581855774, "learning_rate": 6.057729313662605e-07, "loss": 0.8158, "step": 72610 }, { "epoch": 0.8849767832985996, "grad_norm": 1.7978134155273438, "learning_rate": 6.054522129570238e-07, "loss": 0.8188, "step": 72615 }, { "epoch": 0.8850377195227475, "grad_norm": 2.1850595474243164, "learning_rate": 6.05131494547787e-07, "loss": 0.8326, "step": 72620 }, { "epoch": 0.8850986557468953, "grad_norm": 1.6039913892745972, "learning_rate": 6.048107761385504e-07, "loss": 0.733, "step": 72625 }, { "epoch": 0.8851595919710431, "grad_norm": 1.923095703125, "learning_rate": 6.044900577293137e-07, "loss": 0.9037, "step": 72630 }, { "epoch": 0.8852205281951909, "grad_norm": 2.104661703109741, "learning_rate": 6.04169339320077e-07, "loss": 0.7953, "step": 72635 }, { "epoch": 0.8852814644193387, "grad_norm": 1.9902863502502441, "learning_rate": 6.038486209108403e-07, "loss": 0.7967, "step": 72640 }, { "epoch": 0.8853424006434866, "grad_norm": 1.9638875722885132, "learning_rate": 6.035279025016036e-07, "loss": 0.8102, "step": 72645 }, { "epoch": 0.8854033368676343, "grad_norm": 2.069828987121582, "learning_rate": 6.032071840923669e-07, "loss": 0.7769, "step": 72650 }, { "epoch": 0.8854642730917821, "grad_norm": 1.9573042392730713, "learning_rate": 6.028864656831303e-07, "loss": 0.8403, "step": 72655 }, { "epoch": 0.8855252093159299, "grad_norm": 1.738306999206543, "learning_rate": 6.025657472738935e-07, "loss": 0.8786, "step": 72660 }, { "epoch": 0.8855861455400778, "grad_norm": 1.955702304840088, "learning_rate": 6.02245028864657e-07, "loss": 0.8348, "step": 72665 }, { "epoch": 0.8856470817642256, "grad_norm": 1.8448975086212158, "learning_rate": 6.019243104554202e-07, "loss": 0.8374, "step": 72670 }, { "epoch": 0.8857080179883734, "grad_norm": 2.084439277648926, "learning_rate": 6.016035920461835e-07, "loss": 0.8219, "step": 72675 }, { "epoch": 0.8857689542125212, "grad_norm": 2.2035412788391113, "learning_rate": 6.012828736369467e-07, "loss": 0.8561, "step": 72680 }, { "epoch": 0.8858298904366689, "grad_norm": 2.011110782623291, "learning_rate": 6.009621552277102e-07, "loss": 0.7727, "step": 72685 }, { "epoch": 0.8858908266608168, "grad_norm": 1.8076326847076416, "learning_rate": 6.006414368184734e-07, "loss": 0.8376, "step": 72690 }, { "epoch": 0.8859517628849646, "grad_norm": 2.203885078430176, "learning_rate": 6.003207184092367e-07, "loss": 0.8468, "step": 72695 }, { "epoch": 0.8860126991091124, "grad_norm": 2.058652877807617, "learning_rate": 6.000000000000001e-07, "loss": 0.805, "step": 72700 }, { "epoch": 0.8860736353332602, "grad_norm": 1.6332677602767944, "learning_rate": 5.996792815907634e-07, "loss": 0.7581, "step": 72705 }, { "epoch": 0.886134571557408, "grad_norm": 2.3490960597991943, "learning_rate": 5.993585631815266e-07, "loss": 0.7607, "step": 72710 }, { "epoch": 0.8861955077815559, "grad_norm": 2.0269393920898438, "learning_rate": 5.9903784477229e-07, "loss": 0.8344, "step": 72715 }, { "epoch": 0.8862564440057036, "grad_norm": 2.071617841720581, "learning_rate": 5.987171263630533e-07, "loss": 0.7748, "step": 72720 }, { "epoch": 0.8863173802298514, "grad_norm": 2.1696887016296387, "learning_rate": 5.983964079538166e-07, "loss": 0.8022, "step": 72725 }, { "epoch": 0.8863783164539992, "grad_norm": 1.8333587646484375, "learning_rate": 5.980756895445799e-07, "loss": 0.855, "step": 72730 }, { "epoch": 0.886439252678147, "grad_norm": 2.3615496158599854, "learning_rate": 5.977549711353432e-07, "loss": 0.8156, "step": 72735 }, { "epoch": 0.8865001889022949, "grad_norm": 2.1212825775146484, "learning_rate": 5.974342527261065e-07, "loss": 0.8284, "step": 72740 }, { "epoch": 0.8865611251264427, "grad_norm": 2.03517746925354, "learning_rate": 5.971135343168699e-07, "loss": 0.7758, "step": 72745 }, { "epoch": 0.8866220613505905, "grad_norm": 2.438081741333008, "learning_rate": 5.967928159076331e-07, "loss": 0.8187, "step": 72750 }, { "epoch": 0.8866829975747382, "grad_norm": 2.424241542816162, "learning_rate": 5.964720974983964e-07, "loss": 0.8094, "step": 72755 }, { "epoch": 0.886743933798886, "grad_norm": 1.793638825416565, "learning_rate": 5.961513790891598e-07, "loss": 0.8373, "step": 72760 }, { "epoch": 0.8868048700230339, "grad_norm": 1.6648612022399902, "learning_rate": 5.958306606799231e-07, "loss": 0.7231, "step": 72765 }, { "epoch": 0.8868658062471817, "grad_norm": 1.896521806716919, "learning_rate": 5.955099422706863e-07, "loss": 0.8389, "step": 72770 }, { "epoch": 0.8869267424713295, "grad_norm": 2.2181999683380127, "learning_rate": 5.951892238614497e-07, "loss": 0.8077, "step": 72775 }, { "epoch": 0.8869876786954773, "grad_norm": 1.734833836555481, "learning_rate": 5.94868505452213e-07, "loss": 0.7827, "step": 72780 }, { "epoch": 0.8870486149196252, "grad_norm": 1.9902538061141968, "learning_rate": 5.945477870429763e-07, "loss": 0.7322, "step": 72785 }, { "epoch": 0.8871095511437729, "grad_norm": 3.1305439472198486, "learning_rate": 5.942270686337396e-07, "loss": 0.817, "step": 72790 }, { "epoch": 0.8871704873679207, "grad_norm": 2.3516080379486084, "learning_rate": 5.939063502245029e-07, "loss": 0.7866, "step": 72795 }, { "epoch": 0.8872314235920685, "grad_norm": 1.8870669603347778, "learning_rate": 5.935856318152662e-07, "loss": 0.7416, "step": 72800 }, { "epoch": 0.8872923598162163, "grad_norm": 2.059680700302124, "learning_rate": 5.932649134060296e-07, "loss": 0.7778, "step": 72805 }, { "epoch": 0.8873532960403642, "grad_norm": 1.7870432138442993, "learning_rate": 5.929441949967929e-07, "loss": 0.8415, "step": 72810 }, { "epoch": 0.887414232264512, "grad_norm": 1.8485065698623657, "learning_rate": 5.926234765875561e-07, "loss": 0.9409, "step": 72815 }, { "epoch": 0.8874751684886598, "grad_norm": 2.6692917346954346, "learning_rate": 5.923027581783195e-07, "loss": 0.8299, "step": 72820 }, { "epoch": 0.8875361047128075, "grad_norm": 1.9201825857162476, "learning_rate": 5.919820397690828e-07, "loss": 0.8672, "step": 72825 }, { "epoch": 0.8875970409369554, "grad_norm": 2.1769533157348633, "learning_rate": 5.916613213598461e-07, "loss": 0.7501, "step": 72830 }, { "epoch": 0.8876579771611032, "grad_norm": 2.082204818725586, "learning_rate": 5.913406029506094e-07, "loss": 0.8511, "step": 72835 }, { "epoch": 0.887718913385251, "grad_norm": 2.0706257820129395, "learning_rate": 5.910198845413728e-07, "loss": 0.8133, "step": 72840 }, { "epoch": 0.8877798496093988, "grad_norm": 1.8129327297210693, "learning_rate": 5.90699166132136e-07, "loss": 0.8347, "step": 72845 }, { "epoch": 0.8878407858335466, "grad_norm": 2.123157501220703, "learning_rate": 5.903784477228994e-07, "loss": 0.7542, "step": 72850 }, { "epoch": 0.8879017220576945, "grad_norm": 1.7960388660430908, "learning_rate": 5.900577293136626e-07, "loss": 0.7818, "step": 72855 }, { "epoch": 0.8879626582818422, "grad_norm": 1.877897024154663, "learning_rate": 5.89737010904426e-07, "loss": 0.7109, "step": 72860 }, { "epoch": 0.88802359450599, "grad_norm": 2.1149837970733643, "learning_rate": 5.894162924951893e-07, "loss": 0.849, "step": 72865 }, { "epoch": 0.8880845307301378, "grad_norm": 2.1325814723968506, "learning_rate": 5.890955740859526e-07, "loss": 0.7942, "step": 72870 }, { "epoch": 0.8881454669542856, "grad_norm": 1.8808907270431519, "learning_rate": 5.887748556767159e-07, "loss": 0.8329, "step": 72875 }, { "epoch": 0.8882064031784335, "grad_norm": 1.8237228393554688, "learning_rate": 5.884541372674793e-07, "loss": 0.7731, "step": 72880 }, { "epoch": 0.8882673394025813, "grad_norm": 1.8779892921447754, "learning_rate": 5.881334188582425e-07, "loss": 0.8012, "step": 72885 }, { "epoch": 0.8883282756267291, "grad_norm": 2.4288408756256104, "learning_rate": 5.878127004490058e-07, "loss": 0.8488, "step": 72890 }, { "epoch": 0.8883892118508768, "grad_norm": 1.8507499694824219, "learning_rate": 5.874919820397692e-07, "loss": 0.7994, "step": 72895 }, { "epoch": 0.8884501480750246, "grad_norm": 2.274502992630005, "learning_rate": 5.871712636305325e-07, "loss": 0.7445, "step": 72900 }, { "epoch": 0.8885110842991725, "grad_norm": 1.8623380661010742, "learning_rate": 5.868505452212957e-07, "loss": 0.8283, "step": 72905 }, { "epoch": 0.8885720205233203, "grad_norm": 1.7900913953781128, "learning_rate": 5.865298268120591e-07, "loss": 0.781, "step": 72910 }, { "epoch": 0.8886329567474681, "grad_norm": 2.157017707824707, "learning_rate": 5.862091084028224e-07, "loss": 0.7773, "step": 72915 }, { "epoch": 0.8886938929716159, "grad_norm": 2.2389042377471924, "learning_rate": 5.858883899935857e-07, "loss": 0.7388, "step": 72920 }, { "epoch": 0.8887548291957638, "grad_norm": 2.1757423877716064, "learning_rate": 5.85567671584349e-07, "loss": 0.7669, "step": 72925 }, { "epoch": 0.8888157654199115, "grad_norm": 2.030728578567505, "learning_rate": 5.852469531751123e-07, "loss": 0.8131, "step": 72930 }, { "epoch": 0.8888767016440593, "grad_norm": 1.9517929553985596, "learning_rate": 5.849262347658756e-07, "loss": 0.8398, "step": 72935 }, { "epoch": 0.8889376378682071, "grad_norm": 1.9404451847076416, "learning_rate": 5.84605516356639e-07, "loss": 0.7427, "step": 72940 }, { "epoch": 0.8889985740923549, "grad_norm": 1.9053852558135986, "learning_rate": 5.842847979474022e-07, "loss": 0.7641, "step": 72945 }, { "epoch": 0.8890595103165028, "grad_norm": 1.8617380857467651, "learning_rate": 5.839640795381655e-07, "loss": 0.7727, "step": 72950 }, { "epoch": 0.8891204465406506, "grad_norm": 2.1202054023742676, "learning_rate": 5.836433611289289e-07, "loss": 0.7654, "step": 72955 }, { "epoch": 0.8891813827647984, "grad_norm": 1.9218536615371704, "learning_rate": 5.833226427196922e-07, "loss": 0.7995, "step": 72960 }, { "epoch": 0.8892423189889461, "grad_norm": 2.0621230602264404, "learning_rate": 5.830019243104554e-07, "loss": 0.8599, "step": 72965 }, { "epoch": 0.889303255213094, "grad_norm": 1.8641005754470825, "learning_rate": 5.826812059012188e-07, "loss": 0.7154, "step": 72970 }, { "epoch": 0.8893641914372418, "grad_norm": 2.1335177421569824, "learning_rate": 5.823604874919821e-07, "loss": 0.8257, "step": 72975 }, { "epoch": 0.8894251276613896, "grad_norm": 1.846839427947998, "learning_rate": 5.820397690827454e-07, "loss": 0.889, "step": 72980 }, { "epoch": 0.8894860638855374, "grad_norm": 1.9223531484603882, "learning_rate": 5.817190506735087e-07, "loss": 0.8009, "step": 72985 }, { "epoch": 0.8895470001096852, "grad_norm": 1.9990521669387817, "learning_rate": 5.81398332264272e-07, "loss": 0.7852, "step": 72990 }, { "epoch": 0.8896079363338331, "grad_norm": 1.9215056896209717, "learning_rate": 5.810776138550353e-07, "loss": 0.8043, "step": 72995 }, { "epoch": 0.8896688725579808, "grad_norm": 2.3277199268341064, "learning_rate": 5.807568954457987e-07, "loss": 0.773, "step": 73000 }, { "epoch": 0.8897298087821286, "grad_norm": 1.950571060180664, "learning_rate": 5.804361770365619e-07, "loss": 0.7562, "step": 73005 }, { "epoch": 0.8897907450062764, "grad_norm": 1.8078376054763794, "learning_rate": 5.801154586273252e-07, "loss": 0.7634, "step": 73010 }, { "epoch": 0.8898516812304242, "grad_norm": 2.143970012664795, "learning_rate": 5.797947402180886e-07, "loss": 0.8132, "step": 73015 }, { "epoch": 0.8899126174545721, "grad_norm": 2.010272979736328, "learning_rate": 5.794740218088519e-07, "loss": 0.7329, "step": 73020 }, { "epoch": 0.8899735536787199, "grad_norm": 1.953428030014038, "learning_rate": 5.791533033996151e-07, "loss": 0.8583, "step": 73025 }, { "epoch": 0.8900344899028677, "grad_norm": 2.1178712844848633, "learning_rate": 5.788325849903785e-07, "loss": 0.8175, "step": 73030 }, { "epoch": 0.8900954261270154, "grad_norm": 2.4683337211608887, "learning_rate": 5.785118665811418e-07, "loss": 0.8009, "step": 73035 }, { "epoch": 0.8901563623511632, "grad_norm": 1.872951626777649, "learning_rate": 5.781911481719051e-07, "loss": 0.8603, "step": 73040 }, { "epoch": 0.8902172985753111, "grad_norm": 1.884451150894165, "learning_rate": 5.778704297626684e-07, "loss": 0.825, "step": 73045 }, { "epoch": 0.8902782347994589, "grad_norm": 1.7102071046829224, "learning_rate": 5.775497113534318e-07, "loss": 0.8229, "step": 73050 }, { "epoch": 0.8903391710236067, "grad_norm": 1.7264299392700195, "learning_rate": 5.77228992944195e-07, "loss": 0.8041, "step": 73055 }, { "epoch": 0.8904001072477545, "grad_norm": 2.238525152206421, "learning_rate": 5.769082745349584e-07, "loss": 0.7076, "step": 73060 }, { "epoch": 0.8904610434719024, "grad_norm": 2.1786322593688965, "learning_rate": 5.765875561257216e-07, "loss": 0.8301, "step": 73065 }, { "epoch": 0.8905219796960501, "grad_norm": 2.211784601211548, "learning_rate": 5.76266837716485e-07, "loss": 0.8148, "step": 73070 }, { "epoch": 0.8905829159201979, "grad_norm": 1.757660984992981, "learning_rate": 5.759461193072483e-07, "loss": 0.8653, "step": 73075 }, { "epoch": 0.8906438521443457, "grad_norm": 2.0568482875823975, "learning_rate": 5.756254008980116e-07, "loss": 0.8313, "step": 73080 }, { "epoch": 0.8907047883684935, "grad_norm": 2.0696022510528564, "learning_rate": 5.753046824887749e-07, "loss": 0.7995, "step": 73085 }, { "epoch": 0.8907657245926414, "grad_norm": 2.3016040325164795, "learning_rate": 5.749839640795383e-07, "loss": 0.7469, "step": 73090 }, { "epoch": 0.8908266608167892, "grad_norm": 1.6683168411254883, "learning_rate": 5.746632456703015e-07, "loss": 0.7954, "step": 73095 }, { "epoch": 0.890887597040937, "grad_norm": 1.7073792219161987, "learning_rate": 5.743425272610648e-07, "loss": 0.7504, "step": 73100 }, { "epoch": 0.8909485332650847, "grad_norm": 2.748070478439331, "learning_rate": 5.740218088518282e-07, "loss": 0.8513, "step": 73105 }, { "epoch": 0.8910094694892325, "grad_norm": 2.3326575756073, "learning_rate": 5.737010904425915e-07, "loss": 0.8398, "step": 73110 }, { "epoch": 0.8910704057133804, "grad_norm": 3.6789286136627197, "learning_rate": 5.733803720333548e-07, "loss": 0.8212, "step": 73115 }, { "epoch": 0.8911313419375282, "grad_norm": 2.0339436531066895, "learning_rate": 5.73059653624118e-07, "loss": 0.8054, "step": 73120 }, { "epoch": 0.891192278161676, "grad_norm": 2.089951515197754, "learning_rate": 5.727389352148814e-07, "loss": 0.7918, "step": 73125 }, { "epoch": 0.8912532143858238, "grad_norm": 2.183039903640747, "learning_rate": 5.724182168056447e-07, "loss": 0.7867, "step": 73130 }, { "epoch": 0.8913141506099717, "grad_norm": 2.105921506881714, "learning_rate": 5.720974983964081e-07, "loss": 0.8315, "step": 73135 }, { "epoch": 0.8913750868341194, "grad_norm": 2.052700996398926, "learning_rate": 5.717767799871713e-07, "loss": 0.8029, "step": 73140 }, { "epoch": 0.8914360230582672, "grad_norm": 1.9609087705612183, "learning_rate": 5.714560615779346e-07, "loss": 0.8433, "step": 73145 }, { "epoch": 0.891496959282415, "grad_norm": 1.969369649887085, "learning_rate": 5.71135343168698e-07, "loss": 0.7637, "step": 73150 }, { "epoch": 0.8915578955065628, "grad_norm": 1.7944740056991577, "learning_rate": 5.708146247594613e-07, "loss": 0.7799, "step": 73155 }, { "epoch": 0.8916188317307107, "grad_norm": 2.3546061515808105, "learning_rate": 5.704939063502245e-07, "loss": 0.7882, "step": 73160 }, { "epoch": 0.8916797679548585, "grad_norm": 1.6088281869888306, "learning_rate": 5.701731879409878e-07, "loss": 0.7735, "step": 73165 }, { "epoch": 0.8917407041790063, "grad_norm": 1.9684226512908936, "learning_rate": 5.698524695317512e-07, "loss": 0.8123, "step": 73170 }, { "epoch": 0.891801640403154, "grad_norm": 1.9141844511032104, "learning_rate": 5.695317511225145e-07, "loss": 0.7692, "step": 73175 }, { "epoch": 0.8918625766273018, "grad_norm": 2.152768611907959, "learning_rate": 5.692110327132777e-07, "loss": 0.8207, "step": 73180 }, { "epoch": 0.8919235128514497, "grad_norm": 2.4109392166137695, "learning_rate": 5.688903143040411e-07, "loss": 0.7589, "step": 73185 }, { "epoch": 0.8919844490755975, "grad_norm": 1.6782306432724, "learning_rate": 5.685695958948044e-07, "loss": 0.7898, "step": 73190 }, { "epoch": 0.8920453852997453, "grad_norm": 2.5143675804138184, "learning_rate": 5.682488774855678e-07, "loss": 0.9008, "step": 73195 }, { "epoch": 0.8921063215238931, "grad_norm": 1.8195476531982422, "learning_rate": 5.67928159076331e-07, "loss": 0.8683, "step": 73200 }, { "epoch": 0.892167257748041, "grad_norm": 1.789284586906433, "learning_rate": 5.676074406670943e-07, "loss": 0.9249, "step": 73205 }, { "epoch": 0.8922281939721887, "grad_norm": 1.8891197443008423, "learning_rate": 5.672867222578576e-07, "loss": 0.8693, "step": 73210 }, { "epoch": 0.8922891301963365, "grad_norm": 1.616953730583191, "learning_rate": 5.66966003848621e-07, "loss": 0.7988, "step": 73215 }, { "epoch": 0.8923500664204843, "grad_norm": 2.558061361312866, "learning_rate": 5.666452854393842e-07, "loss": 0.832, "step": 73220 }, { "epoch": 0.8924110026446321, "grad_norm": 1.9217463731765747, "learning_rate": 5.663245670301477e-07, "loss": 0.7748, "step": 73225 }, { "epoch": 0.89247193886878, "grad_norm": 1.9959315061569214, "learning_rate": 5.660038486209109e-07, "loss": 0.8116, "step": 73230 }, { "epoch": 0.8925328750929278, "grad_norm": 2.3396782875061035, "learning_rate": 5.656831302116742e-07, "loss": 0.865, "step": 73235 }, { "epoch": 0.8925938113170756, "grad_norm": 2.085198402404785, "learning_rate": 5.653624118024374e-07, "loss": 0.8186, "step": 73240 }, { "epoch": 0.8926547475412233, "grad_norm": 2.1045594215393066, "learning_rate": 5.650416933932009e-07, "loss": 0.8574, "step": 73245 }, { "epoch": 0.8927156837653711, "grad_norm": 1.822389006614685, "learning_rate": 5.647209749839641e-07, "loss": 0.7964, "step": 73250 }, { "epoch": 0.892776619989519, "grad_norm": 2.0123143196105957, "learning_rate": 5.644002565747274e-07, "loss": 0.8337, "step": 73255 }, { "epoch": 0.8928375562136668, "grad_norm": 1.7832622528076172, "learning_rate": 5.640795381654908e-07, "loss": 0.8184, "step": 73260 }, { "epoch": 0.8928984924378146, "grad_norm": 2.0688347816467285, "learning_rate": 5.637588197562541e-07, "loss": 0.8298, "step": 73265 }, { "epoch": 0.8929594286619624, "grad_norm": 1.893332839012146, "learning_rate": 5.634381013470173e-07, "loss": 0.8151, "step": 73270 }, { "epoch": 0.8930203648861103, "grad_norm": 2.1749303340911865, "learning_rate": 5.631173829377807e-07, "loss": 0.8207, "step": 73275 }, { "epoch": 0.893081301110258, "grad_norm": 2.319088935852051, "learning_rate": 5.62796664528544e-07, "loss": 0.7734, "step": 73280 }, { "epoch": 0.8931422373344058, "grad_norm": 2.215684652328491, "learning_rate": 5.624759461193073e-07, "loss": 0.8558, "step": 73285 }, { "epoch": 0.8932031735585536, "grad_norm": 1.8578194379806519, "learning_rate": 5.621552277100706e-07, "loss": 0.8919, "step": 73290 }, { "epoch": 0.8932641097827014, "grad_norm": 1.8721349239349365, "learning_rate": 5.618345093008339e-07, "loss": 0.8388, "step": 73295 }, { "epoch": 0.8933250460068493, "grad_norm": 1.9429277181625366, "learning_rate": 5.615137908915972e-07, "loss": 0.7891, "step": 73300 }, { "epoch": 0.8933859822309971, "grad_norm": 2.1528332233428955, "learning_rate": 5.611930724823606e-07, "loss": 0.8275, "step": 73305 }, { "epoch": 0.8934469184551449, "grad_norm": 1.77438485622406, "learning_rate": 5.608723540731238e-07, "loss": 0.8641, "step": 73310 }, { "epoch": 0.8935078546792926, "grad_norm": 1.6975024938583374, "learning_rate": 5.605516356638871e-07, "loss": 0.7646, "step": 73315 }, { "epoch": 0.8935687909034404, "grad_norm": 1.7624772787094116, "learning_rate": 5.602309172546505e-07, "loss": 0.9078, "step": 73320 }, { "epoch": 0.8936297271275883, "grad_norm": 2.0343949794769287, "learning_rate": 5.599101988454138e-07, "loss": 0.8574, "step": 73325 }, { "epoch": 0.8936906633517361, "grad_norm": 1.809305191040039, "learning_rate": 5.59589480436177e-07, "loss": 0.8012, "step": 73330 }, { "epoch": 0.8937515995758839, "grad_norm": 2.0832278728485107, "learning_rate": 5.592687620269404e-07, "loss": 0.7913, "step": 73335 }, { "epoch": 0.8938125358000317, "grad_norm": 1.6748610734939575, "learning_rate": 5.589480436177037e-07, "loss": 0.8007, "step": 73340 }, { "epoch": 0.8938734720241794, "grad_norm": 2.075528621673584, "learning_rate": 5.58627325208467e-07, "loss": 0.7747, "step": 73345 }, { "epoch": 0.8939344082483273, "grad_norm": 2.5270326137542725, "learning_rate": 5.583066067992303e-07, "loss": 0.8633, "step": 73350 }, { "epoch": 0.8939953444724751, "grad_norm": 1.9221692085266113, "learning_rate": 5.579858883899936e-07, "loss": 0.8136, "step": 73355 }, { "epoch": 0.8940562806966229, "grad_norm": 1.9104135036468506, "learning_rate": 5.576651699807569e-07, "loss": 0.802, "step": 73360 }, { "epoch": 0.8941172169207707, "grad_norm": 2.0945074558258057, "learning_rate": 5.573444515715203e-07, "loss": 0.8266, "step": 73365 }, { "epoch": 0.8941781531449186, "grad_norm": 2.163872480392456, "learning_rate": 5.570237331622835e-07, "loss": 0.855, "step": 73370 }, { "epoch": 0.8942390893690664, "grad_norm": 2.185678005218506, "learning_rate": 5.567030147530468e-07, "loss": 0.7894, "step": 73375 }, { "epoch": 0.8943000255932141, "grad_norm": 2.1835076808929443, "learning_rate": 5.563822963438102e-07, "loss": 0.794, "step": 73380 }, { "epoch": 0.8943609618173619, "grad_norm": 2.0556395053863525, "learning_rate": 5.560615779345735e-07, "loss": 0.7387, "step": 73385 }, { "epoch": 0.8944218980415097, "grad_norm": 1.7343933582305908, "learning_rate": 5.557408595253367e-07, "loss": 0.8331, "step": 73390 }, { "epoch": 0.8944828342656576, "grad_norm": 1.8952475786209106, "learning_rate": 5.554201411161001e-07, "loss": 0.7931, "step": 73395 }, { "epoch": 0.8945437704898054, "grad_norm": 2.0270965099334717, "learning_rate": 5.550994227068634e-07, "loss": 0.8817, "step": 73400 }, { "epoch": 0.8946047067139532, "grad_norm": 1.9500010013580322, "learning_rate": 5.547787042976267e-07, "loss": 0.787, "step": 73405 }, { "epoch": 0.894665642938101, "grad_norm": 2.118765115737915, "learning_rate": 5.5445798588839e-07, "loss": 0.8321, "step": 73410 }, { "epoch": 0.8947265791622487, "grad_norm": 2.077544927597046, "learning_rate": 5.541372674791533e-07, "loss": 0.7732, "step": 73415 }, { "epoch": 0.8947875153863966, "grad_norm": 1.7615450620651245, "learning_rate": 5.538165490699167e-07, "loss": 0.807, "step": 73420 }, { "epoch": 0.8948484516105444, "grad_norm": 1.837791919708252, "learning_rate": 5.5349583066068e-07, "loss": 0.8111, "step": 73425 }, { "epoch": 0.8949093878346922, "grad_norm": 1.967370629310608, "learning_rate": 5.531751122514433e-07, "loss": 0.7777, "step": 73430 }, { "epoch": 0.89497032405884, "grad_norm": 1.7913812398910522, "learning_rate": 5.528543938422066e-07, "loss": 0.801, "step": 73435 }, { "epoch": 0.8950312602829879, "grad_norm": 2.2353668212890625, "learning_rate": 5.5253367543297e-07, "loss": 0.8435, "step": 73440 }, { "epoch": 0.8950921965071357, "grad_norm": 1.9844777584075928, "learning_rate": 5.522129570237332e-07, "loss": 0.7793, "step": 73445 }, { "epoch": 0.8951531327312834, "grad_norm": 2.1315057277679443, "learning_rate": 5.518922386144965e-07, "loss": 0.8642, "step": 73450 }, { "epoch": 0.8952140689554312, "grad_norm": 2.3553078174591064, "learning_rate": 5.515715202052599e-07, "loss": 0.8698, "step": 73455 }, { "epoch": 0.895275005179579, "grad_norm": 1.723659873008728, "learning_rate": 5.512508017960232e-07, "loss": 0.8506, "step": 73460 }, { "epoch": 0.8953359414037269, "grad_norm": 2.2610533237457275, "learning_rate": 5.509300833867864e-07, "loss": 0.8324, "step": 73465 }, { "epoch": 0.8953968776278747, "grad_norm": 1.9142720699310303, "learning_rate": 5.506093649775498e-07, "loss": 0.8551, "step": 73470 }, { "epoch": 0.8954578138520225, "grad_norm": 1.9619754552841187, "learning_rate": 5.502886465683131e-07, "loss": 0.8392, "step": 73475 }, { "epoch": 0.8955187500761703, "grad_norm": 2.063152313232422, "learning_rate": 5.499679281590764e-07, "loss": 0.7505, "step": 73480 }, { "epoch": 0.895579686300318, "grad_norm": 1.9975299835205078, "learning_rate": 5.496472097498397e-07, "loss": 0.8189, "step": 73485 }, { "epoch": 0.8956406225244659, "grad_norm": 1.9198362827301025, "learning_rate": 5.49326491340603e-07, "loss": 0.783, "step": 73490 }, { "epoch": 0.8957015587486137, "grad_norm": 2.1419551372528076, "learning_rate": 5.490057729313663e-07, "loss": 0.7753, "step": 73495 }, { "epoch": 0.8957624949727615, "grad_norm": 1.9406232833862305, "learning_rate": 5.486850545221297e-07, "loss": 0.8316, "step": 73500 }, { "epoch": 0.8958234311969093, "grad_norm": 2.2435355186462402, "learning_rate": 5.483643361128929e-07, "loss": 0.9012, "step": 73505 }, { "epoch": 0.8958843674210571, "grad_norm": 1.8039968013763428, "learning_rate": 5.480436177036562e-07, "loss": 0.8629, "step": 73510 }, { "epoch": 0.895945303645205, "grad_norm": 2.022382974624634, "learning_rate": 5.477228992944196e-07, "loss": 0.7714, "step": 73515 }, { "epoch": 0.8960062398693527, "grad_norm": 2.0565290451049805, "learning_rate": 5.474021808851829e-07, "loss": 0.7872, "step": 73520 }, { "epoch": 0.8960671760935005, "grad_norm": 2.290876865386963, "learning_rate": 5.470814624759461e-07, "loss": 0.7821, "step": 73525 }, { "epoch": 0.8961281123176483, "grad_norm": 1.8101099729537964, "learning_rate": 5.467607440667095e-07, "loss": 0.7782, "step": 73530 }, { "epoch": 0.8961890485417962, "grad_norm": 1.711029291152954, "learning_rate": 5.464400256574728e-07, "loss": 0.8147, "step": 73535 }, { "epoch": 0.896249984765944, "grad_norm": 1.8527820110321045, "learning_rate": 5.461193072482361e-07, "loss": 0.8221, "step": 73540 }, { "epoch": 0.8963109209900918, "grad_norm": 1.8676484823226929, "learning_rate": 5.457985888389994e-07, "loss": 0.7904, "step": 73545 }, { "epoch": 0.8963718572142396, "grad_norm": 1.796929121017456, "learning_rate": 5.454778704297627e-07, "loss": 0.8033, "step": 73550 }, { "epoch": 0.8964327934383873, "grad_norm": 2.2170841693878174, "learning_rate": 5.45157152020526e-07, "loss": 0.8412, "step": 73555 }, { "epoch": 0.8964937296625352, "grad_norm": 1.8144809007644653, "learning_rate": 5.448364336112894e-07, "loss": 0.7952, "step": 73560 }, { "epoch": 0.896554665886683, "grad_norm": 1.895376443862915, "learning_rate": 5.445157152020526e-07, "loss": 0.786, "step": 73565 }, { "epoch": 0.8966156021108308, "grad_norm": 2.211671829223633, "learning_rate": 5.441949967928159e-07, "loss": 0.8498, "step": 73570 }, { "epoch": 0.8966765383349786, "grad_norm": 2.0713565349578857, "learning_rate": 5.438742783835793e-07, "loss": 0.8121, "step": 73575 }, { "epoch": 0.8967374745591264, "grad_norm": 2.0741899013519287, "learning_rate": 5.435535599743426e-07, "loss": 0.8203, "step": 73580 }, { "epoch": 0.8967984107832743, "grad_norm": 2.0061113834381104, "learning_rate": 5.432328415651058e-07, "loss": 0.7886, "step": 73585 }, { "epoch": 0.896859347007422, "grad_norm": 1.9981943368911743, "learning_rate": 5.429121231558692e-07, "loss": 0.8045, "step": 73590 }, { "epoch": 0.8969202832315698, "grad_norm": 1.8377410173416138, "learning_rate": 5.425914047466325e-07, "loss": 0.7915, "step": 73595 }, { "epoch": 0.8969812194557176, "grad_norm": 1.9722470045089722, "learning_rate": 5.422706863373958e-07, "loss": 0.8455, "step": 73600 }, { "epoch": 0.8970421556798654, "grad_norm": 1.7605078220367432, "learning_rate": 5.419499679281591e-07, "loss": 0.8239, "step": 73605 }, { "epoch": 0.8971030919040133, "grad_norm": 1.8559678792953491, "learning_rate": 5.416292495189225e-07, "loss": 0.7649, "step": 73610 }, { "epoch": 0.8971640281281611, "grad_norm": 2.1440024375915527, "learning_rate": 5.413085311096857e-07, "loss": 0.7944, "step": 73615 }, { "epoch": 0.8972249643523089, "grad_norm": 1.9676822423934937, "learning_rate": 5.409878127004491e-07, "loss": 0.8039, "step": 73620 }, { "epoch": 0.8972859005764566, "grad_norm": 2.389507532119751, "learning_rate": 5.406670942912123e-07, "loss": 0.8291, "step": 73625 }, { "epoch": 0.8973468368006045, "grad_norm": 2.5793633460998535, "learning_rate": 5.403463758819757e-07, "loss": 0.7717, "step": 73630 }, { "epoch": 0.8974077730247523, "grad_norm": 2.4139230251312256, "learning_rate": 5.40025657472739e-07, "loss": 0.8501, "step": 73635 }, { "epoch": 0.8974687092489001, "grad_norm": 2.050610065460205, "learning_rate": 5.397049390635023e-07, "loss": 0.8002, "step": 73640 }, { "epoch": 0.8975296454730479, "grad_norm": 2.355401039123535, "learning_rate": 5.393842206542656e-07, "loss": 0.8419, "step": 73645 }, { "epoch": 0.8975905816971957, "grad_norm": 2.0292165279388428, "learning_rate": 5.39063502245029e-07, "loss": 0.8352, "step": 73650 }, { "epoch": 0.8976515179213436, "grad_norm": 2.030256509780884, "learning_rate": 5.387427838357922e-07, "loss": 0.7819, "step": 73655 }, { "epoch": 0.8977124541454913, "grad_norm": 1.9392911195755005, "learning_rate": 5.384220654265555e-07, "loss": 0.7901, "step": 73660 }, { "epoch": 0.8977733903696391, "grad_norm": 2.038195848464966, "learning_rate": 5.381013470173189e-07, "loss": 0.8378, "step": 73665 }, { "epoch": 0.8978343265937869, "grad_norm": 1.8382728099822998, "learning_rate": 5.377806286080822e-07, "loss": 0.7534, "step": 73670 }, { "epoch": 0.8978952628179347, "grad_norm": 2.1549041271209717, "learning_rate": 5.374599101988454e-07, "loss": 0.8203, "step": 73675 }, { "epoch": 0.8979561990420826, "grad_norm": 2.0034444332122803, "learning_rate": 5.371391917896088e-07, "loss": 0.7609, "step": 73680 }, { "epoch": 0.8980171352662304, "grad_norm": 1.721921443939209, "learning_rate": 5.368184733803721e-07, "loss": 0.7342, "step": 73685 }, { "epoch": 0.8980780714903782, "grad_norm": 2.001150131225586, "learning_rate": 5.364977549711354e-07, "loss": 0.75, "step": 73690 }, { "epoch": 0.8981390077145259, "grad_norm": 1.862019658088684, "learning_rate": 5.361770365618987e-07, "loss": 0.8191, "step": 73695 }, { "epoch": 0.8981999439386738, "grad_norm": 2.414445638656616, "learning_rate": 5.35856318152662e-07, "loss": 0.8047, "step": 73700 }, { "epoch": 0.8982608801628216, "grad_norm": 1.9862040281295776, "learning_rate": 5.355355997434253e-07, "loss": 0.8251, "step": 73705 }, { "epoch": 0.8983218163869694, "grad_norm": 2.013570547103882, "learning_rate": 5.352148813341887e-07, "loss": 0.8663, "step": 73710 }, { "epoch": 0.8983827526111172, "grad_norm": 2.367983818054199, "learning_rate": 5.348941629249519e-07, "loss": 0.7901, "step": 73715 }, { "epoch": 0.898443688835265, "grad_norm": 2.091657876968384, "learning_rate": 5.345734445157152e-07, "loss": 0.793, "step": 73720 }, { "epoch": 0.8985046250594129, "grad_norm": 2.0419580936431885, "learning_rate": 5.342527261064786e-07, "loss": 0.7256, "step": 73725 }, { "epoch": 0.8985655612835606, "grad_norm": 2.137225389480591, "learning_rate": 5.339320076972419e-07, "loss": 0.8344, "step": 73730 }, { "epoch": 0.8986264975077084, "grad_norm": 1.9002209901809692, "learning_rate": 5.336112892880052e-07, "loss": 0.8245, "step": 73735 }, { "epoch": 0.8986874337318562, "grad_norm": 2.153090238571167, "learning_rate": 5.332905708787684e-07, "loss": 0.7974, "step": 73740 }, { "epoch": 0.898748369956004, "grad_norm": 1.8818473815917969, "learning_rate": 5.329698524695318e-07, "loss": 0.8131, "step": 73745 }, { "epoch": 0.8988093061801519, "grad_norm": 1.8947579860687256, "learning_rate": 5.326491340602951e-07, "loss": 0.703, "step": 73750 }, { "epoch": 0.8988702424042997, "grad_norm": 2.232111930847168, "learning_rate": 5.323284156510585e-07, "loss": 0.7374, "step": 73755 }, { "epoch": 0.8989311786284475, "grad_norm": 2.042140007019043, "learning_rate": 5.320076972418217e-07, "loss": 0.8156, "step": 73760 }, { "epoch": 0.8989921148525952, "grad_norm": 2.0766735076904297, "learning_rate": 5.31686978832585e-07, "loss": 0.7862, "step": 73765 }, { "epoch": 0.899053051076743, "grad_norm": 2.243281841278076, "learning_rate": 5.313662604233483e-07, "loss": 0.8669, "step": 73770 }, { "epoch": 0.8991139873008909, "grad_norm": 2.1675994396209717, "learning_rate": 5.310455420141117e-07, "loss": 0.754, "step": 73775 }, { "epoch": 0.8991749235250387, "grad_norm": 2.0758090019226074, "learning_rate": 5.307248236048749e-07, "loss": 0.908, "step": 73780 }, { "epoch": 0.8992358597491865, "grad_norm": 1.9588180780410767, "learning_rate": 5.304041051956384e-07, "loss": 0.7741, "step": 73785 }, { "epoch": 0.8992967959733343, "grad_norm": 1.8546507358551025, "learning_rate": 5.300833867864016e-07, "loss": 0.834, "step": 73790 }, { "epoch": 0.8993577321974822, "grad_norm": 1.9606796503067017, "learning_rate": 5.297626683771649e-07, "loss": 0.7105, "step": 73795 }, { "epoch": 0.8994186684216299, "grad_norm": 2.0901217460632324, "learning_rate": 5.294419499679281e-07, "loss": 0.8236, "step": 73800 }, { "epoch": 0.8994796046457777, "grad_norm": 2.059889316558838, "learning_rate": 5.291212315586916e-07, "loss": 0.8008, "step": 73805 }, { "epoch": 0.8995405408699255, "grad_norm": 2.0881783962249756, "learning_rate": 5.288005131494548e-07, "loss": 0.7937, "step": 73810 }, { "epoch": 0.8996014770940733, "grad_norm": 2.234757900238037, "learning_rate": 5.284797947402181e-07, "loss": 0.7404, "step": 73815 }, { "epoch": 0.8996624133182212, "grad_norm": 1.8676506280899048, "learning_rate": 5.281590763309815e-07, "loss": 0.8246, "step": 73820 }, { "epoch": 0.899723349542369, "grad_norm": 2.262847900390625, "learning_rate": 5.278383579217448e-07, "loss": 0.8469, "step": 73825 }, { "epoch": 0.8997842857665168, "grad_norm": 2.1652543544769287, "learning_rate": 5.27517639512508e-07, "loss": 0.8866, "step": 73830 }, { "epoch": 0.8998452219906645, "grad_norm": 2.13765549659729, "learning_rate": 5.271969211032714e-07, "loss": 0.823, "step": 73835 }, { "epoch": 0.8999061582148123, "grad_norm": 2.1614534854888916, "learning_rate": 5.268762026940347e-07, "loss": 0.7835, "step": 73840 }, { "epoch": 0.8999670944389602, "grad_norm": 1.853579044342041, "learning_rate": 5.26555484284798e-07, "loss": 0.8674, "step": 73845 }, { "epoch": 0.900028030663108, "grad_norm": 2.258697032928467, "learning_rate": 5.262347658755613e-07, "loss": 0.8183, "step": 73850 }, { "epoch": 0.9000889668872558, "grad_norm": 1.9555203914642334, "learning_rate": 5.259140474663246e-07, "loss": 0.8195, "step": 73855 }, { "epoch": 0.9001499031114036, "grad_norm": 2.3237721920013428, "learning_rate": 5.255933290570879e-07, "loss": 0.8514, "step": 73860 }, { "epoch": 0.9002108393355515, "grad_norm": 1.9389573335647583, "learning_rate": 5.252726106478513e-07, "loss": 0.7577, "step": 73865 }, { "epoch": 0.9002717755596992, "grad_norm": 2.201258659362793, "learning_rate": 5.249518922386145e-07, "loss": 0.8446, "step": 73870 }, { "epoch": 0.900332711783847, "grad_norm": 1.92435884475708, "learning_rate": 5.246311738293778e-07, "loss": 0.8139, "step": 73875 }, { "epoch": 0.9003936480079948, "grad_norm": 2.109182119369507, "learning_rate": 5.243104554201412e-07, "loss": 0.7997, "step": 73880 }, { "epoch": 0.9004545842321426, "grad_norm": 1.8189573287963867, "learning_rate": 5.239897370109045e-07, "loss": 0.7981, "step": 73885 }, { "epoch": 0.9005155204562905, "grad_norm": 2.0731754302978516, "learning_rate": 5.236690186016677e-07, "loss": 0.8557, "step": 73890 }, { "epoch": 0.9005764566804383, "grad_norm": 2.078472375869751, "learning_rate": 5.233483001924311e-07, "loss": 0.7712, "step": 73895 }, { "epoch": 0.9006373929045861, "grad_norm": 2.3663411140441895, "learning_rate": 5.230275817831944e-07, "loss": 0.7864, "step": 73900 }, { "epoch": 0.9006983291287338, "grad_norm": 2.1291353702545166, "learning_rate": 5.227068633739577e-07, "loss": 0.8305, "step": 73905 }, { "epoch": 0.9007592653528816, "grad_norm": 1.707641839981079, "learning_rate": 5.22386144964721e-07, "loss": 0.7876, "step": 73910 }, { "epoch": 0.9008202015770295, "grad_norm": 2.213866949081421, "learning_rate": 5.220654265554843e-07, "loss": 0.7848, "step": 73915 }, { "epoch": 0.9008811378011773, "grad_norm": 2.232158899307251, "learning_rate": 5.217447081462476e-07, "loss": 0.7748, "step": 73920 }, { "epoch": 0.9009420740253251, "grad_norm": 1.8864179849624634, "learning_rate": 5.21423989737011e-07, "loss": 0.7945, "step": 73925 }, { "epoch": 0.9010030102494729, "grad_norm": 1.966742753982544, "learning_rate": 5.211032713277742e-07, "loss": 0.8092, "step": 73930 }, { "epoch": 0.9010639464736208, "grad_norm": 1.899289608001709, "learning_rate": 5.207825529185375e-07, "loss": 0.7951, "step": 73935 }, { "epoch": 0.9011248826977685, "grad_norm": 2.2236902713775635, "learning_rate": 5.204618345093009e-07, "loss": 0.7539, "step": 73940 }, { "epoch": 0.9011858189219163, "grad_norm": 2.0101191997528076, "learning_rate": 5.201411161000642e-07, "loss": 0.8, "step": 73945 }, { "epoch": 0.9012467551460641, "grad_norm": 2.0942189693450928, "learning_rate": 5.198203976908274e-07, "loss": 0.7646, "step": 73950 }, { "epoch": 0.9013076913702119, "grad_norm": 1.6738899946212769, "learning_rate": 5.194996792815908e-07, "loss": 0.6988, "step": 73955 }, { "epoch": 0.9013686275943598, "grad_norm": 1.611084222793579, "learning_rate": 5.191789608723541e-07, "loss": 0.769, "step": 73960 }, { "epoch": 0.9014295638185076, "grad_norm": 2.375844955444336, "learning_rate": 5.188582424631174e-07, "loss": 0.838, "step": 73965 }, { "epoch": 0.9014905000426554, "grad_norm": 2.427577495574951, "learning_rate": 5.185375240538807e-07, "loss": 0.8174, "step": 73970 }, { "epoch": 0.9015514362668031, "grad_norm": 1.934152364730835, "learning_rate": 5.18216805644644e-07, "loss": 0.827, "step": 73975 }, { "epoch": 0.9016123724909509, "grad_norm": 1.8226912021636963, "learning_rate": 5.178960872354073e-07, "loss": 0.7714, "step": 73980 }, { "epoch": 0.9016733087150988, "grad_norm": 1.8176347017288208, "learning_rate": 5.175753688261707e-07, "loss": 0.7785, "step": 73985 }, { "epoch": 0.9017342449392466, "grad_norm": 1.9719228744506836, "learning_rate": 5.172546504169339e-07, "loss": 0.7877, "step": 73990 }, { "epoch": 0.9017951811633944, "grad_norm": 1.896228551864624, "learning_rate": 5.169339320076973e-07, "loss": 0.8285, "step": 73995 }, { "epoch": 0.9018561173875422, "grad_norm": 1.9574724435806274, "learning_rate": 5.166132135984606e-07, "loss": 0.8442, "step": 74000 }, { "epoch": 0.9019170536116901, "grad_norm": 1.9726592302322388, "learning_rate": 5.162924951892239e-07, "loss": 0.8411, "step": 74005 }, { "epoch": 0.9019779898358378, "grad_norm": 2.108497381210327, "learning_rate": 5.159717767799871e-07, "loss": 0.7512, "step": 74010 }, { "epoch": 0.9020389260599856, "grad_norm": 1.9952619075775146, "learning_rate": 5.156510583707506e-07, "loss": 0.7753, "step": 74015 }, { "epoch": 0.9020998622841334, "grad_norm": 1.7834752798080444, "learning_rate": 5.153303399615139e-07, "loss": 0.7598, "step": 74020 }, { "epoch": 0.9021607985082812, "grad_norm": 2.0944268703460693, "learning_rate": 5.150096215522771e-07, "loss": 0.8376, "step": 74025 }, { "epoch": 0.9022217347324291, "grad_norm": 2.087991952896118, "learning_rate": 5.146889031430405e-07, "loss": 0.7986, "step": 74030 }, { "epoch": 0.9022826709565769, "grad_norm": 2.8149657249450684, "learning_rate": 5.143681847338038e-07, "loss": 0.8386, "step": 74035 }, { "epoch": 0.9023436071807247, "grad_norm": 2.0630102157592773, "learning_rate": 5.140474663245671e-07, "loss": 0.7854, "step": 74040 }, { "epoch": 0.9024045434048724, "grad_norm": 1.9387941360473633, "learning_rate": 5.137267479153304e-07, "loss": 0.8129, "step": 74045 }, { "epoch": 0.9024654796290202, "grad_norm": 2.063589096069336, "learning_rate": 5.134060295060937e-07, "loss": 0.774, "step": 74050 }, { "epoch": 0.9025264158531681, "grad_norm": 1.8678220510482788, "learning_rate": 5.13085311096857e-07, "loss": 0.8295, "step": 74055 }, { "epoch": 0.9025873520773159, "grad_norm": 2.2255170345306396, "learning_rate": 5.127645926876204e-07, "loss": 0.7831, "step": 74060 }, { "epoch": 0.9026482883014637, "grad_norm": 2.151193857192993, "learning_rate": 5.124438742783836e-07, "loss": 0.8007, "step": 74065 }, { "epoch": 0.9027092245256115, "grad_norm": 1.9919873476028442, "learning_rate": 5.121231558691469e-07, "loss": 0.732, "step": 74070 }, { "epoch": 0.9027701607497594, "grad_norm": 1.8211970329284668, "learning_rate": 5.118024374599103e-07, "loss": 0.8131, "step": 74075 }, { "epoch": 0.9028310969739071, "grad_norm": 1.8350147008895874, "learning_rate": 5.114817190506736e-07, "loss": 0.6833, "step": 74080 }, { "epoch": 0.9028920331980549, "grad_norm": 1.82084321975708, "learning_rate": 5.111610006414368e-07, "loss": 0.786, "step": 74085 }, { "epoch": 0.9029529694222027, "grad_norm": 2.03483510017395, "learning_rate": 5.108402822322002e-07, "loss": 0.7915, "step": 74090 }, { "epoch": 0.9030139056463505, "grad_norm": 1.7658625841140747, "learning_rate": 5.105195638229635e-07, "loss": 0.8145, "step": 74095 }, { "epoch": 0.9030748418704984, "grad_norm": 2.009953498840332, "learning_rate": 5.101988454137268e-07, "loss": 0.812, "step": 74100 }, { "epoch": 0.9031357780946462, "grad_norm": 1.7202115058898926, "learning_rate": 5.098781270044901e-07, "loss": 0.8282, "step": 74105 }, { "epoch": 0.903196714318794, "grad_norm": 2.113921642303467, "learning_rate": 5.095574085952534e-07, "loss": 0.7914, "step": 74110 }, { "epoch": 0.9032576505429417, "grad_norm": 2.2713193893432617, "learning_rate": 5.092366901860167e-07, "loss": 0.8271, "step": 74115 }, { "epoch": 0.9033185867670895, "grad_norm": 1.9380427598953247, "learning_rate": 5.089159717767801e-07, "loss": 0.7823, "step": 74120 }, { "epoch": 0.9033795229912374, "grad_norm": 1.8157540559768677, "learning_rate": 5.085952533675433e-07, "loss": 0.804, "step": 74125 }, { "epoch": 0.9034404592153852, "grad_norm": 2.0618197917938232, "learning_rate": 5.082745349583066e-07, "loss": 0.8456, "step": 74130 }, { "epoch": 0.903501395439533, "grad_norm": 2.048490047454834, "learning_rate": 5.0795381654907e-07, "loss": 0.7855, "step": 74135 }, { "epoch": 0.9035623316636808, "grad_norm": 2.046578884124756, "learning_rate": 5.076330981398333e-07, "loss": 0.8368, "step": 74140 }, { "epoch": 0.9036232678878287, "grad_norm": 2.3101205825805664, "learning_rate": 5.073123797305965e-07, "loss": 0.8102, "step": 74145 }, { "epoch": 0.9036842041119764, "grad_norm": 2.2140777111053467, "learning_rate": 5.069916613213599e-07, "loss": 0.8176, "step": 74150 }, { "epoch": 0.9037451403361242, "grad_norm": 2.0478785037994385, "learning_rate": 5.066709429121232e-07, "loss": 0.8121, "step": 74155 }, { "epoch": 0.903806076560272, "grad_norm": 2.0935449600219727, "learning_rate": 5.063502245028865e-07, "loss": 0.8625, "step": 74160 }, { "epoch": 0.9038670127844198, "grad_norm": 1.9098012447357178, "learning_rate": 5.060295060936498e-07, "loss": 0.7632, "step": 74165 }, { "epoch": 0.9039279490085677, "grad_norm": 1.8747758865356445, "learning_rate": 5.057087876844132e-07, "loss": 0.8712, "step": 74170 }, { "epoch": 0.9039888852327155, "grad_norm": 1.849746584892273, "learning_rate": 5.053880692751764e-07, "loss": 0.8729, "step": 74175 }, { "epoch": 0.9040498214568633, "grad_norm": 1.9715806245803833, "learning_rate": 5.050673508659398e-07, "loss": 0.8038, "step": 74180 }, { "epoch": 0.904110757681011, "grad_norm": 2.0810658931732178, "learning_rate": 5.04746632456703e-07, "loss": 0.796, "step": 74185 }, { "epoch": 0.9041716939051588, "grad_norm": 1.8294044733047485, "learning_rate": 5.044259140474664e-07, "loss": 0.761, "step": 74190 }, { "epoch": 0.9042326301293067, "grad_norm": 2.0332109928131104, "learning_rate": 5.041051956382297e-07, "loss": 0.8007, "step": 74195 }, { "epoch": 0.9042935663534545, "grad_norm": 2.117960214614868, "learning_rate": 5.03784477228993e-07, "loss": 0.8236, "step": 74200 }, { "epoch": 0.9043545025776023, "grad_norm": 2.317138671875, "learning_rate": 5.034637588197563e-07, "loss": 0.8542, "step": 74205 }, { "epoch": 0.9044154388017501, "grad_norm": 1.6481318473815918, "learning_rate": 5.031430404105197e-07, "loss": 0.7078, "step": 74210 }, { "epoch": 0.904476375025898, "grad_norm": 2.2895166873931885, "learning_rate": 5.028223220012829e-07, "loss": 0.7395, "step": 74215 }, { "epoch": 0.9045373112500457, "grad_norm": 2.1028144359588623, "learning_rate": 5.025016035920462e-07, "loss": 0.7829, "step": 74220 }, { "epoch": 0.9045982474741935, "grad_norm": 2.1569700241088867, "learning_rate": 5.021808851828096e-07, "loss": 0.856, "step": 74225 }, { "epoch": 0.9046591836983413, "grad_norm": 2.212291955947876, "learning_rate": 5.018601667735729e-07, "loss": 0.769, "step": 74230 }, { "epoch": 0.9047201199224891, "grad_norm": 2.1405234336853027, "learning_rate": 5.015394483643361e-07, "loss": 0.8135, "step": 74235 }, { "epoch": 0.904781056146637, "grad_norm": 2.933713674545288, "learning_rate": 5.012187299550995e-07, "loss": 0.859, "step": 74240 }, { "epoch": 0.9048419923707848, "grad_norm": 1.8536643981933594, "learning_rate": 5.008980115458628e-07, "loss": 0.7353, "step": 74245 }, { "epoch": 0.9049029285949326, "grad_norm": 1.8993338346481323, "learning_rate": 5.005772931366261e-07, "loss": 0.8148, "step": 74250 }, { "epoch": 0.9049638648190803, "grad_norm": 1.6084415912628174, "learning_rate": 5.002565747273894e-07, "loss": 0.7759, "step": 74255 }, { "epoch": 0.9050248010432281, "grad_norm": 2.2682557106018066, "learning_rate": 4.999358563181527e-07, "loss": 0.8571, "step": 74260 }, { "epoch": 0.905085737267376, "grad_norm": 2.1028542518615723, "learning_rate": 4.99615137908916e-07, "loss": 0.8221, "step": 74265 }, { "epoch": 0.9051466734915238, "grad_norm": 2.1240181922912598, "learning_rate": 4.992944194996794e-07, "loss": 0.7217, "step": 74270 }, { "epoch": 0.9052076097156716, "grad_norm": 1.9061425924301147, "learning_rate": 4.989737010904426e-07, "loss": 0.8223, "step": 74275 }, { "epoch": 0.9052685459398194, "grad_norm": 1.8923311233520508, "learning_rate": 4.986529826812059e-07, "loss": 0.8271, "step": 74280 }, { "epoch": 0.9053294821639672, "grad_norm": 2.256089210510254, "learning_rate": 4.983322642719693e-07, "loss": 0.846, "step": 74285 }, { "epoch": 0.905390418388115, "grad_norm": 1.7583729028701782, "learning_rate": 4.980115458627326e-07, "loss": 0.758, "step": 74290 }, { "epoch": 0.9054513546122628, "grad_norm": 2.1316771507263184, "learning_rate": 4.976908274534958e-07, "loss": 0.7472, "step": 74295 }, { "epoch": 0.9055122908364106, "grad_norm": 1.9508309364318848, "learning_rate": 4.973701090442592e-07, "loss": 0.7494, "step": 74300 }, { "epoch": 0.9055732270605584, "grad_norm": 2.1187994480133057, "learning_rate": 4.970493906350225e-07, "loss": 0.8099, "step": 74305 }, { "epoch": 0.9056341632847063, "grad_norm": 1.9539910554885864, "learning_rate": 4.967286722257858e-07, "loss": 0.8636, "step": 74310 }, { "epoch": 0.9056950995088541, "grad_norm": 2.1283154487609863, "learning_rate": 4.96407953816549e-07, "loss": 0.8289, "step": 74315 }, { "epoch": 0.9057560357330018, "grad_norm": 1.6623672246932983, "learning_rate": 4.960872354073124e-07, "loss": 0.8295, "step": 74320 }, { "epoch": 0.9058169719571496, "grad_norm": 1.8181928396224976, "learning_rate": 4.957665169980757e-07, "loss": 0.7579, "step": 74325 }, { "epoch": 0.9058779081812974, "grad_norm": 1.8232680559158325, "learning_rate": 4.95445798588839e-07, "loss": 0.7626, "step": 74330 }, { "epoch": 0.9059388444054453, "grad_norm": 1.883330225944519, "learning_rate": 4.951250801796024e-07, "loss": 0.8086, "step": 74335 }, { "epoch": 0.9059997806295931, "grad_norm": 1.9428327083587646, "learning_rate": 4.948043617703656e-07, "loss": 0.7681, "step": 74340 }, { "epoch": 0.9060607168537409, "grad_norm": 1.802334189414978, "learning_rate": 4.94483643361129e-07, "loss": 0.7574, "step": 74345 }, { "epoch": 0.9061216530778887, "grad_norm": 1.988925814628601, "learning_rate": 4.941629249518923e-07, "loss": 0.7887, "step": 74350 }, { "epoch": 0.9061825893020364, "grad_norm": 1.9780807495117188, "learning_rate": 4.938422065426556e-07, "loss": 0.749, "step": 74355 }, { "epoch": 0.9062435255261843, "grad_norm": 2.1209256649017334, "learning_rate": 4.935214881334188e-07, "loss": 0.8981, "step": 74360 }, { "epoch": 0.9063044617503321, "grad_norm": 1.9950318336486816, "learning_rate": 4.932007697241823e-07, "loss": 0.7573, "step": 74365 }, { "epoch": 0.9063653979744799, "grad_norm": 2.0521225929260254, "learning_rate": 4.928800513149455e-07, "loss": 0.8607, "step": 74370 }, { "epoch": 0.9064263341986277, "grad_norm": 1.7905343770980835, "learning_rate": 4.925593329057088e-07, "loss": 0.8145, "step": 74375 }, { "epoch": 0.9064872704227755, "grad_norm": 1.657140851020813, "learning_rate": 4.922386144964722e-07, "loss": 0.8219, "step": 74380 }, { "epoch": 0.9065482066469234, "grad_norm": 1.8831688165664673, "learning_rate": 4.919178960872355e-07, "loss": 0.8499, "step": 74385 }, { "epoch": 0.9066091428710711, "grad_norm": 1.936854600906372, "learning_rate": 4.915971776779987e-07, "loss": 0.7817, "step": 74390 }, { "epoch": 0.9066700790952189, "grad_norm": 2.354403257369995, "learning_rate": 4.912764592687621e-07, "loss": 0.835, "step": 74395 }, { "epoch": 0.9067310153193667, "grad_norm": 1.9879201650619507, "learning_rate": 4.909557408595254e-07, "loss": 0.8243, "step": 74400 }, { "epoch": 0.9067919515435146, "grad_norm": 1.9811888933181763, "learning_rate": 4.906350224502887e-07, "loss": 0.7695, "step": 74405 }, { "epoch": 0.9068528877676624, "grad_norm": 1.8828376531600952, "learning_rate": 4.90314304041052e-07, "loss": 0.7778, "step": 74410 }, { "epoch": 0.9069138239918102, "grad_norm": 1.9420275688171387, "learning_rate": 4.899935856318153e-07, "loss": 0.8831, "step": 74415 }, { "epoch": 0.906974760215958, "grad_norm": 2.261974573135376, "learning_rate": 4.896728672225786e-07, "loss": 0.8502, "step": 74420 }, { "epoch": 0.9070356964401057, "grad_norm": 2.1133458614349365, "learning_rate": 4.89352148813342e-07, "loss": 0.7999, "step": 74425 }, { "epoch": 0.9070966326642536, "grad_norm": 2.1004996299743652, "learning_rate": 4.890314304041052e-07, "loss": 0.831, "step": 74430 }, { "epoch": 0.9071575688884014, "grad_norm": 1.9201656579971313, "learning_rate": 4.887107119948685e-07, "loss": 0.7749, "step": 74435 }, { "epoch": 0.9072185051125492, "grad_norm": 2.516740083694458, "learning_rate": 4.883899935856319e-07, "loss": 0.8055, "step": 74440 }, { "epoch": 0.907279441336697, "grad_norm": 2.175724506378174, "learning_rate": 4.880692751763952e-07, "loss": 0.8896, "step": 74445 }, { "epoch": 0.9073403775608448, "grad_norm": 2.0515453815460205, "learning_rate": 4.877485567671584e-07, "loss": 0.802, "step": 74450 }, { "epoch": 0.9074013137849927, "grad_norm": 2.0994179248809814, "learning_rate": 4.874278383579218e-07, "loss": 0.8615, "step": 74455 }, { "epoch": 0.9074622500091404, "grad_norm": 2.501156806945801, "learning_rate": 4.871071199486851e-07, "loss": 0.8265, "step": 74460 }, { "epoch": 0.9075231862332882, "grad_norm": 2.044224262237549, "learning_rate": 4.867864015394484e-07, "loss": 0.8328, "step": 74465 }, { "epoch": 0.907584122457436, "grad_norm": 2.2263081073760986, "learning_rate": 4.864656831302117e-07, "loss": 0.7937, "step": 74470 }, { "epoch": 0.9076450586815838, "grad_norm": 1.9440277814865112, "learning_rate": 4.86144964720975e-07, "loss": 0.8519, "step": 74475 }, { "epoch": 0.9077059949057317, "grad_norm": 1.9587808847427368, "learning_rate": 4.858242463117383e-07, "loss": 0.7727, "step": 74480 }, { "epoch": 0.9077669311298795, "grad_norm": 1.9076281785964966, "learning_rate": 4.855035279025017e-07, "loss": 0.8297, "step": 74485 }, { "epoch": 0.9078278673540273, "grad_norm": 2.208848237991333, "learning_rate": 4.851828094932649e-07, "loss": 0.8293, "step": 74490 }, { "epoch": 0.907888803578175, "grad_norm": 1.8422294855117798, "learning_rate": 4.848620910840282e-07, "loss": 0.7958, "step": 74495 }, { "epoch": 0.9079497398023229, "grad_norm": 1.9612278938293457, "learning_rate": 4.845413726747916e-07, "loss": 0.8037, "step": 74500 }, { "epoch": 0.9080106760264707, "grad_norm": 1.8797045946121216, "learning_rate": 4.842206542655549e-07, "loss": 0.7469, "step": 74505 }, { "epoch": 0.9080716122506185, "grad_norm": 2.2996833324432373, "learning_rate": 4.838999358563181e-07, "loss": 0.8514, "step": 74510 }, { "epoch": 0.9081325484747663, "grad_norm": 1.8639037609100342, "learning_rate": 4.835792174470815e-07, "loss": 0.7975, "step": 74515 }, { "epoch": 0.9081934846989141, "grad_norm": 1.8490647077560425, "learning_rate": 4.832584990378448e-07, "loss": 0.7623, "step": 74520 }, { "epoch": 0.908254420923062, "grad_norm": 2.2150461673736572, "learning_rate": 4.829377806286081e-07, "loss": 0.8367, "step": 74525 }, { "epoch": 0.9083153571472097, "grad_norm": 1.9602539539337158, "learning_rate": 4.826170622193714e-07, "loss": 0.8028, "step": 74530 }, { "epoch": 0.9083762933713575, "grad_norm": 1.83163583278656, "learning_rate": 4.822963438101347e-07, "loss": 0.8266, "step": 74535 }, { "epoch": 0.9084372295955053, "grad_norm": 2.064582347869873, "learning_rate": 4.81975625400898e-07, "loss": 0.8529, "step": 74540 }, { "epoch": 0.9084981658196531, "grad_norm": 2.001220941543579, "learning_rate": 4.816549069916614e-07, "loss": 0.7732, "step": 74545 }, { "epoch": 0.908559102043801, "grad_norm": 2.521036386489868, "learning_rate": 4.813341885824246e-07, "loss": 0.8296, "step": 74550 }, { "epoch": 0.9086200382679488, "grad_norm": 2.4090685844421387, "learning_rate": 4.81013470173188e-07, "loss": 0.7947, "step": 74555 }, { "epoch": 0.9086809744920966, "grad_norm": 2.481186866760254, "learning_rate": 4.806927517639513e-07, "loss": 0.7973, "step": 74560 }, { "epoch": 0.9087419107162443, "grad_norm": 2.293583631515503, "learning_rate": 4.803720333547146e-07, "loss": 0.8454, "step": 74565 }, { "epoch": 0.9088028469403922, "grad_norm": 1.9827855825424194, "learning_rate": 4.800513149454778e-07, "loss": 0.7836, "step": 74570 }, { "epoch": 0.90886378316454, "grad_norm": 2.005281925201416, "learning_rate": 4.797305965362413e-07, "loss": 0.7877, "step": 74575 }, { "epoch": 0.9089247193886878, "grad_norm": 1.8931702375411987, "learning_rate": 4.794098781270045e-07, "loss": 0.8906, "step": 74580 }, { "epoch": 0.9089856556128356, "grad_norm": 1.7722516059875488, "learning_rate": 4.790891597177678e-07, "loss": 0.7466, "step": 74585 }, { "epoch": 0.9090465918369834, "grad_norm": 2.332554340362549, "learning_rate": 4.787684413085312e-07, "loss": 0.895, "step": 74590 }, { "epoch": 0.9091075280611313, "grad_norm": 1.9027634859085083, "learning_rate": 4.784477228992945e-07, "loss": 0.7851, "step": 74595 }, { "epoch": 0.909168464285279, "grad_norm": 2.2503418922424316, "learning_rate": 4.781270044900577e-07, "loss": 0.8055, "step": 74600 }, { "epoch": 0.9092294005094268, "grad_norm": 2.2132160663604736, "learning_rate": 4.778062860808211e-07, "loss": 0.7879, "step": 74605 }, { "epoch": 0.9092903367335746, "grad_norm": 1.865771770477295, "learning_rate": 4.774855676715844e-07, "loss": 0.7577, "step": 74610 }, { "epoch": 0.9093512729577224, "grad_norm": 1.821985125541687, "learning_rate": 4.771648492623477e-07, "loss": 0.7509, "step": 74615 }, { "epoch": 0.9094122091818703, "grad_norm": 1.8674620389938354, "learning_rate": 4.76844130853111e-07, "loss": 0.7786, "step": 74620 }, { "epoch": 0.9094731454060181, "grad_norm": 1.6852551698684692, "learning_rate": 4.765234124438743e-07, "loss": 0.9143, "step": 74625 }, { "epoch": 0.9095340816301659, "grad_norm": 1.9167064428329468, "learning_rate": 4.762026940346377e-07, "loss": 0.8001, "step": 74630 }, { "epoch": 0.9095950178543136, "grad_norm": 2.1920852661132812, "learning_rate": 4.758819756254009e-07, "loss": 0.8539, "step": 74635 }, { "epoch": 0.9096559540784614, "grad_norm": 1.8847434520721436, "learning_rate": 4.755612572161643e-07, "loss": 0.798, "step": 74640 }, { "epoch": 0.9097168903026093, "grad_norm": 2.829132318496704, "learning_rate": 4.7524053880692753e-07, "loss": 0.8341, "step": 74645 }, { "epoch": 0.9097778265267571, "grad_norm": 2.035240888595581, "learning_rate": 4.749198203976909e-07, "loss": 0.7861, "step": 74650 }, { "epoch": 0.9098387627509049, "grad_norm": 1.9730005264282227, "learning_rate": 4.7459910198845415e-07, "loss": 0.8135, "step": 74655 }, { "epoch": 0.9098996989750527, "grad_norm": 2.251471519470215, "learning_rate": 4.7427838357921753e-07, "loss": 0.7998, "step": 74660 }, { "epoch": 0.9099606351992006, "grad_norm": 2.0199968814849854, "learning_rate": 4.739576651699808e-07, "loss": 0.7064, "step": 74665 }, { "epoch": 0.9100215714233483, "grad_norm": 1.9616284370422363, "learning_rate": 4.7363694676074415e-07, "loss": 0.8319, "step": 74670 }, { "epoch": 0.9100825076474961, "grad_norm": 1.6849658489227295, "learning_rate": 4.7331622835150743e-07, "loss": 0.7641, "step": 74675 }, { "epoch": 0.9101434438716439, "grad_norm": 2.1465814113616943, "learning_rate": 4.7299550994227077e-07, "loss": 0.7988, "step": 74680 }, { "epoch": 0.9102043800957917, "grad_norm": 1.9136369228363037, "learning_rate": 4.7267479153303405e-07, "loss": 0.8075, "step": 74685 }, { "epoch": 0.9102653163199396, "grad_norm": 2.2539241313934326, "learning_rate": 4.723540731237974e-07, "loss": 0.8417, "step": 74690 }, { "epoch": 0.9103262525440874, "grad_norm": 2.079643487930298, "learning_rate": 4.7203335471456066e-07, "loss": 0.7899, "step": 74695 }, { "epoch": 0.9103871887682352, "grad_norm": 1.8949493169784546, "learning_rate": 4.71712636305324e-07, "loss": 0.8877, "step": 74700 }, { "epoch": 0.9104481249923829, "grad_norm": 1.5675333738327026, "learning_rate": 4.713919178960873e-07, "loss": 0.8445, "step": 74705 }, { "epoch": 0.9105090612165307, "grad_norm": 1.8811416625976562, "learning_rate": 4.710711994868506e-07, "loss": 0.8071, "step": 74710 }, { "epoch": 0.9105699974406786, "grad_norm": 2.498516798019409, "learning_rate": 4.707504810776139e-07, "loss": 0.8416, "step": 74715 }, { "epoch": 0.9106309336648264, "grad_norm": 1.8763008117675781, "learning_rate": 4.7042976266837723e-07, "loss": 0.7926, "step": 74720 }, { "epoch": 0.9106918698889742, "grad_norm": 2.3046555519104004, "learning_rate": 4.701090442591405e-07, "loss": 0.8235, "step": 74725 }, { "epoch": 0.910752806113122, "grad_norm": 1.7298004627227783, "learning_rate": 4.6978832584990385e-07, "loss": 0.7071, "step": 74730 }, { "epoch": 0.9108137423372699, "grad_norm": 1.9717507362365723, "learning_rate": 4.6946760744066713e-07, "loss": 0.7673, "step": 74735 }, { "epoch": 0.9108746785614176, "grad_norm": 2.158059597015381, "learning_rate": 4.6914688903143046e-07, "loss": 0.7886, "step": 74740 }, { "epoch": 0.9109356147855654, "grad_norm": 2.732944965362549, "learning_rate": 4.6882617062219374e-07, "loss": 0.8127, "step": 74745 }, { "epoch": 0.9109965510097132, "grad_norm": 1.9052499532699585, "learning_rate": 4.685054522129571e-07, "loss": 0.7689, "step": 74750 }, { "epoch": 0.911057487233861, "grad_norm": 2.022847890853882, "learning_rate": 4.6818473380372036e-07, "loss": 0.8492, "step": 74755 }, { "epoch": 0.9111184234580089, "grad_norm": 1.9302659034729004, "learning_rate": 4.678640153944837e-07, "loss": 0.8539, "step": 74760 }, { "epoch": 0.9111793596821567, "grad_norm": 1.7821106910705566, "learning_rate": 4.67543296985247e-07, "loss": 0.792, "step": 74765 }, { "epoch": 0.9112402959063045, "grad_norm": 2.17467999458313, "learning_rate": 4.672225785760103e-07, "loss": 0.8219, "step": 74770 }, { "epoch": 0.9113012321304522, "grad_norm": 2.3924672603607178, "learning_rate": 4.669018601667736e-07, "loss": 0.7465, "step": 74775 }, { "epoch": 0.9113621683546, "grad_norm": 1.9056168794631958, "learning_rate": 4.665811417575369e-07, "loss": 0.7981, "step": 74780 }, { "epoch": 0.9114231045787479, "grad_norm": 1.7938531637191772, "learning_rate": 4.662604233483002e-07, "loss": 0.8091, "step": 74785 }, { "epoch": 0.9114840408028957, "grad_norm": 2.458132266998291, "learning_rate": 4.6593970493906354e-07, "loss": 0.8029, "step": 74790 }, { "epoch": 0.9115449770270435, "grad_norm": 2.18281888961792, "learning_rate": 4.656189865298268e-07, "loss": 0.7486, "step": 74795 }, { "epoch": 0.9116059132511913, "grad_norm": 1.6797003746032715, "learning_rate": 4.6529826812059016e-07, "loss": 0.8492, "step": 74800 }, { "epoch": 0.9116668494753392, "grad_norm": 1.7830504179000854, "learning_rate": 4.6497754971135344e-07, "loss": 0.7795, "step": 74805 }, { "epoch": 0.9117277856994869, "grad_norm": 2.012634754180908, "learning_rate": 4.6465683130211677e-07, "loss": 0.7522, "step": 74810 }, { "epoch": 0.9117887219236347, "grad_norm": 2.4548075199127197, "learning_rate": 4.6433611289288005e-07, "loss": 0.8109, "step": 74815 }, { "epoch": 0.9118496581477825, "grad_norm": 2.359010696411133, "learning_rate": 4.640153944836434e-07, "loss": 0.827, "step": 74820 }, { "epoch": 0.9119105943719303, "grad_norm": 1.9723289012908936, "learning_rate": 4.6369467607440667e-07, "loss": 0.8071, "step": 74825 }, { "epoch": 0.9119715305960782, "grad_norm": 1.7203822135925293, "learning_rate": 4.6337395766517e-07, "loss": 0.7491, "step": 74830 }, { "epoch": 0.912032466820226, "grad_norm": 2.080477476119995, "learning_rate": 4.630532392559333e-07, "loss": 0.8407, "step": 74835 }, { "epoch": 0.9120934030443738, "grad_norm": 2.004549264907837, "learning_rate": 4.6273252084669667e-07, "loss": 0.8231, "step": 74840 }, { "epoch": 0.9121543392685215, "grad_norm": 2.1144821643829346, "learning_rate": 4.624118024374599e-07, "loss": 0.8066, "step": 74845 }, { "epoch": 0.9122152754926693, "grad_norm": 2.0215344429016113, "learning_rate": 4.620910840282233e-07, "loss": 0.787, "step": 74850 }, { "epoch": 0.9122762117168172, "grad_norm": 1.6573690176010132, "learning_rate": 4.617703656189865e-07, "loss": 0.8175, "step": 74855 }, { "epoch": 0.912337147940965, "grad_norm": 1.976012110710144, "learning_rate": 4.614496472097499e-07, "loss": 0.784, "step": 74860 }, { "epoch": 0.9123980841651128, "grad_norm": 1.995246171951294, "learning_rate": 4.6112892880051313e-07, "loss": 0.7522, "step": 74865 }, { "epoch": 0.9124590203892606, "grad_norm": 2.039891242980957, "learning_rate": 4.608082103912765e-07, "loss": 0.8542, "step": 74870 }, { "epoch": 0.9125199566134085, "grad_norm": 2.028825044631958, "learning_rate": 4.604874919820398e-07, "loss": 0.8366, "step": 74875 }, { "epoch": 0.9125808928375562, "grad_norm": 1.7694206237792969, "learning_rate": 4.6016677357280314e-07, "loss": 0.8646, "step": 74880 }, { "epoch": 0.912641829061704, "grad_norm": 2.1697545051574707, "learning_rate": 4.598460551635664e-07, "loss": 0.8778, "step": 74885 }, { "epoch": 0.9127027652858518, "grad_norm": 2.043205738067627, "learning_rate": 4.5952533675432975e-07, "loss": 0.7855, "step": 74890 }, { "epoch": 0.9127637015099996, "grad_norm": 1.933834195137024, "learning_rate": 4.5920461834509303e-07, "loss": 0.8755, "step": 74895 }, { "epoch": 0.9128246377341475, "grad_norm": 1.8853398561477661, "learning_rate": 4.5888389993585637e-07, "loss": 0.7938, "step": 74900 }, { "epoch": 0.9128855739582953, "grad_norm": 1.9487110376358032, "learning_rate": 4.5856318152661965e-07, "loss": 0.7705, "step": 74905 }, { "epoch": 0.9129465101824431, "grad_norm": 2.63922381401062, "learning_rate": 4.58242463117383e-07, "loss": 0.8152, "step": 74910 }, { "epoch": 0.9130074464065908, "grad_norm": 1.7844762802124023, "learning_rate": 4.5792174470814627e-07, "loss": 0.79, "step": 74915 }, { "epoch": 0.9130683826307386, "grad_norm": 1.6898666620254517, "learning_rate": 4.576010262989096e-07, "loss": 0.7835, "step": 74920 }, { "epoch": 0.9131293188548865, "grad_norm": 1.8688186407089233, "learning_rate": 4.5728030788967293e-07, "loss": 0.8346, "step": 74925 }, { "epoch": 0.9131902550790343, "grad_norm": 1.8983722925186157, "learning_rate": 4.569595894804362e-07, "loss": 0.788, "step": 74930 }, { "epoch": 0.9132511913031821, "grad_norm": 2.2281417846679688, "learning_rate": 4.5663887107119955e-07, "loss": 0.8311, "step": 74935 }, { "epoch": 0.9133121275273299, "grad_norm": 1.9324572086334229, "learning_rate": 4.5631815266196283e-07, "loss": 0.759, "step": 74940 }, { "epoch": 0.9133730637514778, "grad_norm": 2.205890417098999, "learning_rate": 4.5599743425272617e-07, "loss": 0.8018, "step": 74945 }, { "epoch": 0.9134339999756255, "grad_norm": 2.097736358642578, "learning_rate": 4.5567671584348945e-07, "loss": 0.8301, "step": 74950 }, { "epoch": 0.9134949361997733, "grad_norm": 2.150308847427368, "learning_rate": 4.553559974342528e-07, "loss": 0.8527, "step": 74955 }, { "epoch": 0.9135558724239211, "grad_norm": 2.052504777908325, "learning_rate": 4.5503527902501606e-07, "loss": 0.7957, "step": 74960 }, { "epoch": 0.9136168086480689, "grad_norm": 1.988661766052246, "learning_rate": 4.547145606157794e-07, "loss": 0.8235, "step": 74965 }, { "epoch": 0.9136777448722168, "grad_norm": 1.9450232982635498, "learning_rate": 4.543938422065427e-07, "loss": 0.7903, "step": 74970 }, { "epoch": 0.9137386810963646, "grad_norm": 2.1395602226257324, "learning_rate": 4.54073123797306e-07, "loss": 0.8216, "step": 74975 }, { "epoch": 0.9137996173205124, "grad_norm": 2.4597792625427246, "learning_rate": 4.537524053880693e-07, "loss": 0.7603, "step": 74980 }, { "epoch": 0.9138605535446601, "grad_norm": 2.1920690536499023, "learning_rate": 4.5343168697883263e-07, "loss": 0.9196, "step": 74985 }, { "epoch": 0.9139214897688079, "grad_norm": 1.912989854812622, "learning_rate": 4.531109685695959e-07, "loss": 0.842, "step": 74990 }, { "epoch": 0.9139824259929558, "grad_norm": 1.9129899740219116, "learning_rate": 4.5279025016035925e-07, "loss": 0.8039, "step": 74995 }, { "epoch": 0.9140433622171036, "grad_norm": 1.9439057111740112, "learning_rate": 4.5246953175112253e-07, "loss": 0.8167, "step": 75000 }, { "epoch": 0.9141042984412514, "grad_norm": 2.1397452354431152, "learning_rate": 4.5214881334188586e-07, "loss": 0.8092, "step": 75005 }, { "epoch": 0.9141652346653992, "grad_norm": 1.8888086080551147, "learning_rate": 4.5182809493264914e-07, "loss": 0.7514, "step": 75010 }, { "epoch": 0.914226170889547, "grad_norm": 1.9072939157485962, "learning_rate": 4.5150737652341253e-07, "loss": 0.7591, "step": 75015 }, { "epoch": 0.9142871071136948, "grad_norm": 1.8285939693450928, "learning_rate": 4.5118665811417576e-07, "loss": 0.8358, "step": 75020 }, { "epoch": 0.9143480433378426, "grad_norm": 2.308305025100708, "learning_rate": 4.5086593970493915e-07, "loss": 0.8206, "step": 75025 }, { "epoch": 0.9144089795619904, "grad_norm": 1.6796151399612427, "learning_rate": 4.505452212957024e-07, "loss": 0.7807, "step": 75030 }, { "epoch": 0.9144699157861382, "grad_norm": 1.7152074575424194, "learning_rate": 4.5022450288646576e-07, "loss": 0.7898, "step": 75035 }, { "epoch": 0.914530852010286, "grad_norm": 1.809317946434021, "learning_rate": 4.49903784477229e-07, "loss": 0.7897, "step": 75040 }, { "epoch": 0.9145917882344339, "grad_norm": 1.7192883491516113, "learning_rate": 4.495830660679924e-07, "loss": 0.7677, "step": 75045 }, { "epoch": 0.9146527244585817, "grad_norm": 2.0364797115325928, "learning_rate": 4.4926234765875566e-07, "loss": 0.8286, "step": 75050 }, { "epoch": 0.9147136606827294, "grad_norm": 2.010770082473755, "learning_rate": 4.48941629249519e-07, "loss": 0.7544, "step": 75055 }, { "epoch": 0.9147745969068772, "grad_norm": 1.7576630115509033, "learning_rate": 4.486209108402823e-07, "loss": 0.793, "step": 75060 }, { "epoch": 0.9148355331310251, "grad_norm": 2.1651525497436523, "learning_rate": 4.483001924310456e-07, "loss": 0.7581, "step": 75065 }, { "epoch": 0.9148964693551729, "grad_norm": 1.748414158821106, "learning_rate": 4.479794740218089e-07, "loss": 0.8368, "step": 75070 }, { "epoch": 0.9149574055793207, "grad_norm": 1.836614727973938, "learning_rate": 4.476587556125722e-07, "loss": 0.813, "step": 75075 }, { "epoch": 0.9150183418034685, "grad_norm": 1.940445065498352, "learning_rate": 4.473380372033355e-07, "loss": 0.8357, "step": 75080 }, { "epoch": 0.9150792780276163, "grad_norm": 2.0645549297332764, "learning_rate": 4.4701731879409884e-07, "loss": 0.8172, "step": 75085 }, { "epoch": 0.9151402142517641, "grad_norm": 2.24676775932312, "learning_rate": 4.466966003848621e-07, "loss": 0.7582, "step": 75090 }, { "epoch": 0.9152011504759119, "grad_norm": 1.8367739915847778, "learning_rate": 4.4637588197562546e-07, "loss": 0.7987, "step": 75095 }, { "epoch": 0.9152620867000597, "grad_norm": 1.8016294240951538, "learning_rate": 4.4605516356638874e-07, "loss": 0.7639, "step": 75100 }, { "epoch": 0.9153230229242075, "grad_norm": 1.8209748268127441, "learning_rate": 4.4573444515715207e-07, "loss": 0.9458, "step": 75105 }, { "epoch": 0.9153839591483554, "grad_norm": 1.9970906972885132, "learning_rate": 4.4541372674791535e-07, "loss": 0.7793, "step": 75110 }, { "epoch": 0.9154448953725032, "grad_norm": 1.8403702974319458, "learning_rate": 4.450930083386787e-07, "loss": 0.8127, "step": 75115 }, { "epoch": 0.915505831596651, "grad_norm": 2.1321332454681396, "learning_rate": 4.4477228992944197e-07, "loss": 0.7987, "step": 75120 }, { "epoch": 0.9155667678207987, "grad_norm": 1.8073365688323975, "learning_rate": 4.444515715202053e-07, "loss": 0.7523, "step": 75125 }, { "epoch": 0.9156277040449465, "grad_norm": 2.024583101272583, "learning_rate": 4.441308531109686e-07, "loss": 0.7478, "step": 75130 }, { "epoch": 0.9156886402690944, "grad_norm": 1.858600378036499, "learning_rate": 4.438101347017319e-07, "loss": 0.8549, "step": 75135 }, { "epoch": 0.9157495764932422, "grad_norm": 1.8992148637771606, "learning_rate": 4.434894162924952e-07, "loss": 0.7568, "step": 75140 }, { "epoch": 0.91581051271739, "grad_norm": 1.981439471244812, "learning_rate": 4.4316869788325854e-07, "loss": 0.861, "step": 75145 }, { "epoch": 0.9158714489415378, "grad_norm": 1.7752803564071655, "learning_rate": 4.428479794740218e-07, "loss": 0.8124, "step": 75150 }, { "epoch": 0.9159323851656856, "grad_norm": 1.876877784729004, "learning_rate": 4.4252726106478515e-07, "loss": 0.7943, "step": 75155 }, { "epoch": 0.9159933213898334, "grad_norm": 2.0071985721588135, "learning_rate": 4.4220654265554843e-07, "loss": 0.7981, "step": 75160 }, { "epoch": 0.9160542576139812, "grad_norm": 2.0218749046325684, "learning_rate": 4.4188582424631177e-07, "loss": 0.8762, "step": 75165 }, { "epoch": 0.916115193838129, "grad_norm": 2.1980504989624023, "learning_rate": 4.4156510583707505e-07, "loss": 0.6949, "step": 75170 }, { "epoch": 0.9161761300622768, "grad_norm": 2.4404938220977783, "learning_rate": 4.412443874278384e-07, "loss": 0.7875, "step": 75175 }, { "epoch": 0.9162370662864247, "grad_norm": 1.8600674867630005, "learning_rate": 4.4092366901860167e-07, "loss": 0.8565, "step": 75180 }, { "epoch": 0.9162980025105725, "grad_norm": 2.2038424015045166, "learning_rate": 4.40602950609365e-07, "loss": 0.7836, "step": 75185 }, { "epoch": 0.9163589387347203, "grad_norm": 1.8171954154968262, "learning_rate": 4.402822322001283e-07, "loss": 0.7551, "step": 75190 }, { "epoch": 0.916419874958868, "grad_norm": 1.8717565536499023, "learning_rate": 4.399615137908916e-07, "loss": 0.8164, "step": 75195 }, { "epoch": 0.9164808111830158, "grad_norm": 2.246119976043701, "learning_rate": 4.396407953816549e-07, "loss": 0.8014, "step": 75200 }, { "epoch": 0.9165417474071637, "grad_norm": 2.1118650436401367, "learning_rate": 4.3932007697241823e-07, "loss": 0.782, "step": 75205 }, { "epoch": 0.9166026836313115, "grad_norm": 1.8550746440887451, "learning_rate": 4.389993585631815e-07, "loss": 0.8402, "step": 75210 }, { "epoch": 0.9166636198554593, "grad_norm": 1.9558637142181396, "learning_rate": 4.3867864015394485e-07, "loss": 0.7402, "step": 75215 }, { "epoch": 0.9167245560796071, "grad_norm": 2.07761812210083, "learning_rate": 4.3835792174470813e-07, "loss": 0.7768, "step": 75220 }, { "epoch": 0.916785492303755, "grad_norm": 1.9173253774642944, "learning_rate": 4.380372033354715e-07, "loss": 0.8313, "step": 75225 }, { "epoch": 0.9168464285279027, "grad_norm": 1.9107234477996826, "learning_rate": 4.3771648492623485e-07, "loss": 0.8098, "step": 75230 }, { "epoch": 0.9169073647520505, "grad_norm": 1.9836598634719849, "learning_rate": 4.3739576651699813e-07, "loss": 0.815, "step": 75235 }, { "epoch": 0.9169683009761983, "grad_norm": 2.143298864364624, "learning_rate": 4.3707504810776147e-07, "loss": 0.7849, "step": 75240 }, { "epoch": 0.9170292372003461, "grad_norm": 1.8523131608963013, "learning_rate": 4.3675432969852475e-07, "loss": 0.7906, "step": 75245 }, { "epoch": 0.917090173424494, "grad_norm": 1.7362068891525269, "learning_rate": 4.364336112892881e-07, "loss": 0.816, "step": 75250 }, { "epoch": 0.9171511096486418, "grad_norm": 2.251373529434204, "learning_rate": 4.3611289288005136e-07, "loss": 0.7942, "step": 75255 }, { "epoch": 0.9172120458727895, "grad_norm": 1.8748371601104736, "learning_rate": 4.357921744708147e-07, "loss": 0.8021, "step": 75260 }, { "epoch": 0.9172729820969373, "grad_norm": 1.835628867149353, "learning_rate": 4.35471456061578e-07, "loss": 0.7841, "step": 75265 }, { "epoch": 0.9173339183210851, "grad_norm": 2.0142903327941895, "learning_rate": 4.351507376523413e-07, "loss": 0.7416, "step": 75270 }, { "epoch": 0.917394854545233, "grad_norm": 1.9222182035446167, "learning_rate": 4.348300192431046e-07, "loss": 0.7949, "step": 75275 }, { "epoch": 0.9174557907693808, "grad_norm": 2.1599349975585938, "learning_rate": 4.3450930083386793e-07, "loss": 0.7885, "step": 75280 }, { "epoch": 0.9175167269935286, "grad_norm": 2.1987478733062744, "learning_rate": 4.341885824246312e-07, "loss": 0.7691, "step": 75285 }, { "epoch": 0.9175776632176764, "grad_norm": 1.9904508590698242, "learning_rate": 4.3386786401539455e-07, "loss": 0.8942, "step": 75290 }, { "epoch": 0.9176385994418241, "grad_norm": 1.8749430179595947, "learning_rate": 4.3354714560615783e-07, "loss": 0.8277, "step": 75295 }, { "epoch": 0.917699535665972, "grad_norm": 2.213902473449707, "learning_rate": 4.3322642719692116e-07, "loss": 0.8858, "step": 75300 }, { "epoch": 0.9177604718901198, "grad_norm": 1.9922518730163574, "learning_rate": 4.3290570878768444e-07, "loss": 0.8272, "step": 75305 }, { "epoch": 0.9178214081142676, "grad_norm": 2.024744987487793, "learning_rate": 4.325849903784478e-07, "loss": 0.7771, "step": 75310 }, { "epoch": 0.9178823443384154, "grad_norm": 1.8727360963821411, "learning_rate": 4.3226427196921106e-07, "loss": 0.8021, "step": 75315 }, { "epoch": 0.9179432805625632, "grad_norm": 1.8049167394638062, "learning_rate": 4.319435535599744e-07, "loss": 0.8405, "step": 75320 }, { "epoch": 0.9180042167867111, "grad_norm": 1.9581271409988403, "learning_rate": 4.316228351507377e-07, "loss": 0.7864, "step": 75325 }, { "epoch": 0.9180651530108588, "grad_norm": 2.192715883255005, "learning_rate": 4.31302116741501e-07, "loss": 0.8022, "step": 75330 }, { "epoch": 0.9181260892350066, "grad_norm": 2.2288835048675537, "learning_rate": 4.309813983322643e-07, "loss": 0.8118, "step": 75335 }, { "epoch": 0.9181870254591544, "grad_norm": 1.7844728231430054, "learning_rate": 4.306606799230276e-07, "loss": 0.7589, "step": 75340 }, { "epoch": 0.9182479616833022, "grad_norm": 1.6353766918182373, "learning_rate": 4.303399615137909e-07, "loss": 0.7839, "step": 75345 }, { "epoch": 0.9183088979074501, "grad_norm": 2.0422561168670654, "learning_rate": 4.3001924310455424e-07, "loss": 0.8474, "step": 75350 }, { "epoch": 0.9183698341315979, "grad_norm": 1.9613425731658936, "learning_rate": 4.296985246953175e-07, "loss": 0.7909, "step": 75355 }, { "epoch": 0.9184307703557457, "grad_norm": 1.883183479309082, "learning_rate": 4.2937780628608086e-07, "loss": 0.8299, "step": 75360 }, { "epoch": 0.9184917065798934, "grad_norm": 1.7175272703170776, "learning_rate": 4.2905708787684414e-07, "loss": 0.8283, "step": 75365 }, { "epoch": 0.9185526428040413, "grad_norm": 2.0871567726135254, "learning_rate": 4.287363694676075e-07, "loss": 0.8008, "step": 75370 }, { "epoch": 0.9186135790281891, "grad_norm": 2.170318603515625, "learning_rate": 4.2841565105837076e-07, "loss": 0.6997, "step": 75375 }, { "epoch": 0.9186745152523369, "grad_norm": 2.2166759967803955, "learning_rate": 4.280949326491341e-07, "loss": 0.8091, "step": 75380 }, { "epoch": 0.9187354514764847, "grad_norm": 1.7716670036315918, "learning_rate": 4.2777421423989737e-07, "loss": 0.7554, "step": 75385 }, { "epoch": 0.9187963877006325, "grad_norm": 1.792122721672058, "learning_rate": 4.274534958306607e-07, "loss": 0.7875, "step": 75390 }, { "epoch": 0.9188573239247804, "grad_norm": 2.1512415409088135, "learning_rate": 4.27132777421424e-07, "loss": 0.8295, "step": 75395 }, { "epoch": 0.9189182601489281, "grad_norm": 2.1338706016540527, "learning_rate": 4.268120590121874e-07, "loss": 0.7913, "step": 75400 }, { "epoch": 0.9189791963730759, "grad_norm": 1.9994041919708252, "learning_rate": 4.264913406029506e-07, "loss": 0.8497, "step": 75405 }, { "epoch": 0.9190401325972237, "grad_norm": 1.8692423105239868, "learning_rate": 4.26170622193714e-07, "loss": 0.8421, "step": 75410 }, { "epoch": 0.9191010688213715, "grad_norm": 1.7847561836242676, "learning_rate": 4.258499037844772e-07, "loss": 0.8532, "step": 75415 }, { "epoch": 0.9191620050455194, "grad_norm": 2.0562126636505127, "learning_rate": 4.255291853752406e-07, "loss": 0.7947, "step": 75420 }, { "epoch": 0.9192229412696672, "grad_norm": 2.1504688262939453, "learning_rate": 4.2520846696600383e-07, "loss": 0.7495, "step": 75425 }, { "epoch": 0.919283877493815, "grad_norm": 1.9845421314239502, "learning_rate": 4.248877485567672e-07, "loss": 0.7977, "step": 75430 }, { "epoch": 0.9193448137179627, "grad_norm": 2.1777877807617188, "learning_rate": 4.245670301475305e-07, "loss": 0.7724, "step": 75435 }, { "epoch": 0.9194057499421106, "grad_norm": 1.9525787830352783, "learning_rate": 4.2424631173829384e-07, "loss": 0.8253, "step": 75440 }, { "epoch": 0.9194666861662584, "grad_norm": 1.9041523933410645, "learning_rate": 4.239255933290571e-07, "loss": 0.7524, "step": 75445 }, { "epoch": 0.9195276223904062, "grad_norm": 2.431960105895996, "learning_rate": 4.2360487491982045e-07, "loss": 0.7827, "step": 75450 }, { "epoch": 0.919588558614554, "grad_norm": 4.903670310974121, "learning_rate": 4.2328415651058374e-07, "loss": 0.8029, "step": 75455 }, { "epoch": 0.9196494948387018, "grad_norm": 1.902147650718689, "learning_rate": 4.2296343810134707e-07, "loss": 0.7995, "step": 75460 }, { "epoch": 0.9197104310628497, "grad_norm": 2.0772411823272705, "learning_rate": 4.2264271969211035e-07, "loss": 0.7973, "step": 75465 }, { "epoch": 0.9197713672869974, "grad_norm": 1.9977445602416992, "learning_rate": 4.223220012828737e-07, "loss": 0.8518, "step": 75470 }, { "epoch": 0.9198323035111452, "grad_norm": 1.867692470550537, "learning_rate": 4.2200128287363697e-07, "loss": 0.7758, "step": 75475 }, { "epoch": 0.919893239735293, "grad_norm": 2.259798526763916, "learning_rate": 4.216805644644003e-07, "loss": 0.8535, "step": 75480 }, { "epoch": 0.9199541759594408, "grad_norm": 1.8217025995254517, "learning_rate": 4.213598460551636e-07, "loss": 0.7945, "step": 75485 }, { "epoch": 0.9200151121835887, "grad_norm": 1.8358598947525024, "learning_rate": 4.210391276459269e-07, "loss": 0.8767, "step": 75490 }, { "epoch": 0.9200760484077365, "grad_norm": 1.8249812126159668, "learning_rate": 4.207184092366902e-07, "loss": 0.775, "step": 75495 }, { "epoch": 0.9201369846318843, "grad_norm": 2.660485029220581, "learning_rate": 4.2039769082745353e-07, "loss": 0.907, "step": 75500 }, { "epoch": 0.920197920856032, "grad_norm": 1.8674993515014648, "learning_rate": 4.200769724182168e-07, "loss": 0.8156, "step": 75505 }, { "epoch": 0.9202588570801798, "grad_norm": 1.8053479194641113, "learning_rate": 4.1975625400898015e-07, "loss": 0.7877, "step": 75510 }, { "epoch": 0.9203197933043277, "grad_norm": 2.051701784133911, "learning_rate": 4.1943553559974343e-07, "loss": 0.8346, "step": 75515 }, { "epoch": 0.9203807295284755, "grad_norm": 2.025850296020508, "learning_rate": 4.1911481719050676e-07, "loss": 0.8718, "step": 75520 }, { "epoch": 0.9204416657526233, "grad_norm": 2.018336296081543, "learning_rate": 4.1879409878127005e-07, "loss": 0.8223, "step": 75525 }, { "epoch": 0.9205026019767711, "grad_norm": 1.9607921838760376, "learning_rate": 4.184733803720334e-07, "loss": 0.8242, "step": 75530 }, { "epoch": 0.920563538200919, "grad_norm": 1.9887381792068481, "learning_rate": 4.181526619627967e-07, "loss": 0.8602, "step": 75535 }, { "epoch": 0.9206244744250667, "grad_norm": 2.3470277786254883, "learning_rate": 4.1783194355356e-07, "loss": 0.7799, "step": 75540 }, { "epoch": 0.9206854106492145, "grad_norm": 1.8956714868545532, "learning_rate": 4.1751122514432333e-07, "loss": 0.8712, "step": 75545 }, { "epoch": 0.9207463468733623, "grad_norm": 1.7520160675048828, "learning_rate": 4.171905067350866e-07, "loss": 0.8265, "step": 75550 }, { "epoch": 0.9208072830975101, "grad_norm": 2.2912275791168213, "learning_rate": 4.1686978832584995e-07, "loss": 0.777, "step": 75555 }, { "epoch": 0.920868219321658, "grad_norm": 2.5185821056365967, "learning_rate": 4.1654906991661323e-07, "loss": 0.7359, "step": 75560 }, { "epoch": 0.9209291555458058, "grad_norm": 2.1026535034179688, "learning_rate": 4.1622835150737656e-07, "loss": 0.8344, "step": 75565 }, { "epoch": 0.9209900917699536, "grad_norm": 2.023601531982422, "learning_rate": 4.1590763309813984e-07, "loss": 0.8705, "step": 75570 }, { "epoch": 0.9210510279941013, "grad_norm": 2.059497117996216, "learning_rate": 4.1558691468890323e-07, "loss": 0.8401, "step": 75575 }, { "epoch": 0.9211119642182491, "grad_norm": 2.338500499725342, "learning_rate": 4.1526619627966646e-07, "loss": 0.7908, "step": 75580 }, { "epoch": 0.921172900442397, "grad_norm": 2.2211785316467285, "learning_rate": 4.1494547787042985e-07, "loss": 0.8202, "step": 75585 }, { "epoch": 0.9212338366665448, "grad_norm": 1.7847471237182617, "learning_rate": 4.146247594611931e-07, "loss": 0.7417, "step": 75590 }, { "epoch": 0.9212947728906926, "grad_norm": 2.4584968090057373, "learning_rate": 4.1430404105195646e-07, "loss": 0.8472, "step": 75595 }, { "epoch": 0.9213557091148404, "grad_norm": 1.975672721862793, "learning_rate": 4.139833226427197e-07, "loss": 0.7447, "step": 75600 }, { "epoch": 0.9214166453389883, "grad_norm": 1.933070421218872, "learning_rate": 4.136626042334831e-07, "loss": 0.8836, "step": 75605 }, { "epoch": 0.921477581563136, "grad_norm": 1.9432709217071533, "learning_rate": 4.1334188582424636e-07, "loss": 0.7866, "step": 75610 }, { "epoch": 0.9215385177872838, "grad_norm": 2.0686073303222656, "learning_rate": 4.130211674150097e-07, "loss": 0.7687, "step": 75615 }, { "epoch": 0.9215994540114316, "grad_norm": 2.102227210998535, "learning_rate": 4.12700449005773e-07, "loss": 0.826, "step": 75620 }, { "epoch": 0.9216603902355794, "grad_norm": 1.582356333732605, "learning_rate": 4.123797305965363e-07, "loss": 0.8166, "step": 75625 }, { "epoch": 0.9217213264597273, "grad_norm": 4.129176139831543, "learning_rate": 4.120590121872996e-07, "loss": 0.8266, "step": 75630 }, { "epoch": 0.9217822626838751, "grad_norm": 2.3469581604003906, "learning_rate": 4.1173829377806293e-07, "loss": 0.7948, "step": 75635 }, { "epoch": 0.9218431989080229, "grad_norm": 1.8660775423049927, "learning_rate": 4.114175753688262e-07, "loss": 0.7795, "step": 75640 }, { "epoch": 0.9219041351321706, "grad_norm": 1.8329392671585083, "learning_rate": 4.1109685695958954e-07, "loss": 0.7908, "step": 75645 }, { "epoch": 0.9219650713563184, "grad_norm": 2.5221920013427734, "learning_rate": 4.107761385503528e-07, "loss": 0.8085, "step": 75650 }, { "epoch": 0.9220260075804663, "grad_norm": 1.6858900785446167, "learning_rate": 4.1045542014111616e-07, "loss": 0.7483, "step": 75655 }, { "epoch": 0.9220869438046141, "grad_norm": 2.0015673637390137, "learning_rate": 4.1013470173187944e-07, "loss": 0.836, "step": 75660 }, { "epoch": 0.9221478800287619, "grad_norm": 2.004695177078247, "learning_rate": 4.098139833226428e-07, "loss": 0.7302, "step": 75665 }, { "epoch": 0.9222088162529097, "grad_norm": 2.0556142330169678, "learning_rate": 4.0949326491340606e-07, "loss": 0.7636, "step": 75670 }, { "epoch": 0.9222697524770576, "grad_norm": 1.962221384048462, "learning_rate": 4.091725465041694e-07, "loss": 0.7753, "step": 75675 }, { "epoch": 0.9223306887012053, "grad_norm": 2.3534913063049316, "learning_rate": 4.0885182809493267e-07, "loss": 0.8503, "step": 75680 }, { "epoch": 0.9223916249253531, "grad_norm": 2.1240108013153076, "learning_rate": 4.08531109685696e-07, "loss": 0.7793, "step": 75685 }, { "epoch": 0.9224525611495009, "grad_norm": 2.1898975372314453, "learning_rate": 4.082103912764593e-07, "loss": 0.8412, "step": 75690 }, { "epoch": 0.9225134973736487, "grad_norm": 1.7219115495681763, "learning_rate": 4.078896728672226e-07, "loss": 0.7817, "step": 75695 }, { "epoch": 0.9225744335977966, "grad_norm": 2.1625192165374756, "learning_rate": 4.075689544579859e-07, "loss": 0.8654, "step": 75700 }, { "epoch": 0.9226353698219444, "grad_norm": 1.93706476688385, "learning_rate": 4.0724823604874924e-07, "loss": 0.8119, "step": 75705 }, { "epoch": 0.9226963060460922, "grad_norm": 1.9356794357299805, "learning_rate": 4.069275176395125e-07, "loss": 0.8135, "step": 75710 }, { "epoch": 0.9227572422702399, "grad_norm": 2.1461291313171387, "learning_rate": 4.0660679923027585e-07, "loss": 0.781, "step": 75715 }, { "epoch": 0.9228181784943877, "grad_norm": 2.128194808959961, "learning_rate": 4.0628608082103914e-07, "loss": 0.8192, "step": 75720 }, { "epoch": 0.9228791147185356, "grad_norm": 2.2268271446228027, "learning_rate": 4.0596536241180247e-07, "loss": 0.7737, "step": 75725 }, { "epoch": 0.9229400509426834, "grad_norm": 2.0879719257354736, "learning_rate": 4.0564464400256575e-07, "loss": 0.7846, "step": 75730 }, { "epoch": 0.9230009871668312, "grad_norm": 1.938671588897705, "learning_rate": 4.053239255933291e-07, "loss": 0.7726, "step": 75735 }, { "epoch": 0.923061923390979, "grad_norm": 2.2449307441711426, "learning_rate": 4.0500320718409237e-07, "loss": 0.7596, "step": 75740 }, { "epoch": 0.9231228596151269, "grad_norm": 1.9368985891342163, "learning_rate": 4.046824887748557e-07, "loss": 0.7825, "step": 75745 }, { "epoch": 0.9231837958392746, "grad_norm": 1.9037870168685913, "learning_rate": 4.04361770365619e-07, "loss": 0.7822, "step": 75750 }, { "epoch": 0.9232447320634224, "grad_norm": 2.0749619007110596, "learning_rate": 4.040410519563823e-07, "loss": 0.9152, "step": 75755 }, { "epoch": 0.9233056682875702, "grad_norm": 2.051543712615967, "learning_rate": 4.037203335471456e-07, "loss": 0.7718, "step": 75760 }, { "epoch": 0.923366604511718, "grad_norm": 2.4113948345184326, "learning_rate": 4.0339961513790893e-07, "loss": 0.798, "step": 75765 }, { "epoch": 0.9234275407358659, "grad_norm": 2.6832919120788574, "learning_rate": 4.030788967286722e-07, "loss": 0.8339, "step": 75770 }, { "epoch": 0.9234884769600137, "grad_norm": 2.029059648513794, "learning_rate": 4.0275817831943555e-07, "loss": 0.8065, "step": 75775 }, { "epoch": 0.9235494131841615, "grad_norm": 2.0195021629333496, "learning_rate": 4.0243745991019883e-07, "loss": 0.7845, "step": 75780 }, { "epoch": 0.9236103494083092, "grad_norm": 1.7679390907287598, "learning_rate": 4.021167415009622e-07, "loss": 0.8222, "step": 75785 }, { "epoch": 0.923671285632457, "grad_norm": 2.1477255821228027, "learning_rate": 4.0179602309172545e-07, "loss": 0.7735, "step": 75790 }, { "epoch": 0.9237322218566049, "grad_norm": 1.7190474271774292, "learning_rate": 4.0147530468248883e-07, "loss": 0.7929, "step": 75795 }, { "epoch": 0.9237931580807527, "grad_norm": 1.7805153131484985, "learning_rate": 4.0115458627325206e-07, "loss": 0.7856, "step": 75800 }, { "epoch": 0.9238540943049005, "grad_norm": 1.7623273134231567, "learning_rate": 4.0083386786401545e-07, "loss": 0.7642, "step": 75805 }, { "epoch": 0.9239150305290483, "grad_norm": 1.880258321762085, "learning_rate": 4.005131494547787e-07, "loss": 0.844, "step": 75810 }, { "epoch": 0.9239759667531962, "grad_norm": 1.6821908950805664, "learning_rate": 4.0019243104554207e-07, "loss": 0.8257, "step": 75815 }, { "epoch": 0.9240369029773439, "grad_norm": 1.7818355560302734, "learning_rate": 3.9987171263630535e-07, "loss": 0.7878, "step": 75820 }, { "epoch": 0.9240978392014917, "grad_norm": 1.809428095817566, "learning_rate": 3.995509942270687e-07, "loss": 0.7909, "step": 75825 }, { "epoch": 0.9241587754256395, "grad_norm": 2.1609818935394287, "learning_rate": 3.9923027581783196e-07, "loss": 0.828, "step": 75830 }, { "epoch": 0.9242197116497873, "grad_norm": 1.8298755884170532, "learning_rate": 3.989095574085953e-07, "loss": 0.76, "step": 75835 }, { "epoch": 0.9242806478739352, "grad_norm": 1.8705556392669678, "learning_rate": 3.9858883899935863e-07, "loss": 0.7907, "step": 75840 }, { "epoch": 0.924341584098083, "grad_norm": 1.857273817062378, "learning_rate": 3.982681205901219e-07, "loss": 0.74, "step": 75845 }, { "epoch": 0.9244025203222308, "grad_norm": 2.154808759689331, "learning_rate": 3.9794740218088525e-07, "loss": 0.8284, "step": 75850 }, { "epoch": 0.9244634565463785, "grad_norm": 1.8217105865478516, "learning_rate": 3.9762668377164853e-07, "loss": 0.7614, "step": 75855 }, { "epoch": 0.9245243927705263, "grad_norm": 1.7568846940994263, "learning_rate": 3.9730596536241186e-07, "loss": 0.7454, "step": 75860 }, { "epoch": 0.9245853289946742, "grad_norm": 1.9761950969696045, "learning_rate": 3.9698524695317515e-07, "loss": 0.7989, "step": 75865 }, { "epoch": 0.924646265218822, "grad_norm": 2.0679705142974854, "learning_rate": 3.966645285439385e-07, "loss": 0.7188, "step": 75870 }, { "epoch": 0.9247072014429698, "grad_norm": 1.8765885829925537, "learning_rate": 3.9634381013470176e-07, "loss": 0.8297, "step": 75875 }, { "epoch": 0.9247681376671176, "grad_norm": 2.2166173458099365, "learning_rate": 3.960230917254651e-07, "loss": 0.8654, "step": 75880 }, { "epoch": 0.9248290738912655, "grad_norm": 1.6762579679489136, "learning_rate": 3.957023733162284e-07, "loss": 0.7976, "step": 75885 }, { "epoch": 0.9248900101154132, "grad_norm": 1.9466668367385864, "learning_rate": 3.953816549069917e-07, "loss": 0.8598, "step": 75890 }, { "epoch": 0.924950946339561, "grad_norm": 1.8107093572616577, "learning_rate": 3.95060936497755e-07, "loss": 0.8203, "step": 75895 }, { "epoch": 0.9250118825637088, "grad_norm": 1.8944586515426636, "learning_rate": 3.9474021808851833e-07, "loss": 0.7402, "step": 75900 }, { "epoch": 0.9250728187878566, "grad_norm": 1.8698503971099854, "learning_rate": 3.944194996792816e-07, "loss": 0.7402, "step": 75905 }, { "epoch": 0.9251337550120045, "grad_norm": 2.288933753967285, "learning_rate": 3.9409878127004494e-07, "loss": 0.8078, "step": 75910 }, { "epoch": 0.9251946912361523, "grad_norm": 3.0805246829986572, "learning_rate": 3.937780628608082e-07, "loss": 0.8466, "step": 75915 }, { "epoch": 0.9252556274603001, "grad_norm": 1.8668979406356812, "learning_rate": 3.9345734445157156e-07, "loss": 0.8729, "step": 75920 }, { "epoch": 0.9253165636844478, "grad_norm": 2.185427665710449, "learning_rate": 3.9313662604233484e-07, "loss": 0.8164, "step": 75925 }, { "epoch": 0.9253774999085956, "grad_norm": 2.083134412765503, "learning_rate": 3.928159076330982e-07, "loss": 0.8144, "step": 75930 }, { "epoch": 0.9254384361327435, "grad_norm": 2.077396869659424, "learning_rate": 3.9249518922386146e-07, "loss": 0.7839, "step": 75935 }, { "epoch": 0.9254993723568913, "grad_norm": 2.081045627593994, "learning_rate": 3.921744708146248e-07, "loss": 0.8465, "step": 75940 }, { "epoch": 0.9255603085810391, "grad_norm": 2.0488922595977783, "learning_rate": 3.9185375240538807e-07, "loss": 0.8421, "step": 75945 }, { "epoch": 0.9256212448051869, "grad_norm": 2.0333938598632812, "learning_rate": 3.915330339961514e-07, "loss": 0.8222, "step": 75950 }, { "epoch": 0.9256821810293347, "grad_norm": 1.829184889793396, "learning_rate": 3.912123155869147e-07, "loss": 0.7439, "step": 75955 }, { "epoch": 0.9257431172534825, "grad_norm": 1.9208776950836182, "learning_rate": 3.908915971776781e-07, "loss": 0.8164, "step": 75960 }, { "epoch": 0.9258040534776303, "grad_norm": 1.7878445386886597, "learning_rate": 3.905708787684413e-07, "loss": 0.7899, "step": 75965 }, { "epoch": 0.9258649897017781, "grad_norm": 1.8314540386199951, "learning_rate": 3.902501603592047e-07, "loss": 0.7503, "step": 75970 }, { "epoch": 0.9259259259259259, "grad_norm": 2.1207947731018066, "learning_rate": 3.899294419499679e-07, "loss": 0.905, "step": 75975 }, { "epoch": 0.9259868621500738, "grad_norm": 2.0390777587890625, "learning_rate": 3.896087235407313e-07, "loss": 0.7734, "step": 75980 }, { "epoch": 0.9260477983742216, "grad_norm": 1.7134239673614502, "learning_rate": 3.8928800513149454e-07, "loss": 0.8019, "step": 75985 }, { "epoch": 0.9261087345983694, "grad_norm": 2.264366388320923, "learning_rate": 3.889672867222579e-07, "loss": 0.7621, "step": 75990 }, { "epoch": 0.9261696708225171, "grad_norm": 2.3656787872314453, "learning_rate": 3.886465683130212e-07, "loss": 0.7843, "step": 75995 }, { "epoch": 0.9262306070466649, "grad_norm": 1.95164155960083, "learning_rate": 3.8832584990378454e-07, "loss": 0.8354, "step": 76000 }, { "epoch": 0.9262915432708128, "grad_norm": 1.7799205780029297, "learning_rate": 3.880051314945478e-07, "loss": 0.8258, "step": 76005 }, { "epoch": 0.9263524794949606, "grad_norm": 2.070265054702759, "learning_rate": 3.8768441308531116e-07, "loss": 0.7298, "step": 76010 }, { "epoch": 0.9264134157191084, "grad_norm": 2.1119608879089355, "learning_rate": 3.8736369467607444e-07, "loss": 0.8398, "step": 76015 }, { "epoch": 0.9264743519432562, "grad_norm": 2.0738279819488525, "learning_rate": 3.8704297626683777e-07, "loss": 0.8719, "step": 76020 }, { "epoch": 0.926535288167404, "grad_norm": 2.117635726928711, "learning_rate": 3.8672225785760105e-07, "loss": 0.8143, "step": 76025 }, { "epoch": 0.9265962243915518, "grad_norm": 1.9113881587982178, "learning_rate": 3.864015394483644e-07, "loss": 0.9089, "step": 76030 }, { "epoch": 0.9266571606156996, "grad_norm": 2.6182048320770264, "learning_rate": 3.8608082103912767e-07, "loss": 0.8, "step": 76035 }, { "epoch": 0.9267180968398474, "grad_norm": 2.2962307929992676, "learning_rate": 3.85760102629891e-07, "loss": 0.8485, "step": 76040 }, { "epoch": 0.9267790330639952, "grad_norm": 2.073901414871216, "learning_rate": 3.854393842206543e-07, "loss": 0.8675, "step": 76045 }, { "epoch": 0.926839969288143, "grad_norm": 1.8303965330123901, "learning_rate": 3.851186658114176e-07, "loss": 0.9132, "step": 76050 }, { "epoch": 0.9269009055122909, "grad_norm": 2.2295823097229004, "learning_rate": 3.847979474021809e-07, "loss": 0.8271, "step": 76055 }, { "epoch": 0.9269618417364387, "grad_norm": 1.8471240997314453, "learning_rate": 3.8447722899294423e-07, "loss": 0.7602, "step": 76060 }, { "epoch": 0.9270227779605864, "grad_norm": 2.1032376289367676, "learning_rate": 3.841565105837075e-07, "loss": 0.7758, "step": 76065 }, { "epoch": 0.9270837141847342, "grad_norm": 2.0545547008514404, "learning_rate": 3.8383579217447085e-07, "loss": 0.8082, "step": 76070 }, { "epoch": 0.927144650408882, "grad_norm": 2.0499796867370605, "learning_rate": 3.8351507376523413e-07, "loss": 0.8566, "step": 76075 }, { "epoch": 0.9272055866330299, "grad_norm": 2.0434341430664062, "learning_rate": 3.8319435535599747e-07, "loss": 0.8753, "step": 76080 }, { "epoch": 0.9272665228571777, "grad_norm": 1.8219099044799805, "learning_rate": 3.8287363694676075e-07, "loss": 0.865, "step": 76085 }, { "epoch": 0.9273274590813255, "grad_norm": 1.980893850326538, "learning_rate": 3.825529185375241e-07, "loss": 0.8326, "step": 76090 }, { "epoch": 0.9273883953054733, "grad_norm": 1.9445900917053223, "learning_rate": 3.8223220012828736e-07, "loss": 0.7496, "step": 76095 }, { "epoch": 0.9274493315296211, "grad_norm": 1.9499948024749756, "learning_rate": 3.819114817190507e-07, "loss": 0.8501, "step": 76100 }, { "epoch": 0.9275102677537689, "grad_norm": 1.9263464212417603, "learning_rate": 3.81590763309814e-07, "loss": 0.8228, "step": 76105 }, { "epoch": 0.9275712039779167, "grad_norm": 2.1538684368133545, "learning_rate": 3.812700449005773e-07, "loss": 0.7615, "step": 76110 }, { "epoch": 0.9276321402020645, "grad_norm": 2.0192174911499023, "learning_rate": 3.809493264913406e-07, "loss": 0.7752, "step": 76115 }, { "epoch": 0.9276930764262123, "grad_norm": 2.2714109420776367, "learning_rate": 3.8062860808210393e-07, "loss": 0.8479, "step": 76120 }, { "epoch": 0.9277540126503602, "grad_norm": 1.9842674732208252, "learning_rate": 3.803078896728672e-07, "loss": 0.8773, "step": 76125 }, { "epoch": 0.927814948874508, "grad_norm": 2.3211045265197754, "learning_rate": 3.7998717126363055e-07, "loss": 0.7717, "step": 76130 }, { "epoch": 0.9278758850986557, "grad_norm": 2.0200130939483643, "learning_rate": 3.7966645285439393e-07, "loss": 0.7592, "step": 76135 }, { "epoch": 0.9279368213228035, "grad_norm": 2.327225685119629, "learning_rate": 3.7934573444515716e-07, "loss": 0.8122, "step": 76140 }, { "epoch": 0.9279977575469514, "grad_norm": 1.9562941789627075, "learning_rate": 3.7902501603592055e-07, "loss": 0.8042, "step": 76145 }, { "epoch": 0.9280586937710992, "grad_norm": 2.041391611099243, "learning_rate": 3.787042976266838e-07, "loss": 0.8213, "step": 76150 }, { "epoch": 0.928119629995247, "grad_norm": 1.9171152114868164, "learning_rate": 3.7838357921744716e-07, "loss": 0.8703, "step": 76155 }, { "epoch": 0.9281805662193948, "grad_norm": 1.706559181213379, "learning_rate": 3.780628608082104e-07, "loss": 0.7766, "step": 76160 }, { "epoch": 0.9282415024435426, "grad_norm": 2.743499279022217, "learning_rate": 3.777421423989738e-07, "loss": 0.7897, "step": 76165 }, { "epoch": 0.9283024386676904, "grad_norm": 1.9052845239639282, "learning_rate": 3.7742142398973706e-07, "loss": 0.7898, "step": 76170 }, { "epoch": 0.9283633748918382, "grad_norm": 1.8156760931015015, "learning_rate": 3.771007055805004e-07, "loss": 0.8743, "step": 76175 }, { "epoch": 0.928424311115986, "grad_norm": 2.1547164916992188, "learning_rate": 3.767799871712637e-07, "loss": 0.802, "step": 76180 }, { "epoch": 0.9284852473401338, "grad_norm": 2.2747747898101807, "learning_rate": 3.76459268762027e-07, "loss": 0.8457, "step": 76185 }, { "epoch": 0.9285461835642816, "grad_norm": 1.8391488790512085, "learning_rate": 3.761385503527903e-07, "loss": 0.7674, "step": 76190 }, { "epoch": 0.9286071197884295, "grad_norm": 2.0385239124298096, "learning_rate": 3.7581783194355363e-07, "loss": 0.7821, "step": 76195 }, { "epoch": 0.9286680560125773, "grad_norm": 1.918574333190918, "learning_rate": 3.754971135343169e-07, "loss": 0.7623, "step": 76200 }, { "epoch": 0.928728992236725, "grad_norm": 1.68276047706604, "learning_rate": 3.7517639512508024e-07, "loss": 0.7623, "step": 76205 }, { "epoch": 0.9287899284608728, "grad_norm": 1.8158296346664429, "learning_rate": 3.748556767158435e-07, "loss": 0.828, "step": 76210 }, { "epoch": 0.9288508646850206, "grad_norm": 2.1237480640411377, "learning_rate": 3.7453495830660686e-07, "loss": 0.6973, "step": 76215 }, { "epoch": 0.9289118009091685, "grad_norm": 1.9677600860595703, "learning_rate": 3.7421423989737014e-07, "loss": 0.7156, "step": 76220 }, { "epoch": 0.9289727371333163, "grad_norm": 2.0975730419158936, "learning_rate": 3.738935214881335e-07, "loss": 0.7707, "step": 76225 }, { "epoch": 0.9290336733574641, "grad_norm": 1.9434524774551392, "learning_rate": 3.7357280307889676e-07, "loss": 0.7617, "step": 76230 }, { "epoch": 0.9290946095816118, "grad_norm": 1.8279834985733032, "learning_rate": 3.732520846696601e-07, "loss": 0.7963, "step": 76235 }, { "epoch": 0.9291555458057597, "grad_norm": 2.2164885997772217, "learning_rate": 3.729313662604234e-07, "loss": 0.765, "step": 76240 }, { "epoch": 0.9292164820299075, "grad_norm": 1.942838430404663, "learning_rate": 3.726106478511867e-07, "loss": 0.8677, "step": 76245 }, { "epoch": 0.9292774182540553, "grad_norm": 1.8746031522750854, "learning_rate": 3.7228992944195e-07, "loss": 0.7694, "step": 76250 }, { "epoch": 0.9293383544782031, "grad_norm": 2.115624189376831, "learning_rate": 3.719692110327133e-07, "loss": 0.7918, "step": 76255 }, { "epoch": 0.9293992907023509, "grad_norm": 1.8197358846664429, "learning_rate": 3.716484926234766e-07, "loss": 0.8181, "step": 76260 }, { "epoch": 0.9294602269264988, "grad_norm": 1.9327795505523682, "learning_rate": 3.7132777421423994e-07, "loss": 0.7986, "step": 76265 }, { "epoch": 0.9295211631506465, "grad_norm": 1.9810773134231567, "learning_rate": 3.710070558050032e-07, "loss": 0.8024, "step": 76270 }, { "epoch": 0.9295820993747943, "grad_norm": 1.9922844171524048, "learning_rate": 3.7068633739576656e-07, "loss": 0.8219, "step": 76275 }, { "epoch": 0.9296430355989421, "grad_norm": 2.0378284454345703, "learning_rate": 3.7036561898652984e-07, "loss": 0.8104, "step": 76280 }, { "epoch": 0.92970397182309, "grad_norm": 2.076444625854492, "learning_rate": 3.7004490057729317e-07, "loss": 0.8315, "step": 76285 }, { "epoch": 0.9297649080472378, "grad_norm": 1.8713396787643433, "learning_rate": 3.6972418216805645e-07, "loss": 0.7951, "step": 76290 }, { "epoch": 0.9298258442713856, "grad_norm": 1.8837546110153198, "learning_rate": 3.694034637588198e-07, "loss": 0.8125, "step": 76295 }, { "epoch": 0.9298867804955334, "grad_norm": 1.9050025939941406, "learning_rate": 3.6908274534958307e-07, "loss": 0.8016, "step": 76300 }, { "epoch": 0.9299477167196811, "grad_norm": 1.720715045928955, "learning_rate": 3.687620269403464e-07, "loss": 0.7858, "step": 76305 }, { "epoch": 0.930008652943829, "grad_norm": 1.848158359527588, "learning_rate": 3.684413085311097e-07, "loss": 0.8189, "step": 76310 }, { "epoch": 0.9300695891679768, "grad_norm": 2.573288917541504, "learning_rate": 3.68120590121873e-07, "loss": 0.746, "step": 76315 }, { "epoch": 0.9301305253921246, "grad_norm": 1.9941620826721191, "learning_rate": 3.677998717126363e-07, "loss": 0.854, "step": 76320 }, { "epoch": 0.9301914616162724, "grad_norm": 1.948179841041565, "learning_rate": 3.6747915330339964e-07, "loss": 0.8451, "step": 76325 }, { "epoch": 0.9302523978404202, "grad_norm": 1.714469313621521, "learning_rate": 3.671584348941629e-07, "loss": 0.8536, "step": 76330 }, { "epoch": 0.9303133340645681, "grad_norm": 2.101834774017334, "learning_rate": 3.6683771648492625e-07, "loss": 0.8776, "step": 76335 }, { "epoch": 0.9303742702887158, "grad_norm": 2.1582024097442627, "learning_rate": 3.6651699807568953e-07, "loss": 0.7598, "step": 76340 }, { "epoch": 0.9304352065128636, "grad_norm": 2.1603431701660156, "learning_rate": 3.661962796664529e-07, "loss": 0.8314, "step": 76345 }, { "epoch": 0.9304961427370114, "grad_norm": 1.9252216815948486, "learning_rate": 3.6587556125721615e-07, "loss": 0.8671, "step": 76350 }, { "epoch": 0.9305570789611592, "grad_norm": 2.4085612297058105, "learning_rate": 3.6555484284797954e-07, "loss": 0.7681, "step": 76355 }, { "epoch": 0.9306180151853071, "grad_norm": 1.8674218654632568, "learning_rate": 3.6523412443874276e-07, "loss": 0.7807, "step": 76360 }, { "epoch": 0.9306789514094549, "grad_norm": 1.796199917793274, "learning_rate": 3.6491340602950615e-07, "loss": 0.7926, "step": 76365 }, { "epoch": 0.9307398876336027, "grad_norm": 1.8180699348449707, "learning_rate": 3.645926876202694e-07, "loss": 0.8186, "step": 76370 }, { "epoch": 0.9308008238577504, "grad_norm": 1.8472033739089966, "learning_rate": 3.6427196921103277e-07, "loss": 0.823, "step": 76375 }, { "epoch": 0.9308617600818982, "grad_norm": 2.1198787689208984, "learning_rate": 3.6395125080179605e-07, "loss": 0.8533, "step": 76380 }, { "epoch": 0.9309226963060461, "grad_norm": 2.6123712062835693, "learning_rate": 3.636305323925594e-07, "loss": 0.82, "step": 76385 }, { "epoch": 0.9309836325301939, "grad_norm": 2.3067715167999268, "learning_rate": 3.6330981398332266e-07, "loss": 0.8094, "step": 76390 }, { "epoch": 0.9310445687543417, "grad_norm": 1.9373350143432617, "learning_rate": 3.62989095574086e-07, "loss": 0.8003, "step": 76395 }, { "epoch": 0.9311055049784895, "grad_norm": 1.622504472732544, "learning_rate": 3.626683771648493e-07, "loss": 0.7719, "step": 76400 }, { "epoch": 0.9311664412026374, "grad_norm": 2.122999668121338, "learning_rate": 3.623476587556126e-07, "loss": 0.8294, "step": 76405 }, { "epoch": 0.9312273774267851, "grad_norm": 1.9987883567810059, "learning_rate": 3.620269403463759e-07, "loss": 0.7701, "step": 76410 }, { "epoch": 0.9312883136509329, "grad_norm": 1.957457184791565, "learning_rate": 3.6170622193713923e-07, "loss": 0.8092, "step": 76415 }, { "epoch": 0.9313492498750807, "grad_norm": 1.858047604560852, "learning_rate": 3.613855035279025e-07, "loss": 0.8453, "step": 76420 }, { "epoch": 0.9314101860992285, "grad_norm": 2.250826358795166, "learning_rate": 3.6106478511866585e-07, "loss": 0.7835, "step": 76425 }, { "epoch": 0.9314711223233764, "grad_norm": 2.0849621295928955, "learning_rate": 3.6074406670942913e-07, "loss": 0.8155, "step": 76430 }, { "epoch": 0.9315320585475242, "grad_norm": 1.78413724899292, "learning_rate": 3.6042334830019246e-07, "loss": 0.8108, "step": 76435 }, { "epoch": 0.931592994771672, "grad_norm": 2.3586769104003906, "learning_rate": 3.601026298909558e-07, "loss": 0.7912, "step": 76440 }, { "epoch": 0.9316539309958197, "grad_norm": 2.0314173698425293, "learning_rate": 3.597819114817191e-07, "loss": 0.8236, "step": 76445 }, { "epoch": 0.9317148672199675, "grad_norm": 2.0552260875701904, "learning_rate": 3.594611930724824e-07, "loss": 0.7941, "step": 76450 }, { "epoch": 0.9317758034441154, "grad_norm": 1.8893556594848633, "learning_rate": 3.591404746632457e-07, "loss": 0.813, "step": 76455 }, { "epoch": 0.9318367396682632, "grad_norm": 1.8695968389511108, "learning_rate": 3.5881975625400903e-07, "loss": 0.7467, "step": 76460 }, { "epoch": 0.931897675892411, "grad_norm": 1.8688517808914185, "learning_rate": 3.584990378447723e-07, "loss": 0.803, "step": 76465 }, { "epoch": 0.9319586121165588, "grad_norm": 1.869376301765442, "learning_rate": 3.5817831943553564e-07, "loss": 0.7759, "step": 76470 }, { "epoch": 0.9320195483407067, "grad_norm": 1.8454231023788452, "learning_rate": 3.578576010262989e-07, "loss": 0.7671, "step": 76475 }, { "epoch": 0.9320804845648544, "grad_norm": 3.58280348777771, "learning_rate": 3.5753688261706226e-07, "loss": 0.8157, "step": 76480 }, { "epoch": 0.9321414207890022, "grad_norm": 1.7920135259628296, "learning_rate": 3.5721616420782554e-07, "loss": 0.8193, "step": 76485 }, { "epoch": 0.93220235701315, "grad_norm": 1.6898889541625977, "learning_rate": 3.568954457985889e-07, "loss": 0.8394, "step": 76490 }, { "epoch": 0.9322632932372978, "grad_norm": 2.0949783325195312, "learning_rate": 3.5657472738935216e-07, "loss": 0.7282, "step": 76495 }, { "epoch": 0.9323242294614457, "grad_norm": 1.7587491273880005, "learning_rate": 3.562540089801155e-07, "loss": 0.7835, "step": 76500 }, { "epoch": 0.9323851656855935, "grad_norm": 1.8870078325271606, "learning_rate": 3.559332905708788e-07, "loss": 0.8516, "step": 76505 }, { "epoch": 0.9324461019097413, "grad_norm": 2.338759660720825, "learning_rate": 3.556125721616421e-07, "loss": 0.8492, "step": 76510 }, { "epoch": 0.932507038133889, "grad_norm": 2.0743610858917236, "learning_rate": 3.552918537524054e-07, "loss": 0.85, "step": 76515 }, { "epoch": 0.9325679743580368, "grad_norm": 2.00608229637146, "learning_rate": 3.549711353431688e-07, "loss": 0.8029, "step": 76520 }, { "epoch": 0.9326289105821847, "grad_norm": 2.123915195465088, "learning_rate": 3.54650416933932e-07, "loss": 0.7764, "step": 76525 }, { "epoch": 0.9326898468063325, "grad_norm": 2.168490409851074, "learning_rate": 3.543296985246954e-07, "loss": 0.8406, "step": 76530 }, { "epoch": 0.9327507830304803, "grad_norm": 2.474323034286499, "learning_rate": 3.540089801154586e-07, "loss": 0.7696, "step": 76535 }, { "epoch": 0.9328117192546281, "grad_norm": 2.3400485515594482, "learning_rate": 3.53688261706222e-07, "loss": 0.8097, "step": 76540 }, { "epoch": 0.932872655478776, "grad_norm": 2.31811261177063, "learning_rate": 3.5336754329698524e-07, "loss": 0.784, "step": 76545 }, { "epoch": 0.9329335917029237, "grad_norm": 2.2100958824157715, "learning_rate": 3.530468248877486e-07, "loss": 0.8105, "step": 76550 }, { "epoch": 0.9329945279270715, "grad_norm": 2.1517257690429688, "learning_rate": 3.527261064785119e-07, "loss": 0.8038, "step": 76555 }, { "epoch": 0.9330554641512193, "grad_norm": 2.0352776050567627, "learning_rate": 3.5240538806927524e-07, "loss": 0.9211, "step": 76560 }, { "epoch": 0.9331164003753671, "grad_norm": 1.9736031293869019, "learning_rate": 3.520846696600385e-07, "loss": 0.8648, "step": 76565 }, { "epoch": 0.933177336599515, "grad_norm": 1.9542863368988037, "learning_rate": 3.5176395125080186e-07, "loss": 0.8123, "step": 76570 }, { "epoch": 0.9332382728236628, "grad_norm": 2.0972869396209717, "learning_rate": 3.5144323284156514e-07, "loss": 0.8185, "step": 76575 }, { "epoch": 0.9332992090478106, "grad_norm": 2.0572450160980225, "learning_rate": 3.5112251443232847e-07, "loss": 0.8478, "step": 76580 }, { "epoch": 0.9333601452719583, "grad_norm": 1.8041324615478516, "learning_rate": 3.5080179602309175e-07, "loss": 0.8067, "step": 76585 }, { "epoch": 0.9334210814961061, "grad_norm": 2.0305135250091553, "learning_rate": 3.504810776138551e-07, "loss": 0.8351, "step": 76590 }, { "epoch": 0.933482017720254, "grad_norm": 1.884069561958313, "learning_rate": 3.5016035920461837e-07, "loss": 0.8545, "step": 76595 }, { "epoch": 0.9335429539444018, "grad_norm": 1.8165966272354126, "learning_rate": 3.498396407953817e-07, "loss": 0.802, "step": 76600 }, { "epoch": 0.9336038901685496, "grad_norm": 1.7399011850357056, "learning_rate": 3.49518922386145e-07, "loss": 0.7796, "step": 76605 }, { "epoch": 0.9336648263926974, "grad_norm": 1.973567247390747, "learning_rate": 3.491982039769083e-07, "loss": 0.8137, "step": 76610 }, { "epoch": 0.9337257626168453, "grad_norm": 2.0227773189544678, "learning_rate": 3.488774855676716e-07, "loss": 0.7421, "step": 76615 }, { "epoch": 0.933786698840993, "grad_norm": 2.321450710296631, "learning_rate": 3.4855676715843494e-07, "loss": 0.7729, "step": 76620 }, { "epoch": 0.9338476350651408, "grad_norm": 1.7374155521392822, "learning_rate": 3.482360487491982e-07, "loss": 0.8111, "step": 76625 }, { "epoch": 0.9339085712892886, "grad_norm": 1.9441149234771729, "learning_rate": 3.4791533033996155e-07, "loss": 0.8941, "step": 76630 }, { "epoch": 0.9339695075134364, "grad_norm": 2.2048757076263428, "learning_rate": 3.4759461193072483e-07, "loss": 0.8018, "step": 76635 }, { "epoch": 0.9340304437375843, "grad_norm": 2.157435655593872, "learning_rate": 3.4727389352148817e-07, "loss": 0.7442, "step": 76640 }, { "epoch": 0.9340913799617321, "grad_norm": 1.7950681447982788, "learning_rate": 3.4695317511225145e-07, "loss": 0.7521, "step": 76645 }, { "epoch": 0.9341523161858799, "grad_norm": 2.086099624633789, "learning_rate": 3.466324567030148e-07, "loss": 0.7824, "step": 76650 }, { "epoch": 0.9342132524100276, "grad_norm": 2.0322160720825195, "learning_rate": 3.4631173829377807e-07, "loss": 0.8338, "step": 76655 }, { "epoch": 0.9342741886341754, "grad_norm": 2.0128746032714844, "learning_rate": 3.459910198845414e-07, "loss": 0.8466, "step": 76660 }, { "epoch": 0.9343351248583233, "grad_norm": 2.1336073875427246, "learning_rate": 3.456703014753047e-07, "loss": 0.8506, "step": 76665 }, { "epoch": 0.9343960610824711, "grad_norm": 2.303799867630005, "learning_rate": 3.45349583066068e-07, "loss": 0.8528, "step": 76670 }, { "epoch": 0.9344569973066189, "grad_norm": 1.7770406007766724, "learning_rate": 3.450288646568313e-07, "loss": 0.9074, "step": 76675 }, { "epoch": 0.9345179335307667, "grad_norm": 2.2477540969848633, "learning_rate": 3.4470814624759463e-07, "loss": 0.7853, "step": 76680 }, { "epoch": 0.9345788697549146, "grad_norm": 2.223357915878296, "learning_rate": 3.443874278383579e-07, "loss": 0.8578, "step": 76685 }, { "epoch": 0.9346398059790623, "grad_norm": 1.75985586643219, "learning_rate": 3.4406670942912125e-07, "loss": 0.8231, "step": 76690 }, { "epoch": 0.9347007422032101, "grad_norm": 2.262983560562134, "learning_rate": 3.4374599101988453e-07, "loss": 0.8059, "step": 76695 }, { "epoch": 0.9347616784273579, "grad_norm": 2.038072109222412, "learning_rate": 3.4342527261064786e-07, "loss": 0.803, "step": 76700 }, { "epoch": 0.9348226146515057, "grad_norm": 1.9768611192703247, "learning_rate": 3.4310455420141114e-07, "loss": 0.8175, "step": 76705 }, { "epoch": 0.9348835508756536, "grad_norm": 1.9272955656051636, "learning_rate": 3.427838357921745e-07, "loss": 0.8225, "step": 76710 }, { "epoch": 0.9349444870998014, "grad_norm": 2.578519105911255, "learning_rate": 3.4246311738293776e-07, "loss": 0.7946, "step": 76715 }, { "epoch": 0.9350054233239492, "grad_norm": 1.7890915870666504, "learning_rate": 3.421423989737011e-07, "loss": 0.8554, "step": 76720 }, { "epoch": 0.9350663595480969, "grad_norm": 1.897242546081543, "learning_rate": 3.418216805644644e-07, "loss": 0.7516, "step": 76725 }, { "epoch": 0.9351272957722447, "grad_norm": 2.2698440551757812, "learning_rate": 3.4150096215522776e-07, "loss": 0.7977, "step": 76730 }, { "epoch": 0.9351882319963926, "grad_norm": 1.9822903871536255, "learning_rate": 3.41180243745991e-07, "loss": 0.7525, "step": 76735 }, { "epoch": 0.9352491682205404, "grad_norm": 1.6179053783416748, "learning_rate": 3.408595253367544e-07, "loss": 0.8482, "step": 76740 }, { "epoch": 0.9353101044446882, "grad_norm": 2.1355299949645996, "learning_rate": 3.405388069275177e-07, "loss": 0.7208, "step": 76745 }, { "epoch": 0.935371040668836, "grad_norm": 1.9210532903671265, "learning_rate": 3.40218088518281e-07, "loss": 0.7714, "step": 76750 }, { "epoch": 0.9354319768929839, "grad_norm": 1.8752355575561523, "learning_rate": 3.3989737010904433e-07, "loss": 0.8244, "step": 76755 }, { "epoch": 0.9354929131171316, "grad_norm": 1.915229082107544, "learning_rate": 3.395766516998076e-07, "loss": 0.7639, "step": 76760 }, { "epoch": 0.9355538493412794, "grad_norm": 1.8836199045181274, "learning_rate": 3.3925593329057095e-07, "loss": 0.8727, "step": 76765 }, { "epoch": 0.9356147855654272, "grad_norm": 2.0868895053863525, "learning_rate": 3.3893521488133423e-07, "loss": 0.8343, "step": 76770 }, { "epoch": 0.935675721789575, "grad_norm": 1.7176021337509155, "learning_rate": 3.3861449647209756e-07, "loss": 0.7848, "step": 76775 }, { "epoch": 0.9357366580137229, "grad_norm": 2.0164480209350586, "learning_rate": 3.3829377806286084e-07, "loss": 0.801, "step": 76780 }, { "epoch": 0.9357975942378707, "grad_norm": 1.7555829286575317, "learning_rate": 3.379730596536242e-07, "loss": 0.7976, "step": 76785 }, { "epoch": 0.9358585304620185, "grad_norm": 2.115844249725342, "learning_rate": 3.3765234124438746e-07, "loss": 0.8106, "step": 76790 }, { "epoch": 0.9359194666861662, "grad_norm": 2.128567934036255, "learning_rate": 3.373316228351508e-07, "loss": 0.8182, "step": 76795 }, { "epoch": 0.935980402910314, "grad_norm": 1.9283857345581055, "learning_rate": 3.370109044259141e-07, "loss": 0.8362, "step": 76800 }, { "epoch": 0.9360413391344619, "grad_norm": 1.994149088859558, "learning_rate": 3.366901860166774e-07, "loss": 0.8573, "step": 76805 }, { "epoch": 0.9361022753586097, "grad_norm": 2.087312698364258, "learning_rate": 3.363694676074407e-07, "loss": 0.7655, "step": 76810 }, { "epoch": 0.9361632115827575, "grad_norm": 2.012537956237793, "learning_rate": 3.36048749198204e-07, "loss": 0.8518, "step": 76815 }, { "epoch": 0.9362241478069053, "grad_norm": 2.118044137954712, "learning_rate": 3.357280307889673e-07, "loss": 0.8588, "step": 76820 }, { "epoch": 0.9362850840310531, "grad_norm": 2.289340019226074, "learning_rate": 3.3540731237973064e-07, "loss": 0.7567, "step": 76825 }, { "epoch": 0.9363460202552009, "grad_norm": 1.9568034410476685, "learning_rate": 3.350865939704939e-07, "loss": 0.7665, "step": 76830 }, { "epoch": 0.9364069564793487, "grad_norm": 2.134929656982422, "learning_rate": 3.3476587556125726e-07, "loss": 0.7653, "step": 76835 }, { "epoch": 0.9364678927034965, "grad_norm": 1.8589625358581543, "learning_rate": 3.3444515715202054e-07, "loss": 0.8347, "step": 76840 }, { "epoch": 0.9365288289276443, "grad_norm": 2.1335551738739014, "learning_rate": 3.3412443874278387e-07, "loss": 0.722, "step": 76845 }, { "epoch": 0.9365897651517922, "grad_norm": 2.2270805835723877, "learning_rate": 3.3380372033354715e-07, "loss": 0.7336, "step": 76850 }, { "epoch": 0.93665070137594, "grad_norm": 1.791306495666504, "learning_rate": 3.334830019243105e-07, "loss": 0.8562, "step": 76855 }, { "epoch": 0.9367116376000878, "grad_norm": 1.9053876399993896, "learning_rate": 3.3316228351507377e-07, "loss": 0.8724, "step": 76860 }, { "epoch": 0.9367725738242355, "grad_norm": 1.818352460861206, "learning_rate": 3.328415651058371e-07, "loss": 0.7856, "step": 76865 }, { "epoch": 0.9368335100483833, "grad_norm": 2.0472402572631836, "learning_rate": 3.325208466966004e-07, "loss": 0.7267, "step": 76870 }, { "epoch": 0.9368944462725312, "grad_norm": 2.3473124504089355, "learning_rate": 3.322001282873637e-07, "loss": 0.7687, "step": 76875 }, { "epoch": 0.936955382496679, "grad_norm": 1.919815182685852, "learning_rate": 3.31879409878127e-07, "loss": 0.846, "step": 76880 }, { "epoch": 0.9370163187208268, "grad_norm": 1.8493659496307373, "learning_rate": 3.3155869146889034e-07, "loss": 0.8364, "step": 76885 }, { "epoch": 0.9370772549449746, "grad_norm": 2.2632439136505127, "learning_rate": 3.312379730596536e-07, "loss": 0.8648, "step": 76890 }, { "epoch": 0.9371381911691224, "grad_norm": 2.6950266361236572, "learning_rate": 3.3091725465041695e-07, "loss": 0.8044, "step": 76895 }, { "epoch": 0.9371991273932702, "grad_norm": 1.7115387916564941, "learning_rate": 3.3059653624118023e-07, "loss": 0.7973, "step": 76900 }, { "epoch": 0.937260063617418, "grad_norm": 1.898699164390564, "learning_rate": 3.302758178319436e-07, "loss": 0.7812, "step": 76905 }, { "epoch": 0.9373209998415658, "grad_norm": 2.3291287422180176, "learning_rate": 3.2995509942270685e-07, "loss": 0.835, "step": 76910 }, { "epoch": 0.9373819360657136, "grad_norm": 2.0918350219726562, "learning_rate": 3.2963438101347024e-07, "loss": 0.9141, "step": 76915 }, { "epoch": 0.9374428722898615, "grad_norm": 1.5570822954177856, "learning_rate": 3.2931366260423347e-07, "loss": 0.7653, "step": 76920 }, { "epoch": 0.9375038085140093, "grad_norm": 2.1910388469696045, "learning_rate": 3.2899294419499685e-07, "loss": 0.7363, "step": 76925 }, { "epoch": 0.9375647447381571, "grad_norm": 1.7728077173233032, "learning_rate": 3.286722257857601e-07, "loss": 0.8354, "step": 76930 }, { "epoch": 0.9376256809623048, "grad_norm": 2.0693416595458984, "learning_rate": 3.2835150737652347e-07, "loss": 0.8088, "step": 76935 }, { "epoch": 0.9376866171864526, "grad_norm": 2.091965913772583, "learning_rate": 3.2803078896728675e-07, "loss": 0.7897, "step": 76940 }, { "epoch": 0.9377475534106005, "grad_norm": 1.9803519248962402, "learning_rate": 3.277100705580501e-07, "loss": 0.8408, "step": 76945 }, { "epoch": 0.9378084896347483, "grad_norm": 2.236649513244629, "learning_rate": 3.2738935214881337e-07, "loss": 0.7978, "step": 76950 }, { "epoch": 0.9378694258588961, "grad_norm": 2.486215829849243, "learning_rate": 3.270686337395767e-07, "loss": 0.7478, "step": 76955 }, { "epoch": 0.9379303620830439, "grad_norm": 1.8784765005111694, "learning_rate": 3.2674791533034e-07, "loss": 0.7757, "step": 76960 }, { "epoch": 0.9379912983071917, "grad_norm": 2.155109405517578, "learning_rate": 3.264271969211033e-07, "loss": 0.8343, "step": 76965 }, { "epoch": 0.9380522345313395, "grad_norm": 2.1924641132354736, "learning_rate": 3.261064785118666e-07, "loss": 0.8543, "step": 76970 }, { "epoch": 0.9381131707554873, "grad_norm": 1.825487494468689, "learning_rate": 3.2578576010262993e-07, "loss": 0.882, "step": 76975 }, { "epoch": 0.9381741069796351, "grad_norm": 2.337129831314087, "learning_rate": 3.254650416933932e-07, "loss": 0.8743, "step": 76980 }, { "epoch": 0.9382350432037829, "grad_norm": 2.237912178039551, "learning_rate": 3.2514432328415655e-07, "loss": 0.8644, "step": 76985 }, { "epoch": 0.9382959794279307, "grad_norm": 1.9720323085784912, "learning_rate": 3.2482360487491983e-07, "loss": 0.7445, "step": 76990 }, { "epoch": 0.9383569156520786, "grad_norm": 1.854103684425354, "learning_rate": 3.2450288646568316e-07, "loss": 0.7402, "step": 76995 }, { "epoch": 0.9384178518762264, "grad_norm": 2.0087392330169678, "learning_rate": 3.2418216805644645e-07, "loss": 0.8439, "step": 77000 }, { "epoch": 0.9384787881003741, "grad_norm": 1.9849811792373657, "learning_rate": 3.238614496472098e-07, "loss": 0.7638, "step": 77005 }, { "epoch": 0.9385397243245219, "grad_norm": 2.020155191421509, "learning_rate": 3.2354073123797306e-07, "loss": 0.7735, "step": 77010 }, { "epoch": 0.9386006605486698, "grad_norm": 1.7392674684524536, "learning_rate": 3.232200128287364e-07, "loss": 0.7924, "step": 77015 }, { "epoch": 0.9386615967728176, "grad_norm": 1.8604192733764648, "learning_rate": 3.228992944194997e-07, "loss": 0.7684, "step": 77020 }, { "epoch": 0.9387225329969654, "grad_norm": 1.765476942062378, "learning_rate": 3.22578576010263e-07, "loss": 0.7806, "step": 77025 }, { "epoch": 0.9387834692211132, "grad_norm": 2.075681209564209, "learning_rate": 3.222578576010263e-07, "loss": 0.9178, "step": 77030 }, { "epoch": 0.938844405445261, "grad_norm": 1.743540644645691, "learning_rate": 3.2193713919178963e-07, "loss": 0.7693, "step": 77035 }, { "epoch": 0.9389053416694088, "grad_norm": 2.0215957164764404, "learning_rate": 3.2161642078255296e-07, "loss": 0.8155, "step": 77040 }, { "epoch": 0.9389662778935566, "grad_norm": 1.9791184663772583, "learning_rate": 3.2129570237331624e-07, "loss": 0.7792, "step": 77045 }, { "epoch": 0.9390272141177044, "grad_norm": 1.7730579376220703, "learning_rate": 3.209749839640796e-07, "loss": 0.7583, "step": 77050 }, { "epoch": 0.9390881503418522, "grad_norm": 1.646810531616211, "learning_rate": 3.2065426555484286e-07, "loss": 0.7674, "step": 77055 }, { "epoch": 0.939149086566, "grad_norm": 2.0839076042175293, "learning_rate": 3.203335471456062e-07, "loss": 0.8151, "step": 77060 }, { "epoch": 0.9392100227901479, "grad_norm": 2.1030545234680176, "learning_rate": 3.200128287363695e-07, "loss": 0.8148, "step": 77065 }, { "epoch": 0.9392709590142957, "grad_norm": 1.900844693183899, "learning_rate": 3.196921103271328e-07, "loss": 0.7521, "step": 77070 }, { "epoch": 0.9393318952384434, "grad_norm": 2.1233599185943604, "learning_rate": 3.193713919178961e-07, "loss": 0.8415, "step": 77075 }, { "epoch": 0.9393928314625912, "grad_norm": 2.574089288711548, "learning_rate": 3.190506735086595e-07, "loss": 0.7621, "step": 77080 }, { "epoch": 0.939453767686739, "grad_norm": 2.0300281047821045, "learning_rate": 3.187299550994227e-07, "loss": 0.8312, "step": 77085 }, { "epoch": 0.9395147039108869, "grad_norm": 1.8092747926712036, "learning_rate": 3.184092366901861e-07, "loss": 0.8935, "step": 77090 }, { "epoch": 0.9395756401350347, "grad_norm": 1.8093963861465454, "learning_rate": 3.180885182809493e-07, "loss": 0.8511, "step": 77095 }, { "epoch": 0.9396365763591825, "grad_norm": 1.865286111831665, "learning_rate": 3.177677998717127e-07, "loss": 0.7697, "step": 77100 }, { "epoch": 0.9396975125833303, "grad_norm": 2.0073013305664062, "learning_rate": 3.1744708146247594e-07, "loss": 0.8563, "step": 77105 }, { "epoch": 0.939758448807478, "grad_norm": 2.03127121925354, "learning_rate": 3.171263630532393e-07, "loss": 0.83, "step": 77110 }, { "epoch": 0.9398193850316259, "grad_norm": 7.5044989585876465, "learning_rate": 3.168056446440026e-07, "loss": 0.7677, "step": 77115 }, { "epoch": 0.9398803212557737, "grad_norm": 1.9412392377853394, "learning_rate": 3.1648492623476594e-07, "loss": 0.781, "step": 77120 }, { "epoch": 0.9399412574799215, "grad_norm": 1.6648249626159668, "learning_rate": 3.161642078255292e-07, "loss": 0.8475, "step": 77125 }, { "epoch": 0.9400021937040693, "grad_norm": 1.8017500638961792, "learning_rate": 3.1584348941629256e-07, "loss": 0.8016, "step": 77130 }, { "epoch": 0.9400631299282172, "grad_norm": 1.9841653108596802, "learning_rate": 3.1552277100705584e-07, "loss": 0.8613, "step": 77135 }, { "epoch": 0.940124066152365, "grad_norm": 1.9749648571014404, "learning_rate": 3.152020525978192e-07, "loss": 0.7588, "step": 77140 }, { "epoch": 0.9401850023765127, "grad_norm": 2.0814826488494873, "learning_rate": 3.1488133418858246e-07, "loss": 0.8546, "step": 77145 }, { "epoch": 0.9402459386006605, "grad_norm": 2.1939218044281006, "learning_rate": 3.145606157793458e-07, "loss": 0.8203, "step": 77150 }, { "epoch": 0.9403068748248083, "grad_norm": 1.9727203845977783, "learning_rate": 3.1423989737010907e-07, "loss": 0.7729, "step": 77155 }, { "epoch": 0.9403678110489562, "grad_norm": 2.1792354583740234, "learning_rate": 3.139191789608724e-07, "loss": 0.9516, "step": 77160 }, { "epoch": 0.940428747273104, "grad_norm": 1.824231505393982, "learning_rate": 3.135984605516357e-07, "loss": 0.8937, "step": 77165 }, { "epoch": 0.9404896834972518, "grad_norm": 1.8056261539459229, "learning_rate": 3.13277742142399e-07, "loss": 0.7371, "step": 77170 }, { "epoch": 0.9405506197213995, "grad_norm": 2.0435426235198975, "learning_rate": 3.129570237331623e-07, "loss": 0.7695, "step": 77175 }, { "epoch": 0.9406115559455474, "grad_norm": 2.084725856781006, "learning_rate": 3.1263630532392564e-07, "loss": 0.8803, "step": 77180 }, { "epoch": 0.9406724921696952, "grad_norm": 2.1427314281463623, "learning_rate": 3.123155869146889e-07, "loss": 0.8252, "step": 77185 }, { "epoch": 0.940733428393843, "grad_norm": 1.627514123916626, "learning_rate": 3.1199486850545225e-07, "loss": 0.8044, "step": 77190 }, { "epoch": 0.9407943646179908, "grad_norm": 2.0022637844085693, "learning_rate": 3.116741500962156e-07, "loss": 0.8921, "step": 77195 }, { "epoch": 0.9408553008421386, "grad_norm": 2.5966897010803223, "learning_rate": 3.1135343168697887e-07, "loss": 0.7773, "step": 77200 }, { "epoch": 0.9409162370662865, "grad_norm": 2.1539595127105713, "learning_rate": 3.110327132777422e-07, "loss": 0.8199, "step": 77205 }, { "epoch": 0.9409771732904342, "grad_norm": 2.1472792625427246, "learning_rate": 3.107119948685055e-07, "loss": 0.8382, "step": 77210 }, { "epoch": 0.941038109514582, "grad_norm": 2.290070056915283, "learning_rate": 3.103912764592688e-07, "loss": 0.7849, "step": 77215 }, { "epoch": 0.9410990457387298, "grad_norm": 1.9405689239501953, "learning_rate": 3.100705580500321e-07, "loss": 0.8666, "step": 77220 }, { "epoch": 0.9411599819628776, "grad_norm": 1.893640398979187, "learning_rate": 3.0974983964079544e-07, "loss": 0.8085, "step": 77225 }, { "epoch": 0.9412209181870255, "grad_norm": 1.8705253601074219, "learning_rate": 3.094291212315587e-07, "loss": 0.8449, "step": 77230 }, { "epoch": 0.9412818544111733, "grad_norm": 2.0722317695617676, "learning_rate": 3.0910840282232205e-07, "loss": 0.8133, "step": 77235 }, { "epoch": 0.9413427906353211, "grad_norm": 1.9600768089294434, "learning_rate": 3.0878768441308533e-07, "loss": 0.8548, "step": 77240 }, { "epoch": 0.9414037268594688, "grad_norm": 2.0113329887390137, "learning_rate": 3.0846696600384867e-07, "loss": 0.8794, "step": 77245 }, { "epoch": 0.9414646630836166, "grad_norm": 1.8254480361938477, "learning_rate": 3.0814624759461195e-07, "loss": 0.7872, "step": 77250 }, { "epoch": 0.9415255993077645, "grad_norm": 1.799750804901123, "learning_rate": 3.078255291853753e-07, "loss": 0.7987, "step": 77255 }, { "epoch": 0.9415865355319123, "grad_norm": 1.5932766199111938, "learning_rate": 3.0750481077613856e-07, "loss": 0.8088, "step": 77260 }, { "epoch": 0.9416474717560601, "grad_norm": 1.7261173725128174, "learning_rate": 3.071840923669019e-07, "loss": 0.7327, "step": 77265 }, { "epoch": 0.9417084079802079, "grad_norm": 1.8094679117202759, "learning_rate": 3.068633739576652e-07, "loss": 0.7635, "step": 77270 }, { "epoch": 0.9417693442043558, "grad_norm": 2.1289634704589844, "learning_rate": 3.065426555484285e-07, "loss": 0.7996, "step": 77275 }, { "epoch": 0.9418302804285035, "grad_norm": 1.6954573392868042, "learning_rate": 3.062219371391918e-07, "loss": 0.8312, "step": 77280 }, { "epoch": 0.9418912166526513, "grad_norm": 2.078892707824707, "learning_rate": 3.0590121872995513e-07, "loss": 0.7854, "step": 77285 }, { "epoch": 0.9419521528767991, "grad_norm": 2.0930213928222656, "learning_rate": 3.0558050032071847e-07, "loss": 0.8804, "step": 77290 }, { "epoch": 0.9420130891009469, "grad_norm": 1.8321490287780762, "learning_rate": 3.0525978191148175e-07, "loss": 0.7534, "step": 77295 }, { "epoch": 0.9420740253250948, "grad_norm": 2.056272268295288, "learning_rate": 3.049390635022451e-07, "loss": 0.7562, "step": 77300 }, { "epoch": 0.9421349615492426, "grad_norm": 2.164646625518799, "learning_rate": 3.0461834509300836e-07, "loss": 0.7813, "step": 77305 }, { "epoch": 0.9421958977733904, "grad_norm": 1.9703627824783325, "learning_rate": 3.042976266837717e-07, "loss": 0.817, "step": 77310 }, { "epoch": 0.9422568339975381, "grad_norm": 1.6353470087051392, "learning_rate": 3.03976908274535e-07, "loss": 0.8312, "step": 77315 }, { "epoch": 0.942317770221686, "grad_norm": 2.070359706878662, "learning_rate": 3.036561898652983e-07, "loss": 0.7837, "step": 77320 }, { "epoch": 0.9423787064458338, "grad_norm": 1.9287278652191162, "learning_rate": 3.033354714560616e-07, "loss": 0.8303, "step": 77325 }, { "epoch": 0.9424396426699816, "grad_norm": 1.7184096574783325, "learning_rate": 3.0301475304682493e-07, "loss": 0.8964, "step": 77330 }, { "epoch": 0.9425005788941294, "grad_norm": 1.9346299171447754, "learning_rate": 3.026940346375882e-07, "loss": 0.8112, "step": 77335 }, { "epoch": 0.9425615151182772, "grad_norm": 2.483379602432251, "learning_rate": 3.0237331622835154e-07, "loss": 0.8311, "step": 77340 }, { "epoch": 0.9426224513424251, "grad_norm": 2.0375239849090576, "learning_rate": 3.020525978191148e-07, "loss": 0.7998, "step": 77345 }, { "epoch": 0.9426833875665728, "grad_norm": 1.996249794960022, "learning_rate": 3.0173187940987816e-07, "loss": 0.8205, "step": 77350 }, { "epoch": 0.9427443237907206, "grad_norm": 1.9173290729522705, "learning_rate": 3.0141116100064144e-07, "loss": 0.7214, "step": 77355 }, { "epoch": 0.9428052600148684, "grad_norm": 2.0294673442840576, "learning_rate": 3.010904425914048e-07, "loss": 0.827, "step": 77360 }, { "epoch": 0.9428661962390162, "grad_norm": 2.1166062355041504, "learning_rate": 3.0076972418216806e-07, "loss": 0.8075, "step": 77365 }, { "epoch": 0.9429271324631641, "grad_norm": 1.779977560043335, "learning_rate": 3.004490057729314e-07, "loss": 0.7494, "step": 77370 }, { "epoch": 0.9429880686873119, "grad_norm": 2.095538377761841, "learning_rate": 3.001282873636947e-07, "loss": 0.8462, "step": 77375 }, { "epoch": 0.9430490049114597, "grad_norm": 1.7655426263809204, "learning_rate": 2.99807568954458e-07, "loss": 0.874, "step": 77380 }, { "epoch": 0.9431099411356074, "grad_norm": 1.728258728981018, "learning_rate": 2.994868505452213e-07, "loss": 0.7933, "step": 77385 }, { "epoch": 0.9431708773597552, "grad_norm": 1.9713679552078247, "learning_rate": 2.991661321359846e-07, "loss": 0.7558, "step": 77390 }, { "epoch": 0.9432318135839031, "grad_norm": 1.693694829940796, "learning_rate": 2.9884541372674796e-07, "loss": 0.7897, "step": 77395 }, { "epoch": 0.9432927498080509, "grad_norm": 2.608191967010498, "learning_rate": 2.9852469531751124e-07, "loss": 0.761, "step": 77400 }, { "epoch": 0.9433536860321987, "grad_norm": 2.0687177181243896, "learning_rate": 2.982039769082746e-07, "loss": 0.8614, "step": 77405 }, { "epoch": 0.9434146222563465, "grad_norm": 1.9432868957519531, "learning_rate": 2.9788325849903786e-07, "loss": 0.8365, "step": 77410 }, { "epoch": 0.9434755584804944, "grad_norm": 1.7169392108917236, "learning_rate": 2.975625400898012e-07, "loss": 0.7697, "step": 77415 }, { "epoch": 0.9435364947046421, "grad_norm": 1.8913558721542358, "learning_rate": 2.9724182168056447e-07, "loss": 0.7672, "step": 77420 }, { "epoch": 0.9435974309287899, "grad_norm": 1.9820233583450317, "learning_rate": 2.969211032713278e-07, "loss": 0.7987, "step": 77425 }, { "epoch": 0.9436583671529377, "grad_norm": 2.3533263206481934, "learning_rate": 2.966003848620911e-07, "loss": 0.7765, "step": 77430 }, { "epoch": 0.9437193033770855, "grad_norm": 1.7542476654052734, "learning_rate": 2.962796664528544e-07, "loss": 0.7598, "step": 77435 }, { "epoch": 0.9437802396012334, "grad_norm": 2.0810232162475586, "learning_rate": 2.959589480436177e-07, "loss": 0.7665, "step": 77440 }, { "epoch": 0.9438411758253812, "grad_norm": 2.055415630340576, "learning_rate": 2.9563822963438104e-07, "loss": 0.8143, "step": 77445 }, { "epoch": 0.943902112049529, "grad_norm": 1.7665557861328125, "learning_rate": 2.953175112251443e-07, "loss": 0.8961, "step": 77450 }, { "epoch": 0.9439630482736767, "grad_norm": 2.116283416748047, "learning_rate": 2.9499679281590765e-07, "loss": 0.8478, "step": 77455 }, { "epoch": 0.9440239844978245, "grad_norm": 2.069490432739258, "learning_rate": 2.9467607440667094e-07, "loss": 0.7996, "step": 77460 }, { "epoch": 0.9440849207219724, "grad_norm": 1.977635145187378, "learning_rate": 2.9435535599743427e-07, "loss": 0.7799, "step": 77465 }, { "epoch": 0.9441458569461202, "grad_norm": 2.1439292430877686, "learning_rate": 2.9403463758819755e-07, "loss": 0.8223, "step": 77470 }, { "epoch": 0.944206793170268, "grad_norm": 2.046072006225586, "learning_rate": 2.937139191789609e-07, "loss": 0.8547, "step": 77475 }, { "epoch": 0.9442677293944158, "grad_norm": 1.906182050704956, "learning_rate": 2.9339320076972417e-07, "loss": 0.8241, "step": 77480 }, { "epoch": 0.9443286656185637, "grad_norm": 1.8225998878479004, "learning_rate": 2.930724823604875e-07, "loss": 0.7755, "step": 77485 }, { "epoch": 0.9443896018427114, "grad_norm": 1.9144383668899536, "learning_rate": 2.927517639512508e-07, "loss": 0.7935, "step": 77490 }, { "epoch": 0.9444505380668592, "grad_norm": 1.8549588918685913, "learning_rate": 2.9243104554201417e-07, "loss": 0.7234, "step": 77495 }, { "epoch": 0.944511474291007, "grad_norm": 1.651049256324768, "learning_rate": 2.9211032713277745e-07, "loss": 0.7898, "step": 77500 }, { "epoch": 0.9445724105151548, "grad_norm": 1.9752601385116577, "learning_rate": 2.917896087235408e-07, "loss": 0.792, "step": 77505 }, { "epoch": 0.9446333467393027, "grad_norm": 2.232619524002075, "learning_rate": 2.9146889031430407e-07, "loss": 0.799, "step": 77510 }, { "epoch": 0.9446942829634505, "grad_norm": 1.8436739444732666, "learning_rate": 2.911481719050674e-07, "loss": 0.8514, "step": 77515 }, { "epoch": 0.9447552191875983, "grad_norm": 1.9658284187316895, "learning_rate": 2.908274534958307e-07, "loss": 0.8379, "step": 77520 }, { "epoch": 0.944816155411746, "grad_norm": 1.8755418062210083, "learning_rate": 2.90506735086594e-07, "loss": 0.8253, "step": 77525 }, { "epoch": 0.9448770916358938, "grad_norm": 1.8807785511016846, "learning_rate": 2.901860166773573e-07, "loss": 0.8309, "step": 77530 }, { "epoch": 0.9449380278600417, "grad_norm": 1.8401250839233398, "learning_rate": 2.8986529826812063e-07, "loss": 0.8188, "step": 77535 }, { "epoch": 0.9449989640841895, "grad_norm": 1.9163124561309814, "learning_rate": 2.895445798588839e-07, "loss": 0.7931, "step": 77540 }, { "epoch": 0.9450599003083373, "grad_norm": 2.602346420288086, "learning_rate": 2.8922386144964725e-07, "loss": 0.7583, "step": 77545 }, { "epoch": 0.9451208365324851, "grad_norm": 1.9641278982162476, "learning_rate": 2.8890314304041053e-07, "loss": 0.8373, "step": 77550 }, { "epoch": 0.945181772756633, "grad_norm": 2.4055590629577637, "learning_rate": 2.8858242463117387e-07, "loss": 0.856, "step": 77555 }, { "epoch": 0.9452427089807807, "grad_norm": 2.079267978668213, "learning_rate": 2.8826170622193715e-07, "loss": 0.8422, "step": 77560 }, { "epoch": 0.9453036452049285, "grad_norm": 1.970527172088623, "learning_rate": 2.879409878127005e-07, "loss": 0.7594, "step": 77565 }, { "epoch": 0.9453645814290763, "grad_norm": 1.9812166690826416, "learning_rate": 2.876202694034638e-07, "loss": 0.8137, "step": 77570 }, { "epoch": 0.9454255176532241, "grad_norm": 1.9903589487075806, "learning_rate": 2.872995509942271e-07, "loss": 0.8654, "step": 77575 }, { "epoch": 0.945486453877372, "grad_norm": 2.042405605316162, "learning_rate": 2.8697883258499043e-07, "loss": 0.8195, "step": 77580 }, { "epoch": 0.9455473901015198, "grad_norm": 1.7527554035186768, "learning_rate": 2.866581141757537e-07, "loss": 0.7607, "step": 77585 }, { "epoch": 0.9456083263256676, "grad_norm": 1.8778425455093384, "learning_rate": 2.8633739576651705e-07, "loss": 0.806, "step": 77590 }, { "epoch": 0.9456692625498153, "grad_norm": 1.909477949142456, "learning_rate": 2.8601667735728033e-07, "loss": 0.8299, "step": 77595 }, { "epoch": 0.9457301987739631, "grad_norm": 2.233719825744629, "learning_rate": 2.8569595894804366e-07, "loss": 0.8063, "step": 77600 }, { "epoch": 0.945791134998111, "grad_norm": 2.014228343963623, "learning_rate": 2.8537524053880695e-07, "loss": 0.7771, "step": 77605 }, { "epoch": 0.9458520712222588, "grad_norm": 1.826311707496643, "learning_rate": 2.850545221295703e-07, "loss": 0.7878, "step": 77610 }, { "epoch": 0.9459130074464066, "grad_norm": 2.1169965267181396, "learning_rate": 2.8473380372033356e-07, "loss": 0.8839, "step": 77615 }, { "epoch": 0.9459739436705544, "grad_norm": 1.9717391729354858, "learning_rate": 2.844130853110969e-07, "loss": 0.7623, "step": 77620 }, { "epoch": 0.9460348798947023, "grad_norm": 1.831066608428955, "learning_rate": 2.840923669018602e-07, "loss": 0.8007, "step": 77625 }, { "epoch": 0.94609581611885, "grad_norm": 2.0405378341674805, "learning_rate": 2.837716484926235e-07, "loss": 0.7982, "step": 77630 }, { "epoch": 0.9461567523429978, "grad_norm": 1.8686038255691528, "learning_rate": 2.834509300833868e-07, "loss": 0.856, "step": 77635 }, { "epoch": 0.9462176885671456, "grad_norm": 2.2390053272247314, "learning_rate": 2.8313021167415013e-07, "loss": 0.7902, "step": 77640 }, { "epoch": 0.9462786247912934, "grad_norm": 1.8819279670715332, "learning_rate": 2.828094932649134e-07, "loss": 0.8405, "step": 77645 }, { "epoch": 0.9463395610154413, "grad_norm": 1.7713375091552734, "learning_rate": 2.8248877485567674e-07, "loss": 0.8525, "step": 77650 }, { "epoch": 0.9464004972395891, "grad_norm": 2.144146680831909, "learning_rate": 2.8216805644644e-07, "loss": 0.8317, "step": 77655 }, { "epoch": 0.9464614334637369, "grad_norm": 2.74493408203125, "learning_rate": 2.8184733803720336e-07, "loss": 0.8491, "step": 77660 }, { "epoch": 0.9465223696878846, "grad_norm": 1.924674153327942, "learning_rate": 2.8152661962796664e-07, "loss": 0.808, "step": 77665 }, { "epoch": 0.9465833059120324, "grad_norm": 2.0271010398864746, "learning_rate": 2.8120590121873e-07, "loss": 0.8731, "step": 77670 }, { "epoch": 0.9466442421361803, "grad_norm": 1.8012064695358276, "learning_rate": 2.808851828094933e-07, "loss": 0.812, "step": 77675 }, { "epoch": 0.9467051783603281, "grad_norm": 2.290703058242798, "learning_rate": 2.805644644002566e-07, "loss": 0.7996, "step": 77680 }, { "epoch": 0.9467661145844759, "grad_norm": 2.0467958450317383, "learning_rate": 2.802437459910199e-07, "loss": 0.8123, "step": 77685 }, { "epoch": 0.9468270508086237, "grad_norm": 1.7150980234146118, "learning_rate": 2.799230275817832e-07, "loss": 0.7635, "step": 77690 }, { "epoch": 0.9468879870327715, "grad_norm": 2.015312910079956, "learning_rate": 2.7960230917254654e-07, "loss": 0.8317, "step": 77695 }, { "epoch": 0.9469489232569193, "grad_norm": 1.981701374053955, "learning_rate": 2.792815907633098e-07, "loss": 0.8026, "step": 77700 }, { "epoch": 0.9470098594810671, "grad_norm": 1.9457287788391113, "learning_rate": 2.7896087235407316e-07, "loss": 0.8516, "step": 77705 }, { "epoch": 0.9470707957052149, "grad_norm": 2.151433229446411, "learning_rate": 2.7864015394483644e-07, "loss": 0.8122, "step": 77710 }, { "epoch": 0.9471317319293627, "grad_norm": 1.9375795125961304, "learning_rate": 2.7831943553559977e-07, "loss": 0.8174, "step": 77715 }, { "epoch": 0.9471926681535106, "grad_norm": 1.9312043190002441, "learning_rate": 2.7799871712636305e-07, "loss": 0.7605, "step": 77720 }, { "epoch": 0.9472536043776584, "grad_norm": 1.973757028579712, "learning_rate": 2.776779987171264e-07, "loss": 0.7175, "step": 77725 }, { "epoch": 0.9473145406018062, "grad_norm": 1.976871132850647, "learning_rate": 2.7735728030788967e-07, "loss": 0.738, "step": 77730 }, { "epoch": 0.9473754768259539, "grad_norm": 1.9706333875656128, "learning_rate": 2.77036561898653e-07, "loss": 0.7671, "step": 77735 }, { "epoch": 0.9474364130501017, "grad_norm": 2.005901575088501, "learning_rate": 2.767158434894163e-07, "loss": 0.7584, "step": 77740 }, { "epoch": 0.9474973492742496, "grad_norm": 2.299468994140625, "learning_rate": 2.763951250801796e-07, "loss": 0.7885, "step": 77745 }, { "epoch": 0.9475582854983974, "grad_norm": 2.184192419052124, "learning_rate": 2.760744066709429e-07, "loss": 0.8513, "step": 77750 }, { "epoch": 0.9476192217225452, "grad_norm": 1.893755316734314, "learning_rate": 2.7575368826170624e-07, "loss": 0.8427, "step": 77755 }, { "epoch": 0.947680157946693, "grad_norm": 1.875852108001709, "learning_rate": 2.754329698524695e-07, "loss": 0.8295, "step": 77760 }, { "epoch": 0.9477410941708408, "grad_norm": 2.0458202362060547, "learning_rate": 2.7511225144323285e-07, "loss": 0.8404, "step": 77765 }, { "epoch": 0.9478020303949886, "grad_norm": 2.139099359512329, "learning_rate": 2.7479153303399613e-07, "loss": 0.8162, "step": 77770 }, { "epoch": 0.9478629666191364, "grad_norm": 2.2171881198883057, "learning_rate": 2.7447081462475947e-07, "loss": 0.8075, "step": 77775 }, { "epoch": 0.9479239028432842, "grad_norm": 1.9555095434188843, "learning_rate": 2.741500962155228e-07, "loss": 0.8203, "step": 77780 }, { "epoch": 0.947984839067432, "grad_norm": 1.9729183912277222, "learning_rate": 2.738293778062861e-07, "loss": 0.8228, "step": 77785 }, { "epoch": 0.9480457752915799, "grad_norm": 1.7214412689208984, "learning_rate": 2.735086593970494e-07, "loss": 0.8068, "step": 77790 }, { "epoch": 0.9481067115157277, "grad_norm": 2.223881721496582, "learning_rate": 2.7318794098781275e-07, "loss": 0.8003, "step": 77795 }, { "epoch": 0.9481676477398755, "grad_norm": 2.108870267868042, "learning_rate": 2.7286722257857603e-07, "loss": 0.8484, "step": 77800 }, { "epoch": 0.9482285839640232, "grad_norm": 1.9282047748565674, "learning_rate": 2.7254650416933937e-07, "loss": 0.7858, "step": 77805 }, { "epoch": 0.948289520188171, "grad_norm": 1.7244102954864502, "learning_rate": 2.7222578576010265e-07, "loss": 0.7675, "step": 77810 }, { "epoch": 0.9483504564123189, "grad_norm": 1.770079493522644, "learning_rate": 2.71905067350866e-07, "loss": 0.8159, "step": 77815 }, { "epoch": 0.9484113926364667, "grad_norm": 1.8097188472747803, "learning_rate": 2.7158434894162927e-07, "loss": 0.7376, "step": 77820 }, { "epoch": 0.9484723288606145, "grad_norm": 1.7266507148742676, "learning_rate": 2.712636305323926e-07, "loss": 0.7541, "step": 77825 }, { "epoch": 0.9485332650847623, "grad_norm": 2.2118263244628906, "learning_rate": 2.709429121231559e-07, "loss": 0.8308, "step": 77830 }, { "epoch": 0.9485942013089101, "grad_norm": 2.0183072090148926, "learning_rate": 2.706221937139192e-07, "loss": 0.8273, "step": 77835 }, { "epoch": 0.9486551375330579, "grad_norm": 1.8296854496002197, "learning_rate": 2.703014753046825e-07, "loss": 0.8239, "step": 77840 }, { "epoch": 0.9487160737572057, "grad_norm": 1.7928346395492554, "learning_rate": 2.6998075689544583e-07, "loss": 0.8498, "step": 77845 }, { "epoch": 0.9487770099813535, "grad_norm": 2.069674491882324, "learning_rate": 2.6966003848620917e-07, "loss": 0.7809, "step": 77850 }, { "epoch": 0.9488379462055013, "grad_norm": 2.060051441192627, "learning_rate": 2.6933932007697245e-07, "loss": 0.8758, "step": 77855 }, { "epoch": 0.9488988824296491, "grad_norm": 2.0373659133911133, "learning_rate": 2.690186016677358e-07, "loss": 0.81, "step": 77860 }, { "epoch": 0.948959818653797, "grad_norm": 1.9709432125091553, "learning_rate": 2.6869788325849906e-07, "loss": 0.7249, "step": 77865 }, { "epoch": 0.9490207548779448, "grad_norm": 2.1230640411376953, "learning_rate": 2.683771648492624e-07, "loss": 0.746, "step": 77870 }, { "epoch": 0.9490816911020925, "grad_norm": 2.018281936645508, "learning_rate": 2.680564464400257e-07, "loss": 0.8466, "step": 77875 }, { "epoch": 0.9491426273262403, "grad_norm": 1.7428464889526367, "learning_rate": 2.67735728030789e-07, "loss": 0.8652, "step": 77880 }, { "epoch": 0.9492035635503882, "grad_norm": 1.6747689247131348, "learning_rate": 2.674150096215523e-07, "loss": 0.8424, "step": 77885 }, { "epoch": 0.949264499774536, "grad_norm": 1.9693019390106201, "learning_rate": 2.6709429121231563e-07, "loss": 0.8473, "step": 77890 }, { "epoch": 0.9493254359986838, "grad_norm": 1.9827516078948975, "learning_rate": 2.667735728030789e-07, "loss": 0.8567, "step": 77895 }, { "epoch": 0.9493863722228316, "grad_norm": 1.6358938217163086, "learning_rate": 2.6645285439384225e-07, "loss": 0.7718, "step": 77900 }, { "epoch": 0.9494473084469794, "grad_norm": 2.1855618953704834, "learning_rate": 2.6613213598460553e-07, "loss": 0.8251, "step": 77905 }, { "epoch": 0.9495082446711272, "grad_norm": 1.8107936382293701, "learning_rate": 2.6581141757536886e-07, "loss": 0.7426, "step": 77910 }, { "epoch": 0.949569180895275, "grad_norm": 2.204594850540161, "learning_rate": 2.6549069916613214e-07, "loss": 0.8588, "step": 77915 }, { "epoch": 0.9496301171194228, "grad_norm": 1.982995629310608, "learning_rate": 2.651699807568955e-07, "loss": 0.8456, "step": 77920 }, { "epoch": 0.9496910533435706, "grad_norm": 1.8828352689743042, "learning_rate": 2.6484926234765876e-07, "loss": 0.79, "step": 77925 }, { "epoch": 0.9497519895677184, "grad_norm": 2.0215611457824707, "learning_rate": 2.645285439384221e-07, "loss": 0.829, "step": 77930 }, { "epoch": 0.9498129257918663, "grad_norm": 1.6597942113876343, "learning_rate": 2.642078255291854e-07, "loss": 0.7488, "step": 77935 }, { "epoch": 0.9498738620160141, "grad_norm": 2.0847554206848145, "learning_rate": 2.638871071199487e-07, "loss": 0.8532, "step": 77940 }, { "epoch": 0.9499347982401618, "grad_norm": 2.1277153491973877, "learning_rate": 2.63566388710712e-07, "loss": 0.8007, "step": 77945 }, { "epoch": 0.9499957344643096, "grad_norm": 2.2707202434539795, "learning_rate": 2.632456703014753e-07, "loss": 0.8149, "step": 77950 }, { "epoch": 0.9500566706884574, "grad_norm": 2.0103726387023926, "learning_rate": 2.6292495189223866e-07, "loss": 0.8433, "step": 77955 }, { "epoch": 0.9501176069126053, "grad_norm": 2.1083292961120605, "learning_rate": 2.6260423348300194e-07, "loss": 0.8725, "step": 77960 }, { "epoch": 0.9501785431367531, "grad_norm": 2.0583417415618896, "learning_rate": 2.622835150737653e-07, "loss": 0.8312, "step": 77965 }, { "epoch": 0.9502394793609009, "grad_norm": 1.9236507415771484, "learning_rate": 2.6196279666452856e-07, "loss": 0.7527, "step": 77970 }, { "epoch": 0.9503004155850487, "grad_norm": 1.7624001502990723, "learning_rate": 2.616420782552919e-07, "loss": 0.7855, "step": 77975 }, { "epoch": 0.9503613518091965, "grad_norm": 1.8883875608444214, "learning_rate": 2.6132135984605517e-07, "loss": 0.8477, "step": 77980 }, { "epoch": 0.9504222880333443, "grad_norm": 2.1566269397735596, "learning_rate": 2.610006414368185e-07, "loss": 0.8453, "step": 77985 }, { "epoch": 0.9504832242574921, "grad_norm": 2.5590567588806152, "learning_rate": 2.606799230275818e-07, "loss": 0.8408, "step": 77990 }, { "epoch": 0.9505441604816399, "grad_norm": 1.8971019983291626, "learning_rate": 2.603592046183451e-07, "loss": 0.7842, "step": 77995 }, { "epoch": 0.9506050967057877, "grad_norm": 2.0811312198638916, "learning_rate": 2.600384862091084e-07, "loss": 0.811, "step": 78000 }, { "epoch": 0.9506660329299356, "grad_norm": 2.1346285343170166, "learning_rate": 2.5971776779987174e-07, "loss": 0.7696, "step": 78005 }, { "epoch": 0.9507269691540834, "grad_norm": 2.5564255714416504, "learning_rate": 2.59397049390635e-07, "loss": 0.835, "step": 78010 }, { "epoch": 0.9507879053782311, "grad_norm": 1.822615385055542, "learning_rate": 2.5907633098139836e-07, "loss": 0.7814, "step": 78015 }, { "epoch": 0.9508488416023789, "grad_norm": 2.7412402629852295, "learning_rate": 2.5875561257216164e-07, "loss": 0.8188, "step": 78020 }, { "epoch": 0.9509097778265267, "grad_norm": 2.0521960258483887, "learning_rate": 2.5843489416292497e-07, "loss": 0.7708, "step": 78025 }, { "epoch": 0.9509707140506746, "grad_norm": 1.8168728351593018, "learning_rate": 2.5811417575368825e-07, "loss": 0.8582, "step": 78030 }, { "epoch": 0.9510316502748224, "grad_norm": 1.9900082349777222, "learning_rate": 2.577934573444516e-07, "loss": 0.7799, "step": 78035 }, { "epoch": 0.9510925864989702, "grad_norm": 2.167438507080078, "learning_rate": 2.5747273893521487e-07, "loss": 0.7871, "step": 78040 }, { "epoch": 0.951153522723118, "grad_norm": 1.873215675354004, "learning_rate": 2.571520205259782e-07, "loss": 0.7639, "step": 78045 }, { "epoch": 0.9512144589472658, "grad_norm": 2.058932304382324, "learning_rate": 2.568313021167415e-07, "loss": 0.7837, "step": 78050 }, { "epoch": 0.9512753951714136, "grad_norm": 2.2352676391601562, "learning_rate": 2.565105837075048e-07, "loss": 0.8542, "step": 78055 }, { "epoch": 0.9513363313955614, "grad_norm": 1.8132264614105225, "learning_rate": 2.5618986529826815e-07, "loss": 0.7404, "step": 78060 }, { "epoch": 0.9513972676197092, "grad_norm": 1.9584640264511108, "learning_rate": 2.5586914688903143e-07, "loss": 0.8832, "step": 78065 }, { "epoch": 0.951458203843857, "grad_norm": 1.7032253742218018, "learning_rate": 2.5554842847979477e-07, "loss": 0.754, "step": 78070 }, { "epoch": 0.9515191400680049, "grad_norm": 1.7395548820495605, "learning_rate": 2.5522771007055805e-07, "loss": 0.8682, "step": 78075 }, { "epoch": 0.9515800762921527, "grad_norm": 1.880156397819519, "learning_rate": 2.549069916613214e-07, "loss": 0.7284, "step": 78080 }, { "epoch": 0.9516410125163004, "grad_norm": 1.8521758317947388, "learning_rate": 2.5458627325208467e-07, "loss": 0.8264, "step": 78085 }, { "epoch": 0.9517019487404482, "grad_norm": 2.3203608989715576, "learning_rate": 2.54265554842848e-07, "loss": 0.7918, "step": 78090 }, { "epoch": 0.951762884964596, "grad_norm": 1.7360399961471558, "learning_rate": 2.539448364336113e-07, "loss": 0.808, "step": 78095 }, { "epoch": 0.9518238211887439, "grad_norm": 1.9680193662643433, "learning_rate": 2.536241180243746e-07, "loss": 0.7672, "step": 78100 }, { "epoch": 0.9518847574128917, "grad_norm": 1.9440808296203613, "learning_rate": 2.5330339961513795e-07, "loss": 0.7188, "step": 78105 }, { "epoch": 0.9519456936370395, "grad_norm": 2.132413148880005, "learning_rate": 2.5298268120590123e-07, "loss": 0.8018, "step": 78110 }, { "epoch": 0.9520066298611873, "grad_norm": 2.434140205383301, "learning_rate": 2.5266196279666457e-07, "loss": 0.773, "step": 78115 }, { "epoch": 0.952067566085335, "grad_norm": 2.029280662536621, "learning_rate": 2.5234124438742785e-07, "loss": 0.8109, "step": 78120 }, { "epoch": 0.9521285023094829, "grad_norm": 1.9123426675796509, "learning_rate": 2.520205259781912e-07, "loss": 0.8623, "step": 78125 }, { "epoch": 0.9521894385336307, "grad_norm": 2.183258056640625, "learning_rate": 2.516998075689545e-07, "loss": 0.7117, "step": 78130 }, { "epoch": 0.9522503747577785, "grad_norm": 1.7245612144470215, "learning_rate": 2.513790891597178e-07, "loss": 0.8274, "step": 78135 }, { "epoch": 0.9523113109819263, "grad_norm": 2.279611587524414, "learning_rate": 2.5105837075048113e-07, "loss": 0.829, "step": 78140 }, { "epoch": 0.9523722472060742, "grad_norm": 1.8194940090179443, "learning_rate": 2.507376523412444e-07, "loss": 0.8647, "step": 78145 }, { "epoch": 0.9524331834302219, "grad_norm": 2.1461524963378906, "learning_rate": 2.5041693393200775e-07, "loss": 0.8228, "step": 78150 }, { "epoch": 0.9524941196543697, "grad_norm": 1.915511131286621, "learning_rate": 2.5009621552277103e-07, "loss": 0.857, "step": 78155 }, { "epoch": 0.9525550558785175, "grad_norm": 2.2683956623077393, "learning_rate": 2.4977549711353437e-07, "loss": 0.8199, "step": 78160 }, { "epoch": 0.9526159921026653, "grad_norm": 1.970955729484558, "learning_rate": 2.4945477870429765e-07, "loss": 0.7653, "step": 78165 }, { "epoch": 0.9526769283268132, "grad_norm": 1.9448691606521606, "learning_rate": 2.49134060295061e-07, "loss": 0.877, "step": 78170 }, { "epoch": 0.952737864550961, "grad_norm": 1.867432951927185, "learning_rate": 2.4881334188582426e-07, "loss": 0.862, "step": 78175 }, { "epoch": 0.9527988007751088, "grad_norm": 1.8977673053741455, "learning_rate": 2.484926234765876e-07, "loss": 0.8302, "step": 78180 }, { "epoch": 0.9528597369992565, "grad_norm": 2.1219916343688965, "learning_rate": 2.481719050673509e-07, "loss": 0.8226, "step": 78185 }, { "epoch": 0.9529206732234043, "grad_norm": 1.9123321771621704, "learning_rate": 2.478511866581142e-07, "loss": 0.8096, "step": 78190 }, { "epoch": 0.9529816094475522, "grad_norm": 2.106956720352173, "learning_rate": 2.475304682488775e-07, "loss": 0.8127, "step": 78195 }, { "epoch": 0.9530425456717, "grad_norm": 1.9986193180084229, "learning_rate": 2.4720974983964083e-07, "loss": 0.7482, "step": 78200 }, { "epoch": 0.9531034818958478, "grad_norm": 2.29569411277771, "learning_rate": 2.468890314304041e-07, "loss": 0.81, "step": 78205 }, { "epoch": 0.9531644181199956, "grad_norm": 2.0860183238983154, "learning_rate": 2.4656831302116744e-07, "loss": 0.8483, "step": 78210 }, { "epoch": 0.9532253543441435, "grad_norm": 2.3301663398742676, "learning_rate": 2.462475946119307e-07, "loss": 0.8828, "step": 78215 }, { "epoch": 0.9532862905682912, "grad_norm": 2.5035479068756104, "learning_rate": 2.4592687620269406e-07, "loss": 0.7926, "step": 78220 }, { "epoch": 0.953347226792439, "grad_norm": 2.2181174755096436, "learning_rate": 2.4560615779345734e-07, "loss": 0.7489, "step": 78225 }, { "epoch": 0.9534081630165868, "grad_norm": 1.949211597442627, "learning_rate": 2.452854393842207e-07, "loss": 0.7302, "step": 78230 }, { "epoch": 0.9534690992407346, "grad_norm": 1.8078022003173828, "learning_rate": 2.44964720974984e-07, "loss": 0.7636, "step": 78235 }, { "epoch": 0.9535300354648825, "grad_norm": 2.0715034008026123, "learning_rate": 2.446440025657473e-07, "loss": 0.8727, "step": 78240 }, { "epoch": 0.9535909716890303, "grad_norm": 1.8739687204360962, "learning_rate": 2.4432328415651063e-07, "loss": 0.7973, "step": 78245 }, { "epoch": 0.9536519079131781, "grad_norm": 1.8474855422973633, "learning_rate": 2.440025657472739e-07, "loss": 0.8827, "step": 78250 }, { "epoch": 0.9537128441373258, "grad_norm": 1.701532006263733, "learning_rate": 2.4368184733803724e-07, "loss": 0.7839, "step": 78255 }, { "epoch": 0.9537737803614736, "grad_norm": 1.9694874286651611, "learning_rate": 2.433611289288005e-07, "loss": 0.7567, "step": 78260 }, { "epoch": 0.9538347165856215, "grad_norm": 2.066469192504883, "learning_rate": 2.4304041051956386e-07, "loss": 0.8571, "step": 78265 }, { "epoch": 0.9538956528097693, "grad_norm": 1.950240969657898, "learning_rate": 2.4271969211032714e-07, "loss": 0.7587, "step": 78270 }, { "epoch": 0.9539565890339171, "grad_norm": 1.9065308570861816, "learning_rate": 2.423989737010905e-07, "loss": 0.8512, "step": 78275 }, { "epoch": 0.9540175252580649, "grad_norm": 2.0861456394195557, "learning_rate": 2.4207825529185376e-07, "loss": 0.8487, "step": 78280 }, { "epoch": 0.9540784614822128, "grad_norm": 1.9143447875976562, "learning_rate": 2.417575368826171e-07, "loss": 0.812, "step": 78285 }, { "epoch": 0.9541393977063605, "grad_norm": 1.7596900463104248, "learning_rate": 2.4143681847338037e-07, "loss": 0.8088, "step": 78290 }, { "epoch": 0.9542003339305083, "grad_norm": 2.195416212081909, "learning_rate": 2.411161000641437e-07, "loss": 0.8572, "step": 78295 }, { "epoch": 0.9542612701546561, "grad_norm": 1.782577633857727, "learning_rate": 2.40795381654907e-07, "loss": 0.742, "step": 78300 }, { "epoch": 0.9543222063788039, "grad_norm": 2.071749210357666, "learning_rate": 2.404746632456703e-07, "loss": 0.7685, "step": 78305 }, { "epoch": 0.9543831426029518, "grad_norm": 2.509315013885498, "learning_rate": 2.401539448364336e-07, "loss": 0.8227, "step": 78310 }, { "epoch": 0.9544440788270996, "grad_norm": 2.031325578689575, "learning_rate": 2.3983322642719694e-07, "loss": 0.8225, "step": 78315 }, { "epoch": 0.9545050150512474, "grad_norm": 1.9023232460021973, "learning_rate": 2.395125080179602e-07, "loss": 0.8216, "step": 78320 }, { "epoch": 0.9545659512753951, "grad_norm": 1.9445099830627441, "learning_rate": 2.3919178960872355e-07, "loss": 0.7467, "step": 78325 }, { "epoch": 0.9546268874995429, "grad_norm": 1.928285837173462, "learning_rate": 2.3887107119948684e-07, "loss": 0.8189, "step": 78330 }, { "epoch": 0.9546878237236908, "grad_norm": 1.895970344543457, "learning_rate": 2.3855035279025017e-07, "loss": 0.8748, "step": 78335 }, { "epoch": 0.9547487599478386, "grad_norm": 2.2026126384735107, "learning_rate": 2.3822963438101348e-07, "loss": 0.832, "step": 78340 }, { "epoch": 0.9548096961719864, "grad_norm": 1.7268894910812378, "learning_rate": 2.3790891597177679e-07, "loss": 0.815, "step": 78345 }, { "epoch": 0.9548706323961342, "grad_norm": 1.7993358373641968, "learning_rate": 2.375881975625401e-07, "loss": 0.7256, "step": 78350 }, { "epoch": 0.9549315686202821, "grad_norm": 2.398768186569214, "learning_rate": 2.372674791533034e-07, "loss": 0.8182, "step": 78355 }, { "epoch": 0.9549925048444298, "grad_norm": 1.9079017639160156, "learning_rate": 2.369467607440667e-07, "loss": 0.7886, "step": 78360 }, { "epoch": 0.9550534410685776, "grad_norm": 1.8456085920333862, "learning_rate": 2.3662604233483002e-07, "loss": 0.7834, "step": 78365 }, { "epoch": 0.9551143772927254, "grad_norm": 1.8900022506713867, "learning_rate": 2.3630532392559333e-07, "loss": 0.7477, "step": 78370 }, { "epoch": 0.9551753135168732, "grad_norm": 2.019294500350952, "learning_rate": 2.3598460551635663e-07, "loss": 0.7701, "step": 78375 }, { "epoch": 0.9552362497410211, "grad_norm": 1.9076237678527832, "learning_rate": 2.3566388710711994e-07, "loss": 0.9033, "step": 78380 }, { "epoch": 0.9552971859651689, "grad_norm": 1.934623122215271, "learning_rate": 2.3534316869788325e-07, "loss": 0.7943, "step": 78385 }, { "epoch": 0.9553581221893167, "grad_norm": 2.3113715648651123, "learning_rate": 2.3502245028864658e-07, "loss": 0.8316, "step": 78390 }, { "epoch": 0.9554190584134644, "grad_norm": 2.241584539413452, "learning_rate": 2.347017318794099e-07, "loss": 0.8791, "step": 78395 }, { "epoch": 0.9554799946376122, "grad_norm": 2.208134889602661, "learning_rate": 2.3438101347017323e-07, "loss": 0.7877, "step": 78400 }, { "epoch": 0.9555409308617601, "grad_norm": 2.3411309719085693, "learning_rate": 2.3406029506093653e-07, "loss": 0.8457, "step": 78405 }, { "epoch": 0.9556018670859079, "grad_norm": 2.03637957572937, "learning_rate": 2.3373957665169984e-07, "loss": 0.8332, "step": 78410 }, { "epoch": 0.9556628033100557, "grad_norm": 2.1600871086120605, "learning_rate": 2.3341885824246315e-07, "loss": 0.8133, "step": 78415 }, { "epoch": 0.9557237395342035, "grad_norm": 1.8257272243499756, "learning_rate": 2.3309813983322646e-07, "loss": 0.8214, "step": 78420 }, { "epoch": 0.9557846757583514, "grad_norm": 1.5863760709762573, "learning_rate": 2.3277742142398977e-07, "loss": 0.755, "step": 78425 }, { "epoch": 0.9558456119824991, "grad_norm": 1.8475557565689087, "learning_rate": 2.3245670301475307e-07, "loss": 0.85, "step": 78430 }, { "epoch": 0.9559065482066469, "grad_norm": 1.9477837085723877, "learning_rate": 2.3213598460551638e-07, "loss": 0.8392, "step": 78435 }, { "epoch": 0.9559674844307947, "grad_norm": 1.8306913375854492, "learning_rate": 2.318152661962797e-07, "loss": 0.8044, "step": 78440 }, { "epoch": 0.9560284206549425, "grad_norm": 2.4957778453826904, "learning_rate": 2.31494547787043e-07, "loss": 0.8274, "step": 78445 }, { "epoch": 0.9560893568790904, "grad_norm": 1.9679327011108398, "learning_rate": 2.311738293778063e-07, "loss": 0.8277, "step": 78450 }, { "epoch": 0.9561502931032382, "grad_norm": 1.8555359840393066, "learning_rate": 2.308531109685696e-07, "loss": 0.7753, "step": 78455 }, { "epoch": 0.956211229327386, "grad_norm": 2.095057964324951, "learning_rate": 2.3053239255933295e-07, "loss": 0.8134, "step": 78460 }, { "epoch": 0.9562721655515337, "grad_norm": 2.0189881324768066, "learning_rate": 2.3021167415009626e-07, "loss": 0.8688, "step": 78465 }, { "epoch": 0.9563331017756815, "grad_norm": 1.7470386028289795, "learning_rate": 2.2989095574085956e-07, "loss": 0.8177, "step": 78470 }, { "epoch": 0.9563940379998294, "grad_norm": 2.2081005573272705, "learning_rate": 2.2957023733162287e-07, "loss": 0.7933, "step": 78475 }, { "epoch": 0.9564549742239772, "grad_norm": 1.9524400234222412, "learning_rate": 2.2924951892238618e-07, "loss": 0.7875, "step": 78480 }, { "epoch": 0.956515910448125, "grad_norm": 2.1888115406036377, "learning_rate": 2.289288005131495e-07, "loss": 0.8511, "step": 78485 }, { "epoch": 0.9565768466722728, "grad_norm": 2.0086331367492676, "learning_rate": 2.286080821039128e-07, "loss": 0.7627, "step": 78490 }, { "epoch": 0.9566377828964207, "grad_norm": 1.6925498247146606, "learning_rate": 2.282873636946761e-07, "loss": 0.729, "step": 78495 }, { "epoch": 0.9566987191205684, "grad_norm": 1.5427417755126953, "learning_rate": 2.279666452854394e-07, "loss": 0.7618, "step": 78500 }, { "epoch": 0.9567596553447162, "grad_norm": 2.053687810897827, "learning_rate": 2.2764592687620272e-07, "loss": 0.819, "step": 78505 }, { "epoch": 0.956820591568864, "grad_norm": 2.5029401779174805, "learning_rate": 2.2732520846696603e-07, "loss": 0.8387, "step": 78510 }, { "epoch": 0.9568815277930118, "grad_norm": 1.9063669443130493, "learning_rate": 2.2700449005772934e-07, "loss": 0.7796, "step": 78515 }, { "epoch": 0.9569424640171597, "grad_norm": 1.8433587551116943, "learning_rate": 2.2668377164849264e-07, "loss": 0.8336, "step": 78520 }, { "epoch": 0.9570034002413075, "grad_norm": 2.1962063312530518, "learning_rate": 2.2636305323925595e-07, "loss": 0.7345, "step": 78525 }, { "epoch": 0.9570643364654553, "grad_norm": 1.9509046077728271, "learning_rate": 2.2604233483001926e-07, "loss": 0.8198, "step": 78530 }, { "epoch": 0.957125272689603, "grad_norm": 2.1732656955718994, "learning_rate": 2.2572161642078257e-07, "loss": 0.7424, "step": 78535 }, { "epoch": 0.9571862089137508, "grad_norm": 2.1089107990264893, "learning_rate": 2.2540089801154587e-07, "loss": 0.7571, "step": 78540 }, { "epoch": 0.9572471451378987, "grad_norm": 2.6719932556152344, "learning_rate": 2.2508017960230918e-07, "loss": 0.8455, "step": 78545 }, { "epoch": 0.9573080813620465, "grad_norm": 1.8507800102233887, "learning_rate": 2.247594611930725e-07, "loss": 0.8107, "step": 78550 }, { "epoch": 0.9573690175861943, "grad_norm": 1.556249976158142, "learning_rate": 2.244387427838358e-07, "loss": 0.7952, "step": 78555 }, { "epoch": 0.9574299538103421, "grad_norm": 1.731817364692688, "learning_rate": 2.241180243745991e-07, "loss": 0.8142, "step": 78560 }, { "epoch": 0.95749089003449, "grad_norm": 1.620772361755371, "learning_rate": 2.2379730596536244e-07, "loss": 0.8228, "step": 78565 }, { "epoch": 0.9575518262586377, "grad_norm": 1.8354434967041016, "learning_rate": 2.2347658755612575e-07, "loss": 0.8221, "step": 78570 }, { "epoch": 0.9576127624827855, "grad_norm": 2.21413254737854, "learning_rate": 2.2315586914688906e-07, "loss": 0.8576, "step": 78575 }, { "epoch": 0.9576736987069333, "grad_norm": 1.897479772567749, "learning_rate": 2.2283515073765236e-07, "loss": 0.7621, "step": 78580 }, { "epoch": 0.9577346349310811, "grad_norm": 2.1064486503601074, "learning_rate": 2.2251443232841567e-07, "loss": 0.8004, "step": 78585 }, { "epoch": 0.957795571155229, "grad_norm": 2.1191940307617188, "learning_rate": 2.2219371391917898e-07, "loss": 0.8396, "step": 78590 }, { "epoch": 0.9578565073793768, "grad_norm": 2.330326557159424, "learning_rate": 2.218729955099423e-07, "loss": 0.7483, "step": 78595 }, { "epoch": 0.9579174436035246, "grad_norm": 1.738974690437317, "learning_rate": 2.215522771007056e-07, "loss": 0.8239, "step": 78600 }, { "epoch": 0.9579783798276723, "grad_norm": 1.8842328786849976, "learning_rate": 2.212315586914689e-07, "loss": 0.8002, "step": 78605 }, { "epoch": 0.9580393160518201, "grad_norm": 2.218191385269165, "learning_rate": 2.209108402822322e-07, "loss": 0.8042, "step": 78610 }, { "epoch": 0.958100252275968, "grad_norm": 2.0279464721679688, "learning_rate": 2.2059012187299552e-07, "loss": 0.8685, "step": 78615 }, { "epoch": 0.9581611885001158, "grad_norm": 2.1838431358337402, "learning_rate": 2.2026940346375883e-07, "loss": 0.8172, "step": 78620 }, { "epoch": 0.9582221247242636, "grad_norm": 1.9662879705429077, "learning_rate": 2.1994868505452214e-07, "loss": 0.8092, "step": 78625 }, { "epoch": 0.9582830609484114, "grad_norm": 1.7433520555496216, "learning_rate": 2.1962796664528544e-07, "loss": 0.8477, "step": 78630 }, { "epoch": 0.9583439971725592, "grad_norm": 1.8071868419647217, "learning_rate": 2.1930724823604875e-07, "loss": 0.7977, "step": 78635 }, { "epoch": 0.958404933396707, "grad_norm": 1.706072449684143, "learning_rate": 2.1898652982681206e-07, "loss": 0.7882, "step": 78640 }, { "epoch": 0.9584658696208548, "grad_norm": 1.9806394577026367, "learning_rate": 2.1866581141757537e-07, "loss": 0.7828, "step": 78645 }, { "epoch": 0.9585268058450026, "grad_norm": 2.071263551712036, "learning_rate": 2.1834509300833868e-07, "loss": 0.7525, "step": 78650 }, { "epoch": 0.9585877420691504, "grad_norm": 1.904097318649292, "learning_rate": 2.1802437459910198e-07, "loss": 0.7557, "step": 78655 }, { "epoch": 0.9586486782932983, "grad_norm": 2.0038468837738037, "learning_rate": 2.177036561898653e-07, "loss": 0.7868, "step": 78660 }, { "epoch": 0.9587096145174461, "grad_norm": 2.4362452030181885, "learning_rate": 2.173829377806286e-07, "loss": 0.7953, "step": 78665 }, { "epoch": 0.9587705507415939, "grad_norm": 1.9864976406097412, "learning_rate": 2.1706221937139193e-07, "loss": 0.8312, "step": 78670 }, { "epoch": 0.9588314869657416, "grad_norm": 1.9694533348083496, "learning_rate": 2.1674150096215524e-07, "loss": 0.7748, "step": 78675 }, { "epoch": 0.9588924231898894, "grad_norm": 1.9919553995132446, "learning_rate": 2.1642078255291855e-07, "loss": 0.8023, "step": 78680 }, { "epoch": 0.9589533594140373, "grad_norm": 2.161790609359741, "learning_rate": 2.1610006414368186e-07, "loss": 0.803, "step": 78685 }, { "epoch": 0.9590142956381851, "grad_norm": 1.9113298654556274, "learning_rate": 2.1577934573444517e-07, "loss": 0.7937, "step": 78690 }, { "epoch": 0.9590752318623329, "grad_norm": 1.9522701501846313, "learning_rate": 2.1545862732520847e-07, "loss": 0.7401, "step": 78695 }, { "epoch": 0.9591361680864807, "grad_norm": 1.934192180633545, "learning_rate": 2.1513790891597178e-07, "loss": 0.8756, "step": 78700 }, { "epoch": 0.9591971043106285, "grad_norm": 1.8786484003067017, "learning_rate": 2.1481719050673512e-07, "loss": 0.7838, "step": 78705 }, { "epoch": 0.9592580405347763, "grad_norm": 1.810584545135498, "learning_rate": 2.1449647209749842e-07, "loss": 0.8486, "step": 78710 }, { "epoch": 0.9593189767589241, "grad_norm": 2.1349542140960693, "learning_rate": 2.1417575368826173e-07, "loss": 0.8144, "step": 78715 }, { "epoch": 0.9593799129830719, "grad_norm": 2.2578508853912354, "learning_rate": 2.1385503527902504e-07, "loss": 0.7952, "step": 78720 }, { "epoch": 0.9594408492072197, "grad_norm": 1.746268391609192, "learning_rate": 2.1353431686978835e-07, "loss": 0.76, "step": 78725 }, { "epoch": 0.9595017854313675, "grad_norm": 2.1779444217681885, "learning_rate": 2.1321359846055166e-07, "loss": 0.8016, "step": 78730 }, { "epoch": 0.9595627216555154, "grad_norm": 2.0528793334960938, "learning_rate": 2.1289288005131496e-07, "loss": 0.8108, "step": 78735 }, { "epoch": 0.9596236578796632, "grad_norm": 2.202484369277954, "learning_rate": 2.125721616420783e-07, "loss": 0.7973, "step": 78740 }, { "epoch": 0.9596845941038109, "grad_norm": 2.013017177581787, "learning_rate": 2.122514432328416e-07, "loss": 0.8396, "step": 78745 }, { "epoch": 0.9597455303279587, "grad_norm": 1.7921228408813477, "learning_rate": 2.1193072482360491e-07, "loss": 0.7713, "step": 78750 }, { "epoch": 0.9598064665521066, "grad_norm": 1.78507399559021, "learning_rate": 2.1161000641436822e-07, "loss": 0.7748, "step": 78755 }, { "epoch": 0.9598674027762544, "grad_norm": 1.8485056161880493, "learning_rate": 2.1128928800513153e-07, "loss": 0.7675, "step": 78760 }, { "epoch": 0.9599283390004022, "grad_norm": 2.2024457454681396, "learning_rate": 2.1096856959589484e-07, "loss": 0.7706, "step": 78765 }, { "epoch": 0.95998927522455, "grad_norm": 2.1589438915252686, "learning_rate": 2.1064785118665815e-07, "loss": 0.7976, "step": 78770 }, { "epoch": 0.9600502114486978, "grad_norm": 1.6035833358764648, "learning_rate": 2.1032713277742145e-07, "loss": 0.8844, "step": 78775 }, { "epoch": 0.9601111476728456, "grad_norm": 2.057286024093628, "learning_rate": 2.1000641436818476e-07, "loss": 0.8059, "step": 78780 }, { "epoch": 0.9601720838969934, "grad_norm": 1.9721802473068237, "learning_rate": 2.0968569595894807e-07, "loss": 0.8119, "step": 78785 }, { "epoch": 0.9602330201211412, "grad_norm": 2.108210563659668, "learning_rate": 2.0936497754971138e-07, "loss": 0.7992, "step": 78790 }, { "epoch": 0.960293956345289, "grad_norm": 2.2872443199157715, "learning_rate": 2.0904425914047469e-07, "loss": 0.8118, "step": 78795 }, { "epoch": 0.9603548925694368, "grad_norm": 1.890059471130371, "learning_rate": 2.08723540731238e-07, "loss": 0.8039, "step": 78800 }, { "epoch": 0.9604158287935847, "grad_norm": 1.8455368280410767, "learning_rate": 2.084028223220013e-07, "loss": 0.7831, "step": 78805 }, { "epoch": 0.9604767650177325, "grad_norm": 1.9841502904891968, "learning_rate": 2.080821039127646e-07, "loss": 0.8143, "step": 78810 }, { "epoch": 0.9605377012418802, "grad_norm": 1.9199862480163574, "learning_rate": 2.0776138550352792e-07, "loss": 0.7876, "step": 78815 }, { "epoch": 0.960598637466028, "grad_norm": 1.8632758855819702, "learning_rate": 2.0744066709429123e-07, "loss": 0.8339, "step": 78820 }, { "epoch": 0.9606595736901758, "grad_norm": 2.364907741546631, "learning_rate": 2.0711994868505453e-07, "loss": 0.7661, "step": 78825 }, { "epoch": 0.9607205099143237, "grad_norm": 1.751860499382019, "learning_rate": 2.0679923027581784e-07, "loss": 0.7988, "step": 78830 }, { "epoch": 0.9607814461384715, "grad_norm": 1.9355895519256592, "learning_rate": 2.0647851186658115e-07, "loss": 0.8209, "step": 78835 }, { "epoch": 0.9608423823626193, "grad_norm": 2.0755622386932373, "learning_rate": 2.0615779345734446e-07, "loss": 0.7865, "step": 78840 }, { "epoch": 0.9609033185867671, "grad_norm": 2.0127627849578857, "learning_rate": 2.058370750481078e-07, "loss": 0.8121, "step": 78845 }, { "epoch": 0.9609642548109149, "grad_norm": 2.155092239379883, "learning_rate": 2.055163566388711e-07, "loss": 0.7804, "step": 78850 }, { "epoch": 0.9610251910350627, "grad_norm": 2.133096694946289, "learning_rate": 2.051956382296344e-07, "loss": 0.8178, "step": 78855 }, { "epoch": 0.9610861272592105, "grad_norm": 1.9912062883377075, "learning_rate": 2.0487491982039772e-07, "loss": 0.8414, "step": 78860 }, { "epoch": 0.9611470634833583, "grad_norm": 2.100433111190796, "learning_rate": 2.0455420141116102e-07, "loss": 0.8245, "step": 78865 }, { "epoch": 0.9612079997075061, "grad_norm": 2.1651406288146973, "learning_rate": 2.0423348300192433e-07, "loss": 0.7987, "step": 78870 }, { "epoch": 0.961268935931654, "grad_norm": 2.064899444580078, "learning_rate": 2.0391276459268764e-07, "loss": 0.6747, "step": 78875 }, { "epoch": 0.9613298721558018, "grad_norm": 2.478759288787842, "learning_rate": 2.0359204618345095e-07, "loss": 0.8639, "step": 78880 }, { "epoch": 0.9613908083799495, "grad_norm": 2.0669918060302734, "learning_rate": 2.0327132777421426e-07, "loss": 0.8079, "step": 78885 }, { "epoch": 0.9614517446040973, "grad_norm": 1.9952284097671509, "learning_rate": 2.0295060936497756e-07, "loss": 0.776, "step": 78890 }, { "epoch": 0.9615126808282451, "grad_norm": 2.147413492202759, "learning_rate": 2.0262989095574087e-07, "loss": 0.8721, "step": 78895 }, { "epoch": 0.961573617052393, "grad_norm": 2.0182483196258545, "learning_rate": 2.0230917254650418e-07, "loss": 0.7739, "step": 78900 }, { "epoch": 0.9616345532765408, "grad_norm": 1.691565990447998, "learning_rate": 2.019884541372675e-07, "loss": 0.8305, "step": 78905 }, { "epoch": 0.9616954895006886, "grad_norm": 2.061530351638794, "learning_rate": 2.016677357280308e-07, "loss": 0.8351, "step": 78910 }, { "epoch": 0.9617564257248364, "grad_norm": 2.0689592361450195, "learning_rate": 2.013470173187941e-07, "loss": 0.7857, "step": 78915 }, { "epoch": 0.9618173619489842, "grad_norm": 1.9104788303375244, "learning_rate": 2.010262989095574e-07, "loss": 0.7446, "step": 78920 }, { "epoch": 0.961878298173132, "grad_norm": 2.037642478942871, "learning_rate": 2.0070558050032072e-07, "loss": 0.8323, "step": 78925 }, { "epoch": 0.9619392343972798, "grad_norm": 1.9591492414474487, "learning_rate": 2.0038486209108403e-07, "loss": 0.7778, "step": 78930 }, { "epoch": 0.9620001706214276, "grad_norm": 2.052898406982422, "learning_rate": 2.0006414368184733e-07, "loss": 0.8293, "step": 78935 }, { "epoch": 0.9620611068455754, "grad_norm": 2.2691218852996826, "learning_rate": 1.9974342527261064e-07, "loss": 0.7216, "step": 78940 }, { "epoch": 0.9621220430697233, "grad_norm": 1.8577842712402344, "learning_rate": 1.9942270686337395e-07, "loss": 0.7294, "step": 78945 }, { "epoch": 0.9621829792938711, "grad_norm": 2.0432052612304688, "learning_rate": 1.9910198845413728e-07, "loss": 0.8197, "step": 78950 }, { "epoch": 0.9622439155180188, "grad_norm": 1.971920371055603, "learning_rate": 1.987812700449006e-07, "loss": 0.7904, "step": 78955 }, { "epoch": 0.9623048517421666, "grad_norm": 2.106785535812378, "learning_rate": 1.984605516356639e-07, "loss": 0.7836, "step": 78960 }, { "epoch": 0.9623657879663144, "grad_norm": 1.9741722345352173, "learning_rate": 1.981398332264272e-07, "loss": 0.7725, "step": 78965 }, { "epoch": 0.9624267241904623, "grad_norm": 2.3200812339782715, "learning_rate": 1.9781911481719052e-07, "loss": 0.8915, "step": 78970 }, { "epoch": 0.9624876604146101, "grad_norm": 2.0295298099517822, "learning_rate": 1.9749839640795382e-07, "loss": 0.8007, "step": 78975 }, { "epoch": 0.9625485966387579, "grad_norm": 1.9619559049606323, "learning_rate": 1.9717767799871713e-07, "loss": 0.7349, "step": 78980 }, { "epoch": 0.9626095328629057, "grad_norm": 2.0152995586395264, "learning_rate": 1.9685695958948044e-07, "loss": 0.7931, "step": 78985 }, { "epoch": 0.9626704690870534, "grad_norm": 2.1688947677612305, "learning_rate": 1.9653624118024375e-07, "loss": 0.8874, "step": 78990 }, { "epoch": 0.9627314053112013, "grad_norm": 2.0238072872161865, "learning_rate": 1.9621552277100706e-07, "loss": 0.748, "step": 78995 }, { "epoch": 0.9627923415353491, "grad_norm": 2.2623538970947266, "learning_rate": 1.9589480436177036e-07, "loss": 0.8017, "step": 79000 }, { "epoch": 0.9628532777594969, "grad_norm": 2.2216923236846924, "learning_rate": 1.955740859525337e-07, "loss": 0.7876, "step": 79005 }, { "epoch": 0.9629142139836447, "grad_norm": 2.0627918243408203, "learning_rate": 1.95253367543297e-07, "loss": 0.8408, "step": 79010 }, { "epoch": 0.9629751502077926, "grad_norm": 2.173358917236328, "learning_rate": 1.9493264913406031e-07, "loss": 0.8216, "step": 79015 }, { "epoch": 0.9630360864319404, "grad_norm": 2.11627459526062, "learning_rate": 1.9461193072482365e-07, "loss": 0.7603, "step": 79020 }, { "epoch": 0.9630970226560881, "grad_norm": 2.506437301635742, "learning_rate": 1.9429121231558696e-07, "loss": 0.8369, "step": 79025 }, { "epoch": 0.9631579588802359, "grad_norm": 1.701338768005371, "learning_rate": 1.9397049390635026e-07, "loss": 0.7968, "step": 79030 }, { "epoch": 0.9632188951043837, "grad_norm": 1.887890100479126, "learning_rate": 1.9364977549711357e-07, "loss": 0.7733, "step": 79035 }, { "epoch": 0.9632798313285316, "grad_norm": 1.9631537199020386, "learning_rate": 1.9332905708787688e-07, "loss": 0.7905, "step": 79040 }, { "epoch": 0.9633407675526794, "grad_norm": 2.003614664077759, "learning_rate": 1.930083386786402e-07, "loss": 0.827, "step": 79045 }, { "epoch": 0.9634017037768272, "grad_norm": 1.831721305847168, "learning_rate": 1.926876202694035e-07, "loss": 0.7977, "step": 79050 }, { "epoch": 0.963462640000975, "grad_norm": 2.0008368492126465, "learning_rate": 1.923669018601668e-07, "loss": 0.7479, "step": 79055 }, { "epoch": 0.9635235762251227, "grad_norm": 2.096043825149536, "learning_rate": 1.920461834509301e-07, "loss": 0.8028, "step": 79060 }, { "epoch": 0.9635845124492706, "grad_norm": 2.306138753890991, "learning_rate": 1.9172546504169342e-07, "loss": 0.8149, "step": 79065 }, { "epoch": 0.9636454486734184, "grad_norm": 2.169881582260132, "learning_rate": 1.9140474663245673e-07, "loss": 0.8442, "step": 79070 }, { "epoch": 0.9637063848975662, "grad_norm": 2.728588581085205, "learning_rate": 1.9108402822322004e-07, "loss": 0.8399, "step": 79075 }, { "epoch": 0.963767321121714, "grad_norm": 1.8255506753921509, "learning_rate": 1.9076330981398334e-07, "loss": 0.8005, "step": 79080 }, { "epoch": 0.9638282573458619, "grad_norm": 1.8982112407684326, "learning_rate": 1.9044259140474665e-07, "loss": 0.8315, "step": 79085 }, { "epoch": 0.9638891935700096, "grad_norm": 1.8588091135025024, "learning_rate": 1.9012187299550996e-07, "loss": 0.8605, "step": 79090 }, { "epoch": 0.9639501297941574, "grad_norm": 1.9889382123947144, "learning_rate": 1.8980115458627327e-07, "loss": 0.752, "step": 79095 }, { "epoch": 0.9640110660183052, "grad_norm": 2.0634772777557373, "learning_rate": 1.8948043617703658e-07, "loss": 0.8142, "step": 79100 }, { "epoch": 0.964072002242453, "grad_norm": 2.2919819355010986, "learning_rate": 1.8915971776779988e-07, "loss": 0.7827, "step": 79105 }, { "epoch": 0.9641329384666009, "grad_norm": 1.7542692422866821, "learning_rate": 1.888389993585632e-07, "loss": 0.7938, "step": 79110 }, { "epoch": 0.9641938746907487, "grad_norm": 1.8787726163864136, "learning_rate": 1.885182809493265e-07, "loss": 0.826, "step": 79115 }, { "epoch": 0.9642548109148965, "grad_norm": 2.012970209121704, "learning_rate": 1.881975625400898e-07, "loss": 0.7557, "step": 79120 }, { "epoch": 0.9643157471390442, "grad_norm": 1.8931965827941895, "learning_rate": 1.8787684413085314e-07, "loss": 0.7487, "step": 79125 }, { "epoch": 0.964376683363192, "grad_norm": 2.123199462890625, "learning_rate": 1.8755612572161645e-07, "loss": 0.8769, "step": 79130 }, { "epoch": 0.9644376195873399, "grad_norm": 1.9766286611557007, "learning_rate": 1.8723540731237976e-07, "loss": 0.7823, "step": 79135 }, { "epoch": 0.9644985558114877, "grad_norm": 1.8892241716384888, "learning_rate": 1.8691468890314307e-07, "loss": 0.8173, "step": 79140 }, { "epoch": 0.9645594920356355, "grad_norm": 2.281214952468872, "learning_rate": 1.8659397049390637e-07, "loss": 0.7346, "step": 79145 }, { "epoch": 0.9646204282597833, "grad_norm": 1.7444816827774048, "learning_rate": 1.8627325208466968e-07, "loss": 0.8208, "step": 79150 }, { "epoch": 0.9646813644839312, "grad_norm": 1.8994799852371216, "learning_rate": 1.85952533675433e-07, "loss": 0.8286, "step": 79155 }, { "epoch": 0.9647423007080789, "grad_norm": 1.92295241355896, "learning_rate": 1.856318152661963e-07, "loss": 0.8477, "step": 79160 }, { "epoch": 0.9648032369322267, "grad_norm": 2.0249035358428955, "learning_rate": 1.853110968569596e-07, "loss": 0.8182, "step": 79165 }, { "epoch": 0.9648641731563745, "grad_norm": 2.407835006713867, "learning_rate": 1.8499037844772291e-07, "loss": 0.8233, "step": 79170 }, { "epoch": 0.9649251093805223, "grad_norm": 2.076418876647949, "learning_rate": 1.8466966003848622e-07, "loss": 0.83, "step": 79175 }, { "epoch": 0.9649860456046702, "grad_norm": 1.823978066444397, "learning_rate": 1.8434894162924953e-07, "loss": 0.8334, "step": 79180 }, { "epoch": 0.965046981828818, "grad_norm": 2.0976290702819824, "learning_rate": 1.8402822322001284e-07, "loss": 0.7832, "step": 79185 }, { "epoch": 0.9651079180529658, "grad_norm": 1.8773165941238403, "learning_rate": 1.8370750481077615e-07, "loss": 0.8801, "step": 79190 }, { "epoch": 0.9651688542771135, "grad_norm": 2.0487122535705566, "learning_rate": 1.8338678640153945e-07, "loss": 0.7831, "step": 79195 }, { "epoch": 0.9652297905012613, "grad_norm": 1.8971096277236938, "learning_rate": 1.8306606799230276e-07, "loss": 0.7882, "step": 79200 }, { "epoch": 0.9652907267254092, "grad_norm": 1.6940504312515259, "learning_rate": 1.8274534958306607e-07, "loss": 0.7436, "step": 79205 }, { "epoch": 0.965351662949557, "grad_norm": 1.9801524877548218, "learning_rate": 1.8242463117382938e-07, "loss": 0.792, "step": 79210 }, { "epoch": 0.9654125991737048, "grad_norm": 2.352071523666382, "learning_rate": 1.8210391276459269e-07, "loss": 0.8246, "step": 79215 }, { "epoch": 0.9654735353978526, "grad_norm": 1.673599362373352, "learning_rate": 1.81783194355356e-07, "loss": 0.7252, "step": 79220 }, { "epoch": 0.9655344716220005, "grad_norm": 1.986207127571106, "learning_rate": 1.814624759461193e-07, "loss": 0.885, "step": 79225 }, { "epoch": 0.9655954078461482, "grad_norm": 2.1811349391937256, "learning_rate": 1.8114175753688264e-07, "loss": 0.8118, "step": 79230 }, { "epoch": 0.965656344070296, "grad_norm": 2.05666184425354, "learning_rate": 1.8082103912764594e-07, "loss": 0.7982, "step": 79235 }, { "epoch": 0.9657172802944438, "grad_norm": 2.076943874359131, "learning_rate": 1.8050032071840925e-07, "loss": 0.8243, "step": 79240 }, { "epoch": 0.9657782165185916, "grad_norm": 2.001002788543701, "learning_rate": 1.8017960230917256e-07, "loss": 0.9145, "step": 79245 }, { "epoch": 0.9658391527427395, "grad_norm": 2.1220874786376953, "learning_rate": 1.7985888389993587e-07, "loss": 0.779, "step": 79250 }, { "epoch": 0.9659000889668873, "grad_norm": 1.6972613334655762, "learning_rate": 1.7953816549069918e-07, "loss": 0.781, "step": 79255 }, { "epoch": 0.9659610251910351, "grad_norm": 1.739610195159912, "learning_rate": 1.7921744708146248e-07, "loss": 0.8057, "step": 79260 }, { "epoch": 0.9660219614151828, "grad_norm": 2.115734577178955, "learning_rate": 1.788967286722258e-07, "loss": 0.7425, "step": 79265 }, { "epoch": 0.9660828976393306, "grad_norm": 2.312626838684082, "learning_rate": 1.785760102629891e-07, "loss": 0.7681, "step": 79270 }, { "epoch": 0.9661438338634785, "grad_norm": 2.0761120319366455, "learning_rate": 1.782552918537524e-07, "loss": 0.8286, "step": 79275 }, { "epoch": 0.9662047700876263, "grad_norm": 2.1680402755737305, "learning_rate": 1.7793457344451572e-07, "loss": 0.9073, "step": 79280 }, { "epoch": 0.9662657063117741, "grad_norm": 1.7760510444641113, "learning_rate": 1.7761385503527902e-07, "loss": 0.7707, "step": 79285 }, { "epoch": 0.9663266425359219, "grad_norm": 1.8716365098953247, "learning_rate": 1.7729313662604233e-07, "loss": 0.8455, "step": 79290 }, { "epoch": 0.9663875787600698, "grad_norm": 1.9224673509597778, "learning_rate": 1.7697241821680564e-07, "loss": 0.7752, "step": 79295 }, { "epoch": 0.9664485149842175, "grad_norm": 2.309647798538208, "learning_rate": 1.7665169980756895e-07, "loss": 0.8276, "step": 79300 }, { "epoch": 0.9665094512083653, "grad_norm": 1.5728302001953125, "learning_rate": 1.763309813983323e-07, "loss": 0.7227, "step": 79305 }, { "epoch": 0.9665703874325131, "grad_norm": 2.1070780754089355, "learning_rate": 1.7601026298909562e-07, "loss": 0.777, "step": 79310 }, { "epoch": 0.9666313236566609, "grad_norm": 2.142930030822754, "learning_rate": 1.7568954457985892e-07, "loss": 0.7235, "step": 79315 }, { "epoch": 0.9666922598808088, "grad_norm": 2.3553736209869385, "learning_rate": 1.7536882617062223e-07, "loss": 0.7397, "step": 79320 }, { "epoch": 0.9667531961049566, "grad_norm": 1.961682915687561, "learning_rate": 1.7504810776138554e-07, "loss": 0.796, "step": 79325 }, { "epoch": 0.9668141323291044, "grad_norm": 1.781077265739441, "learning_rate": 1.7472738935214885e-07, "loss": 0.7912, "step": 79330 }, { "epoch": 0.9668750685532521, "grad_norm": 1.868172526359558, "learning_rate": 1.7440667094291216e-07, "loss": 0.8737, "step": 79335 }, { "epoch": 0.9669360047773999, "grad_norm": 1.884706735610962, "learning_rate": 1.7408595253367546e-07, "loss": 0.8527, "step": 79340 }, { "epoch": 0.9669969410015478, "grad_norm": 2.1026647090911865, "learning_rate": 1.7376523412443877e-07, "loss": 0.8225, "step": 79345 }, { "epoch": 0.9670578772256956, "grad_norm": 1.86844801902771, "learning_rate": 1.7344451571520208e-07, "loss": 0.7698, "step": 79350 }, { "epoch": 0.9671188134498434, "grad_norm": 2.000836133956909, "learning_rate": 1.731237973059654e-07, "loss": 0.8143, "step": 79355 }, { "epoch": 0.9671797496739912, "grad_norm": 2.1593236923217773, "learning_rate": 1.728030788967287e-07, "loss": 0.7898, "step": 79360 }, { "epoch": 0.967240685898139, "grad_norm": 1.646177053451538, "learning_rate": 1.72482360487492e-07, "loss": 0.748, "step": 79365 }, { "epoch": 0.9673016221222868, "grad_norm": 2.2539474964141846, "learning_rate": 1.721616420782553e-07, "loss": 0.7964, "step": 79370 }, { "epoch": 0.9673625583464346, "grad_norm": 2.197007179260254, "learning_rate": 1.7184092366901862e-07, "loss": 0.7871, "step": 79375 }, { "epoch": 0.9674234945705824, "grad_norm": 1.8002986907958984, "learning_rate": 1.7152020525978193e-07, "loss": 0.7771, "step": 79380 }, { "epoch": 0.9674844307947302, "grad_norm": 2.1842334270477295, "learning_rate": 1.7119948685054523e-07, "loss": 0.791, "step": 79385 }, { "epoch": 0.967545367018878, "grad_norm": 1.8894786834716797, "learning_rate": 1.7087876844130854e-07, "loss": 0.7744, "step": 79390 }, { "epoch": 0.9676063032430259, "grad_norm": 4.077230930328369, "learning_rate": 1.7055805003207185e-07, "loss": 0.8149, "step": 79395 }, { "epoch": 0.9676672394671737, "grad_norm": 1.873429775238037, "learning_rate": 1.7023733162283516e-07, "loss": 0.8393, "step": 79400 }, { "epoch": 0.9677281756913214, "grad_norm": 2.2264435291290283, "learning_rate": 1.699166132135985e-07, "loss": 0.8033, "step": 79405 }, { "epoch": 0.9677891119154692, "grad_norm": 1.7714320421218872, "learning_rate": 1.695958948043618e-07, "loss": 0.8048, "step": 79410 }, { "epoch": 0.9678500481396171, "grad_norm": 1.9400124549865723, "learning_rate": 1.692751763951251e-07, "loss": 0.7951, "step": 79415 }, { "epoch": 0.9679109843637649, "grad_norm": 2.4501798152923584, "learning_rate": 1.6895445798588842e-07, "loss": 0.8441, "step": 79420 }, { "epoch": 0.9679719205879127, "grad_norm": 1.761910319328308, "learning_rate": 1.6863373957665172e-07, "loss": 0.8185, "step": 79425 }, { "epoch": 0.9680328568120605, "grad_norm": 2.180053472518921, "learning_rate": 1.6831302116741503e-07, "loss": 0.8759, "step": 79430 }, { "epoch": 0.9680937930362083, "grad_norm": 1.8402177095413208, "learning_rate": 1.6799230275817834e-07, "loss": 0.8155, "step": 79435 }, { "epoch": 0.9681547292603561, "grad_norm": 1.8708608150482178, "learning_rate": 1.6767158434894165e-07, "loss": 0.7903, "step": 79440 }, { "epoch": 0.9682156654845039, "grad_norm": 2.381730318069458, "learning_rate": 1.6735086593970496e-07, "loss": 0.8511, "step": 79445 }, { "epoch": 0.9682766017086517, "grad_norm": 1.8284704685211182, "learning_rate": 1.6703014753046826e-07, "loss": 0.7814, "step": 79450 }, { "epoch": 0.9683375379327995, "grad_norm": 2.013251304626465, "learning_rate": 1.6670942912123157e-07, "loss": 0.8402, "step": 79455 }, { "epoch": 0.9683984741569474, "grad_norm": 1.9152191877365112, "learning_rate": 1.6638871071199488e-07, "loss": 0.7477, "step": 79460 }, { "epoch": 0.9684594103810952, "grad_norm": 1.9006106853485107, "learning_rate": 1.660679923027582e-07, "loss": 0.7825, "step": 79465 }, { "epoch": 0.968520346605243, "grad_norm": 1.8862500190734863, "learning_rate": 1.657472738935215e-07, "loss": 0.7339, "step": 79470 }, { "epoch": 0.9685812828293907, "grad_norm": 1.8361533880233765, "learning_rate": 1.654265554842848e-07, "loss": 0.8549, "step": 79475 }, { "epoch": 0.9686422190535385, "grad_norm": 2.019928455352783, "learning_rate": 1.651058370750481e-07, "loss": 0.8132, "step": 79480 }, { "epoch": 0.9687031552776864, "grad_norm": 1.9545156955718994, "learning_rate": 1.6478511866581142e-07, "loss": 0.7956, "step": 79485 }, { "epoch": 0.9687640915018342, "grad_norm": 1.9087194204330444, "learning_rate": 1.6446440025657473e-07, "loss": 0.7534, "step": 79490 }, { "epoch": 0.968825027725982, "grad_norm": 1.8307087421417236, "learning_rate": 1.6414368184733804e-07, "loss": 0.8463, "step": 79495 }, { "epoch": 0.9688859639501298, "grad_norm": 2.0315380096435547, "learning_rate": 1.6382296343810134e-07, "loss": 0.8121, "step": 79500 }, { "epoch": 0.9689469001742776, "grad_norm": 1.8714394569396973, "learning_rate": 1.6350224502886465e-07, "loss": 0.7903, "step": 79505 }, { "epoch": 0.9690078363984254, "grad_norm": 1.9091140031814575, "learning_rate": 1.6318152661962799e-07, "loss": 0.8552, "step": 79510 }, { "epoch": 0.9690687726225732, "grad_norm": 1.7823456525802612, "learning_rate": 1.628608082103913e-07, "loss": 0.8154, "step": 79515 }, { "epoch": 0.969129708846721, "grad_norm": 1.881030797958374, "learning_rate": 1.625400898011546e-07, "loss": 0.7602, "step": 79520 }, { "epoch": 0.9691906450708688, "grad_norm": 2.0736095905303955, "learning_rate": 1.622193713919179e-07, "loss": 0.7985, "step": 79525 }, { "epoch": 0.9692515812950167, "grad_norm": 1.9347033500671387, "learning_rate": 1.6189865298268122e-07, "loss": 0.6977, "step": 79530 }, { "epoch": 0.9693125175191645, "grad_norm": 2.405900239944458, "learning_rate": 1.6157793457344453e-07, "loss": 0.7543, "step": 79535 }, { "epoch": 0.9693734537433123, "grad_norm": 1.981160044670105, "learning_rate": 1.6125721616420783e-07, "loss": 0.7644, "step": 79540 }, { "epoch": 0.96943438996746, "grad_norm": 2.2794992923736572, "learning_rate": 1.6093649775497114e-07, "loss": 0.794, "step": 79545 }, { "epoch": 0.9694953261916078, "grad_norm": 2.2605628967285156, "learning_rate": 1.6061577934573445e-07, "loss": 0.8483, "step": 79550 }, { "epoch": 0.9695562624157557, "grad_norm": 2.0102670192718506, "learning_rate": 1.6029506093649776e-07, "loss": 0.7509, "step": 79555 }, { "epoch": 0.9696171986399035, "grad_norm": 2.0069048404693604, "learning_rate": 1.5997434252726107e-07, "loss": 0.8414, "step": 79560 }, { "epoch": 0.9696781348640513, "grad_norm": 2.1048998832702637, "learning_rate": 1.5965362411802437e-07, "loss": 0.7793, "step": 79565 }, { "epoch": 0.9697390710881991, "grad_norm": 1.9950867891311646, "learning_rate": 1.5933290570878768e-07, "loss": 0.7342, "step": 79570 }, { "epoch": 0.969800007312347, "grad_norm": 1.7354261875152588, "learning_rate": 1.59012187299551e-07, "loss": 0.8182, "step": 79575 }, { "epoch": 0.9698609435364947, "grad_norm": 1.9456121921539307, "learning_rate": 1.586914688903143e-07, "loss": 0.7715, "step": 79580 }, { "epoch": 0.9699218797606425, "grad_norm": 2.0991642475128174, "learning_rate": 1.583707504810776e-07, "loss": 0.8715, "step": 79585 }, { "epoch": 0.9699828159847903, "grad_norm": 1.8747893571853638, "learning_rate": 1.5805003207184091e-07, "loss": 0.8337, "step": 79590 }, { "epoch": 0.9700437522089381, "grad_norm": 2.2424511909484863, "learning_rate": 1.5772931366260422e-07, "loss": 0.8028, "step": 79595 }, { "epoch": 0.970104688433086, "grad_norm": 1.6947081089019775, "learning_rate": 1.5740859525336753e-07, "loss": 0.7754, "step": 79600 }, { "epoch": 0.9701656246572338, "grad_norm": 1.8449010848999023, "learning_rate": 1.5708787684413084e-07, "loss": 0.7541, "step": 79605 }, { "epoch": 0.9702265608813816, "grad_norm": 1.8757696151733398, "learning_rate": 1.567671584348942e-07, "loss": 0.8371, "step": 79610 }, { "epoch": 0.9702874971055293, "grad_norm": 2.2546961307525635, "learning_rate": 1.564464400256575e-07, "loss": 0.7968, "step": 79615 }, { "epoch": 0.9703484333296771, "grad_norm": 2.234210729598999, "learning_rate": 1.561257216164208e-07, "loss": 0.8085, "step": 79620 }, { "epoch": 0.970409369553825, "grad_norm": 1.9736100435256958, "learning_rate": 1.558050032071841e-07, "loss": 0.7894, "step": 79625 }, { "epoch": 0.9704703057779728, "grad_norm": 2.1047677993774414, "learning_rate": 1.554842847979474e-07, "loss": 0.8038, "step": 79630 }, { "epoch": 0.9705312420021206, "grad_norm": 2.248060941696167, "learning_rate": 1.551635663887107e-07, "loss": 0.7641, "step": 79635 }, { "epoch": 0.9705921782262684, "grad_norm": 1.952751636505127, "learning_rate": 1.5484284797947402e-07, "loss": 0.7279, "step": 79640 }, { "epoch": 0.9706531144504162, "grad_norm": 1.8258039951324463, "learning_rate": 1.5452212957023733e-07, "loss": 0.8306, "step": 79645 }, { "epoch": 0.970714050674564, "grad_norm": 2.0772900581359863, "learning_rate": 1.5420141116100066e-07, "loss": 0.8511, "step": 79650 }, { "epoch": 0.9707749868987118, "grad_norm": 1.7587621212005615, "learning_rate": 1.5388069275176397e-07, "loss": 0.7952, "step": 79655 }, { "epoch": 0.9708359231228596, "grad_norm": 1.9281103610992432, "learning_rate": 1.5355997434252728e-07, "loss": 0.8421, "step": 79660 }, { "epoch": 0.9708968593470074, "grad_norm": 1.8988828659057617, "learning_rate": 1.5323925593329059e-07, "loss": 0.8784, "step": 79665 }, { "epoch": 0.9709577955711552, "grad_norm": 1.4992426633834839, "learning_rate": 1.529185375240539e-07, "loss": 0.7528, "step": 79670 }, { "epoch": 0.9710187317953031, "grad_norm": 1.8355801105499268, "learning_rate": 1.525978191148172e-07, "loss": 0.9158, "step": 79675 }, { "epoch": 0.9710796680194509, "grad_norm": 2.1583340167999268, "learning_rate": 1.522771007055805e-07, "loss": 0.7877, "step": 79680 }, { "epoch": 0.9711406042435986, "grad_norm": 1.8984318971633911, "learning_rate": 1.5195638229634384e-07, "loss": 0.7976, "step": 79685 }, { "epoch": 0.9712015404677464, "grad_norm": 1.8563668727874756, "learning_rate": 1.5163566388710715e-07, "loss": 0.6935, "step": 79690 }, { "epoch": 0.9712624766918942, "grad_norm": 1.9766429662704468, "learning_rate": 1.5131494547787046e-07, "loss": 0.8248, "step": 79695 }, { "epoch": 0.9713234129160421, "grad_norm": 2.1641860008239746, "learning_rate": 1.5099422706863377e-07, "loss": 0.8693, "step": 79700 }, { "epoch": 0.9713843491401899, "grad_norm": 2.14080810546875, "learning_rate": 1.5067350865939708e-07, "loss": 0.796, "step": 79705 }, { "epoch": 0.9714452853643377, "grad_norm": 2.0320677757263184, "learning_rate": 1.5035279025016038e-07, "loss": 0.7978, "step": 79710 }, { "epoch": 0.9715062215884855, "grad_norm": 2.0654242038726807, "learning_rate": 1.500320718409237e-07, "loss": 0.7429, "step": 79715 }, { "epoch": 0.9715671578126333, "grad_norm": 1.7972337007522583, "learning_rate": 1.49711353431687e-07, "loss": 0.7611, "step": 79720 }, { "epoch": 0.9716280940367811, "grad_norm": 2.1912875175476074, "learning_rate": 1.493906350224503e-07, "loss": 0.8072, "step": 79725 }, { "epoch": 0.9716890302609289, "grad_norm": 3.6301674842834473, "learning_rate": 1.4906991661321362e-07, "loss": 0.7949, "step": 79730 }, { "epoch": 0.9717499664850767, "grad_norm": 2.1298980712890625, "learning_rate": 1.4874919820397692e-07, "loss": 0.8615, "step": 79735 }, { "epoch": 0.9718109027092245, "grad_norm": 1.9996719360351562, "learning_rate": 1.4842847979474023e-07, "loss": 0.8517, "step": 79740 }, { "epoch": 0.9718718389333724, "grad_norm": 2.0530335903167725, "learning_rate": 1.4810776138550354e-07, "loss": 0.8007, "step": 79745 }, { "epoch": 0.9719327751575202, "grad_norm": 1.9514199495315552, "learning_rate": 1.4778704297626685e-07, "loss": 0.8224, "step": 79750 }, { "epoch": 0.9719937113816679, "grad_norm": 1.9453837871551514, "learning_rate": 1.4746632456703016e-07, "loss": 0.8121, "step": 79755 }, { "epoch": 0.9720546476058157, "grad_norm": 1.8604966402053833, "learning_rate": 1.4714560615779346e-07, "loss": 0.8198, "step": 79760 }, { "epoch": 0.9721155838299635, "grad_norm": 2.2267038822174072, "learning_rate": 1.4682488774855677e-07, "loss": 0.8451, "step": 79765 }, { "epoch": 0.9721765200541114, "grad_norm": 2.329313039779663, "learning_rate": 1.4650416933932008e-07, "loss": 0.7276, "step": 79770 }, { "epoch": 0.9722374562782592, "grad_norm": 2.292529582977295, "learning_rate": 1.461834509300834e-07, "loss": 0.8636, "step": 79775 }, { "epoch": 0.972298392502407, "grad_norm": 1.7357854843139648, "learning_rate": 1.458627325208467e-07, "loss": 0.8247, "step": 79780 }, { "epoch": 0.9723593287265548, "grad_norm": 2.036123514175415, "learning_rate": 1.4554201411161e-07, "loss": 0.8042, "step": 79785 }, { "epoch": 0.9724202649507026, "grad_norm": 2.013705015182495, "learning_rate": 1.4522129570237334e-07, "loss": 0.856, "step": 79790 }, { "epoch": 0.9724812011748504, "grad_norm": 2.195437431335449, "learning_rate": 1.4490057729313665e-07, "loss": 0.7452, "step": 79795 }, { "epoch": 0.9725421373989982, "grad_norm": 2.0259218215942383, "learning_rate": 1.4457985888389995e-07, "loss": 0.8448, "step": 79800 }, { "epoch": 0.972603073623146, "grad_norm": 1.971572756767273, "learning_rate": 1.4425914047466326e-07, "loss": 0.8181, "step": 79805 }, { "epoch": 0.9726640098472938, "grad_norm": 2.389636754989624, "learning_rate": 1.4393842206542657e-07, "loss": 0.7511, "step": 79810 }, { "epoch": 0.9727249460714417, "grad_norm": 1.8448787927627563, "learning_rate": 1.4361770365618988e-07, "loss": 0.8359, "step": 79815 }, { "epoch": 0.9727858822955895, "grad_norm": 3.060657501220703, "learning_rate": 1.4329698524695318e-07, "loss": 0.7806, "step": 79820 }, { "epoch": 0.9728468185197372, "grad_norm": 2.0238113403320312, "learning_rate": 1.429762668377165e-07, "loss": 0.8033, "step": 79825 }, { "epoch": 0.972907754743885, "grad_norm": 2.054713487625122, "learning_rate": 1.426555484284798e-07, "loss": 0.8408, "step": 79830 }, { "epoch": 0.9729686909680328, "grad_norm": 2.4471874237060547, "learning_rate": 1.4233483001924314e-07, "loss": 0.8607, "step": 79835 }, { "epoch": 0.9730296271921807, "grad_norm": 2.173713207244873, "learning_rate": 1.4201411161000644e-07, "loss": 0.8289, "step": 79840 }, { "epoch": 0.9730905634163285, "grad_norm": 1.8850772380828857, "learning_rate": 1.4169339320076975e-07, "loss": 0.7774, "step": 79845 }, { "epoch": 0.9731514996404763, "grad_norm": 2.641627311706543, "learning_rate": 1.4137267479153306e-07, "loss": 0.8311, "step": 79850 }, { "epoch": 0.9732124358646241, "grad_norm": 2.1144237518310547, "learning_rate": 1.4105195638229637e-07, "loss": 0.769, "step": 79855 }, { "epoch": 0.9732733720887718, "grad_norm": 1.7754888534545898, "learning_rate": 1.4073123797305967e-07, "loss": 0.8422, "step": 79860 }, { "epoch": 0.9733343083129197, "grad_norm": 2.1456446647644043, "learning_rate": 1.4041051956382298e-07, "loss": 0.8714, "step": 79865 }, { "epoch": 0.9733952445370675, "grad_norm": 1.9058432579040527, "learning_rate": 1.400898011545863e-07, "loss": 0.7911, "step": 79870 }, { "epoch": 0.9734561807612153, "grad_norm": 2.0725197792053223, "learning_rate": 1.397690827453496e-07, "loss": 0.8114, "step": 79875 }, { "epoch": 0.9735171169853631, "grad_norm": 2.190962314605713, "learning_rate": 1.394483643361129e-07, "loss": 0.8315, "step": 79880 }, { "epoch": 0.973578053209511, "grad_norm": 1.6896108388900757, "learning_rate": 1.3912764592687621e-07, "loss": 0.8419, "step": 79885 }, { "epoch": 0.9736389894336588, "grad_norm": 2.1421449184417725, "learning_rate": 1.3880692751763952e-07, "loss": 0.8237, "step": 79890 }, { "epoch": 0.9736999256578065, "grad_norm": 1.9575603008270264, "learning_rate": 1.3848620910840283e-07, "loss": 0.8472, "step": 79895 }, { "epoch": 0.9737608618819543, "grad_norm": 2.045351505279541, "learning_rate": 1.3816549069916614e-07, "loss": 0.8584, "step": 79900 }, { "epoch": 0.9738217981061021, "grad_norm": 2.0307185649871826, "learning_rate": 1.3784477228992945e-07, "loss": 0.7892, "step": 79905 }, { "epoch": 0.97388273433025, "grad_norm": 1.9445477724075317, "learning_rate": 1.3752405388069275e-07, "loss": 0.7831, "step": 79910 }, { "epoch": 0.9739436705543978, "grad_norm": 3.02917218208313, "learning_rate": 1.3720333547145606e-07, "loss": 0.8374, "step": 79915 }, { "epoch": 0.9740046067785456, "grad_norm": 1.9426571130752563, "learning_rate": 1.3688261706221937e-07, "loss": 0.7402, "step": 79920 }, { "epoch": 0.9740655430026934, "grad_norm": 2.0974128246307373, "learning_rate": 1.3656189865298268e-07, "loss": 0.7716, "step": 79925 }, { "epoch": 0.9741264792268411, "grad_norm": 2.0619990825653076, "learning_rate": 1.36241180243746e-07, "loss": 0.8266, "step": 79930 }, { "epoch": 0.974187415450989, "grad_norm": 1.8226743936538696, "learning_rate": 1.3592046183450932e-07, "loss": 0.8305, "step": 79935 }, { "epoch": 0.9742483516751368, "grad_norm": 2.0753345489501953, "learning_rate": 1.3559974342527263e-07, "loss": 0.8692, "step": 79940 }, { "epoch": 0.9743092878992846, "grad_norm": 1.822326421737671, "learning_rate": 1.3527902501603594e-07, "loss": 0.8369, "step": 79945 }, { "epoch": 0.9743702241234324, "grad_norm": 2.3175301551818848, "learning_rate": 1.3495830660679924e-07, "loss": 0.7891, "step": 79950 }, { "epoch": 0.9744311603475803, "grad_norm": 2.027116060256958, "learning_rate": 1.3463758819756255e-07, "loss": 0.825, "step": 79955 }, { "epoch": 0.9744920965717281, "grad_norm": 2.467858076095581, "learning_rate": 1.3431686978832586e-07, "loss": 0.8531, "step": 79960 }, { "epoch": 0.9745530327958758, "grad_norm": 1.7753065824508667, "learning_rate": 1.3399615137908917e-07, "loss": 0.7804, "step": 79965 }, { "epoch": 0.9746139690200236, "grad_norm": 1.9957770109176636, "learning_rate": 1.3367543296985248e-07, "loss": 0.7383, "step": 79970 }, { "epoch": 0.9746749052441714, "grad_norm": 2.065161943435669, "learning_rate": 1.3335471456061578e-07, "loss": 0.7317, "step": 79975 }, { "epoch": 0.9747358414683193, "grad_norm": 2.030402898788452, "learning_rate": 1.330339961513791e-07, "loss": 0.8358, "step": 79980 }, { "epoch": 0.9747967776924671, "grad_norm": 1.8342065811157227, "learning_rate": 1.3271327774214243e-07, "loss": 0.7557, "step": 79985 }, { "epoch": 0.9748577139166149, "grad_norm": 1.8640738725662231, "learning_rate": 1.3239255933290573e-07, "loss": 0.783, "step": 79990 }, { "epoch": 0.9749186501407627, "grad_norm": 2.2517552375793457, "learning_rate": 1.3207184092366904e-07, "loss": 0.7687, "step": 79995 }, { "epoch": 0.9749795863649104, "grad_norm": 2.093061685562134, "learning_rate": 1.3175112251443235e-07, "loss": 0.8411, "step": 80000 }, { "epoch": 0.9750405225890583, "grad_norm": 1.7759798765182495, "learning_rate": 1.3143040410519566e-07, "loss": 0.779, "step": 80005 }, { "epoch": 0.9751014588132061, "grad_norm": 1.992464303970337, "learning_rate": 1.3110968569595897e-07, "loss": 0.7934, "step": 80010 }, { "epoch": 0.9751623950373539, "grad_norm": 2.181098222732544, "learning_rate": 1.3078896728672227e-07, "loss": 0.8767, "step": 80015 }, { "epoch": 0.9752233312615017, "grad_norm": 2.1013855934143066, "learning_rate": 1.3046824887748558e-07, "loss": 0.8252, "step": 80020 }, { "epoch": 0.9752842674856496, "grad_norm": 1.9413979053497314, "learning_rate": 1.301475304682489e-07, "loss": 0.7429, "step": 80025 }, { "epoch": 0.9753452037097974, "grad_norm": 1.9031336307525635, "learning_rate": 1.298268120590122e-07, "loss": 0.8432, "step": 80030 }, { "epoch": 0.9754061399339451, "grad_norm": 2.139993190765381, "learning_rate": 1.295060936497755e-07, "loss": 0.8247, "step": 80035 }, { "epoch": 0.9754670761580929, "grad_norm": 1.8060628175735474, "learning_rate": 1.2918537524053881e-07, "loss": 0.7547, "step": 80040 }, { "epoch": 0.9755280123822407, "grad_norm": 2.029001235961914, "learning_rate": 1.2886465683130212e-07, "loss": 0.7819, "step": 80045 }, { "epoch": 0.9755889486063886, "grad_norm": 2.0970473289489746, "learning_rate": 1.2854393842206543e-07, "loss": 0.8108, "step": 80050 }, { "epoch": 0.9756498848305364, "grad_norm": 2.033081293106079, "learning_rate": 1.2822322001282874e-07, "loss": 0.8537, "step": 80055 }, { "epoch": 0.9757108210546842, "grad_norm": 1.9108026027679443, "learning_rate": 1.2790250160359205e-07, "loss": 0.8642, "step": 80060 }, { "epoch": 0.9757717572788319, "grad_norm": 2.8125879764556885, "learning_rate": 1.2758178319435535e-07, "loss": 0.9039, "step": 80065 }, { "epoch": 0.9758326935029797, "grad_norm": 2.415221929550171, "learning_rate": 1.272610647851187e-07, "loss": 0.7738, "step": 80070 }, { "epoch": 0.9758936297271276, "grad_norm": 1.7847102880477905, "learning_rate": 1.26940346375882e-07, "loss": 0.8535, "step": 80075 }, { "epoch": 0.9759545659512754, "grad_norm": 1.809687852859497, "learning_rate": 1.266196279666453e-07, "loss": 0.7608, "step": 80080 }, { "epoch": 0.9760155021754232, "grad_norm": 1.8772978782653809, "learning_rate": 1.262989095574086e-07, "loss": 0.7264, "step": 80085 }, { "epoch": 0.976076438399571, "grad_norm": 2.0401291847229004, "learning_rate": 1.2597819114817192e-07, "loss": 0.8228, "step": 80090 }, { "epoch": 0.9761373746237189, "grad_norm": 1.9396162033081055, "learning_rate": 1.2565747273893523e-07, "loss": 0.8081, "step": 80095 }, { "epoch": 0.9761983108478666, "grad_norm": 2.1187074184417725, "learning_rate": 1.2533675432969854e-07, "loss": 0.8704, "step": 80100 }, { "epoch": 0.9762592470720144, "grad_norm": 2.1889138221740723, "learning_rate": 1.2501603592046184e-07, "loss": 0.8314, "step": 80105 }, { "epoch": 0.9763201832961622, "grad_norm": 1.7223904132843018, "learning_rate": 1.2469531751122515e-07, "loss": 0.7888, "step": 80110 }, { "epoch": 0.97638111952031, "grad_norm": 2.1525425910949707, "learning_rate": 1.2437459910198846e-07, "loss": 0.797, "step": 80115 }, { "epoch": 0.9764420557444579, "grad_norm": 1.9646611213684082, "learning_rate": 1.2405388069275177e-07, "loss": 0.8333, "step": 80120 }, { "epoch": 0.9765029919686057, "grad_norm": 1.9386274814605713, "learning_rate": 1.2373316228351508e-07, "loss": 0.8179, "step": 80125 }, { "epoch": 0.9765639281927535, "grad_norm": 2.0949149131774902, "learning_rate": 1.2341244387427838e-07, "loss": 0.7929, "step": 80130 }, { "epoch": 0.9766248644169012, "grad_norm": 2.412937879562378, "learning_rate": 1.230917254650417e-07, "loss": 0.8044, "step": 80135 }, { "epoch": 0.976685800641049, "grad_norm": 2.030829429626465, "learning_rate": 1.2277100705580503e-07, "loss": 0.7683, "step": 80140 }, { "epoch": 0.9767467368651969, "grad_norm": 2.346719741821289, "learning_rate": 1.2245028864656833e-07, "loss": 0.7788, "step": 80145 }, { "epoch": 0.9768076730893447, "grad_norm": 2.012084484100342, "learning_rate": 1.2212957023733164e-07, "loss": 0.8499, "step": 80150 }, { "epoch": 0.9768686093134925, "grad_norm": 2.0970957279205322, "learning_rate": 1.2180885182809495e-07, "loss": 0.8016, "step": 80155 }, { "epoch": 0.9769295455376403, "grad_norm": 2.084343433380127, "learning_rate": 1.2148813341885826e-07, "loss": 0.779, "step": 80160 }, { "epoch": 0.9769904817617882, "grad_norm": 1.5386396646499634, "learning_rate": 1.2116741500962157e-07, "loss": 0.7839, "step": 80165 }, { "epoch": 0.9770514179859359, "grad_norm": 1.8307403326034546, "learning_rate": 1.2084669660038487e-07, "loss": 0.7768, "step": 80170 }, { "epoch": 0.9771123542100837, "grad_norm": 1.6290898323059082, "learning_rate": 1.2052597819114818e-07, "loss": 0.7527, "step": 80175 }, { "epoch": 0.9771732904342315, "grad_norm": 1.7271404266357422, "learning_rate": 1.202052597819115e-07, "loss": 0.7629, "step": 80180 }, { "epoch": 0.9772342266583793, "grad_norm": 1.8263294696807861, "learning_rate": 1.198845413726748e-07, "loss": 0.7776, "step": 80185 }, { "epoch": 0.9772951628825272, "grad_norm": 2.293382167816162, "learning_rate": 1.195638229634381e-07, "loss": 0.773, "step": 80190 }, { "epoch": 0.977356099106675, "grad_norm": 1.758223533630371, "learning_rate": 1.192431045542014e-07, "loss": 0.7805, "step": 80195 }, { "epoch": 0.9774170353308228, "grad_norm": 1.952162504196167, "learning_rate": 1.1892238614496473e-07, "loss": 0.822, "step": 80200 }, { "epoch": 0.9774779715549705, "grad_norm": 2.0520715713500977, "learning_rate": 1.1860166773572804e-07, "loss": 0.7916, "step": 80205 }, { "epoch": 0.9775389077791183, "grad_norm": 1.6965832710266113, "learning_rate": 1.1828094932649135e-07, "loss": 0.8162, "step": 80210 }, { "epoch": 0.9775998440032662, "grad_norm": 1.8534952402114868, "learning_rate": 1.1796023091725466e-07, "loss": 0.7574, "step": 80215 }, { "epoch": 0.977660780227414, "grad_norm": 2.100562572479248, "learning_rate": 1.1763951250801797e-07, "loss": 0.8206, "step": 80220 }, { "epoch": 0.9777217164515618, "grad_norm": 2.4630448818206787, "learning_rate": 1.1731879409878127e-07, "loss": 0.8034, "step": 80225 }, { "epoch": 0.9777826526757096, "grad_norm": 2.0919435024261475, "learning_rate": 1.1699807568954458e-07, "loss": 0.8341, "step": 80230 }, { "epoch": 0.9778435888998575, "grad_norm": 2.416214942932129, "learning_rate": 1.166773572803079e-07, "loss": 0.8155, "step": 80235 }, { "epoch": 0.9779045251240052, "grad_norm": 2.6690051555633545, "learning_rate": 1.1635663887107121e-07, "loss": 0.8168, "step": 80240 }, { "epoch": 0.977965461348153, "grad_norm": 1.9241963624954224, "learning_rate": 1.1603592046183452e-07, "loss": 0.7424, "step": 80245 }, { "epoch": 0.9780263975723008, "grad_norm": 1.9988603591918945, "learning_rate": 1.1571520205259783e-07, "loss": 0.8107, "step": 80250 }, { "epoch": 0.9780873337964486, "grad_norm": 1.961029052734375, "learning_rate": 1.1539448364336113e-07, "loss": 0.8273, "step": 80255 }, { "epoch": 0.9781482700205965, "grad_norm": 1.6811503171920776, "learning_rate": 1.1507376523412444e-07, "loss": 0.7089, "step": 80260 }, { "epoch": 0.9782092062447443, "grad_norm": 2.3518824577331543, "learning_rate": 1.1475304682488775e-07, "loss": 0.8201, "step": 80265 }, { "epoch": 0.9782701424688921, "grad_norm": 2.0792033672332764, "learning_rate": 1.1443232841565106e-07, "loss": 0.7816, "step": 80270 }, { "epoch": 0.9783310786930398, "grad_norm": 2.2118582725524902, "learning_rate": 1.1411161000641437e-07, "loss": 0.7655, "step": 80275 }, { "epoch": 0.9783920149171876, "grad_norm": 1.9298832416534424, "learning_rate": 1.1379089159717767e-07, "loss": 0.7911, "step": 80280 }, { "epoch": 0.9784529511413355, "grad_norm": 1.7015436887741089, "learning_rate": 1.1347017318794098e-07, "loss": 0.7253, "step": 80285 }, { "epoch": 0.9785138873654833, "grad_norm": 1.8750226497650146, "learning_rate": 1.1314945477870432e-07, "loss": 0.806, "step": 80290 }, { "epoch": 0.9785748235896311, "grad_norm": 1.7282493114471436, "learning_rate": 1.1282873636946762e-07, "loss": 0.7447, "step": 80295 }, { "epoch": 0.9786357598137789, "grad_norm": 1.8950074911117554, "learning_rate": 1.1250801796023093e-07, "loss": 0.8171, "step": 80300 }, { "epoch": 0.9786966960379267, "grad_norm": 1.972353219985962, "learning_rate": 1.1218729955099424e-07, "loss": 0.7736, "step": 80305 }, { "epoch": 0.9787576322620745, "grad_norm": 1.9228185415267944, "learning_rate": 1.1186658114175755e-07, "loss": 0.8128, "step": 80310 }, { "epoch": 0.9788185684862223, "grad_norm": 1.8488001823425293, "learning_rate": 1.1154586273252086e-07, "loss": 0.79, "step": 80315 }, { "epoch": 0.9788795047103701, "grad_norm": 1.8530491590499878, "learning_rate": 1.1122514432328416e-07, "loss": 0.775, "step": 80320 }, { "epoch": 0.9789404409345179, "grad_norm": 2.340433120727539, "learning_rate": 1.1090442591404749e-07, "loss": 0.7836, "step": 80325 }, { "epoch": 0.9790013771586658, "grad_norm": 1.7797350883483887, "learning_rate": 1.105837075048108e-07, "loss": 0.7432, "step": 80330 }, { "epoch": 0.9790623133828136, "grad_norm": 1.7713872194290161, "learning_rate": 1.102629890955741e-07, "loss": 0.7963, "step": 80335 }, { "epoch": 0.9791232496069614, "grad_norm": 1.930019736289978, "learning_rate": 1.0994227068633741e-07, "loss": 0.8449, "step": 80340 }, { "epoch": 0.9791841858311091, "grad_norm": 1.751320242881775, "learning_rate": 1.0962155227710072e-07, "loss": 0.8023, "step": 80345 }, { "epoch": 0.9792451220552569, "grad_norm": 2.018512487411499, "learning_rate": 1.0930083386786403e-07, "loss": 0.88, "step": 80350 }, { "epoch": 0.9793060582794048, "grad_norm": 1.9190044403076172, "learning_rate": 1.0898011545862733e-07, "loss": 0.7976, "step": 80355 }, { "epoch": 0.9793669945035526, "grad_norm": 1.8740941286087036, "learning_rate": 1.0865939704939064e-07, "loss": 0.8122, "step": 80360 }, { "epoch": 0.9794279307277004, "grad_norm": 1.6812055110931396, "learning_rate": 1.0833867864015395e-07, "loss": 0.8573, "step": 80365 }, { "epoch": 0.9794888669518482, "grad_norm": 1.8694322109222412, "learning_rate": 1.0801796023091726e-07, "loss": 0.8229, "step": 80370 }, { "epoch": 0.979549803175996, "grad_norm": 2.226879835128784, "learning_rate": 1.0769724182168058e-07, "loss": 0.7864, "step": 80375 }, { "epoch": 0.9796107394001438, "grad_norm": 1.6749255657196045, "learning_rate": 1.0737652341244389e-07, "loss": 0.7607, "step": 80380 }, { "epoch": 0.9796716756242916, "grad_norm": 2.3642754554748535, "learning_rate": 1.070558050032072e-07, "loss": 0.7989, "step": 80385 }, { "epoch": 0.9797326118484394, "grad_norm": 2.2539703845977783, "learning_rate": 1.067350865939705e-07, "loss": 0.7988, "step": 80390 }, { "epoch": 0.9797935480725872, "grad_norm": 1.9669824838638306, "learning_rate": 1.0641436818473381e-07, "loss": 0.8514, "step": 80395 }, { "epoch": 0.979854484296735, "grad_norm": 2.163100481033325, "learning_rate": 1.0609364977549712e-07, "loss": 0.7597, "step": 80400 }, { "epoch": 0.9799154205208829, "grad_norm": 2.1950323581695557, "learning_rate": 1.0577293136626043e-07, "loss": 0.7979, "step": 80405 }, { "epoch": 0.9799763567450307, "grad_norm": 1.6888084411621094, "learning_rate": 1.0545221295702373e-07, "loss": 0.7996, "step": 80410 }, { "epoch": 0.9800372929691784, "grad_norm": 1.5880212783813477, "learning_rate": 1.0513149454778704e-07, "loss": 0.7094, "step": 80415 }, { "epoch": 0.9800982291933262, "grad_norm": 1.8575706481933594, "learning_rate": 1.0481077613855035e-07, "loss": 0.8442, "step": 80420 }, { "epoch": 0.980159165417474, "grad_norm": 2.0766804218292236, "learning_rate": 1.0449005772931366e-07, "loss": 0.772, "step": 80425 }, { "epoch": 0.9802201016416219, "grad_norm": 1.888007640838623, "learning_rate": 1.0416933932007698e-07, "loss": 0.7175, "step": 80430 }, { "epoch": 0.9802810378657697, "grad_norm": 1.9516146183013916, "learning_rate": 1.0384862091084029e-07, "loss": 0.7726, "step": 80435 }, { "epoch": 0.9803419740899175, "grad_norm": 1.739479422569275, "learning_rate": 1.0352790250160361e-07, "loss": 0.8323, "step": 80440 }, { "epoch": 0.9804029103140653, "grad_norm": 2.073843479156494, "learning_rate": 1.0320718409236692e-07, "loss": 0.7182, "step": 80445 }, { "epoch": 0.9804638465382131, "grad_norm": 1.9353684186935425, "learning_rate": 1.0288646568313022e-07, "loss": 0.7859, "step": 80450 }, { "epoch": 0.9805247827623609, "grad_norm": 2.42801833152771, "learning_rate": 1.0256574727389353e-07, "loss": 0.896, "step": 80455 }, { "epoch": 0.9805857189865087, "grad_norm": 1.861390233039856, "learning_rate": 1.0224502886465684e-07, "loss": 0.8001, "step": 80460 }, { "epoch": 0.9806466552106565, "grad_norm": 2.2051353454589844, "learning_rate": 1.0192431045542016e-07, "loss": 0.8101, "step": 80465 }, { "epoch": 0.9807075914348043, "grad_norm": 1.8012429475784302, "learning_rate": 1.0160359204618347e-07, "loss": 0.7946, "step": 80470 }, { "epoch": 0.9807685276589522, "grad_norm": 1.8506616353988647, "learning_rate": 1.0128287363694678e-07, "loss": 0.7862, "step": 80475 }, { "epoch": 0.9808294638831, "grad_norm": 1.856339693069458, "learning_rate": 1.0096215522771008e-07, "loss": 0.8351, "step": 80480 }, { "epoch": 0.9808904001072477, "grad_norm": 1.9634007215499878, "learning_rate": 1.0064143681847339e-07, "loss": 0.7797, "step": 80485 }, { "epoch": 0.9809513363313955, "grad_norm": 1.8919718265533447, "learning_rate": 1.003207184092367e-07, "loss": 0.7791, "step": 80490 }, { "epoch": 0.9810122725555434, "grad_norm": 1.9306552410125732, "learning_rate": 1.0000000000000001e-07, "loss": 0.7969, "step": 80495 }, { "epoch": 0.9810732087796912, "grad_norm": 2.2763123512268066, "learning_rate": 9.967928159076332e-08, "loss": 0.8179, "step": 80500 }, { "epoch": 0.981134145003839, "grad_norm": 1.7646454572677612, "learning_rate": 9.935856318152662e-08, "loss": 0.8153, "step": 80505 }, { "epoch": 0.9811950812279868, "grad_norm": 1.9977222681045532, "learning_rate": 9.903784477228993e-08, "loss": 0.8106, "step": 80510 }, { "epoch": 0.9812560174521346, "grad_norm": 2.3241612911224365, "learning_rate": 9.871712636305325e-08, "loss": 0.8026, "step": 80515 }, { "epoch": 0.9813169536762824, "grad_norm": 1.8787918090820312, "learning_rate": 9.839640795381656e-08, "loss": 0.7317, "step": 80520 }, { "epoch": 0.9813778899004302, "grad_norm": 1.6311014890670776, "learning_rate": 9.807568954457987e-08, "loss": 0.866, "step": 80525 }, { "epoch": 0.981438826124578, "grad_norm": 1.9685248136520386, "learning_rate": 9.775497113534318e-08, "loss": 0.8035, "step": 80530 }, { "epoch": 0.9814997623487258, "grad_norm": 2.084768533706665, "learning_rate": 9.743425272610649e-08, "loss": 0.7699, "step": 80535 }, { "epoch": 0.9815606985728736, "grad_norm": 1.836458683013916, "learning_rate": 9.71135343168698e-08, "loss": 0.7639, "step": 80540 }, { "epoch": 0.9816216347970215, "grad_norm": 1.8385089635849, "learning_rate": 9.67928159076331e-08, "loss": 0.7895, "step": 80545 }, { "epoch": 0.9816825710211693, "grad_norm": 2.535832166671753, "learning_rate": 9.647209749839641e-08, "loss": 0.818, "step": 80550 }, { "epoch": 0.981743507245317, "grad_norm": 2.043205499649048, "learning_rate": 9.615137908915972e-08, "loss": 0.8633, "step": 80555 }, { "epoch": 0.9818044434694648, "grad_norm": 1.8833179473876953, "learning_rate": 9.583066067992303e-08, "loss": 0.8241, "step": 80560 }, { "epoch": 0.9818653796936126, "grad_norm": 1.8206214904785156, "learning_rate": 9.550994227068633e-08, "loss": 0.842, "step": 80565 }, { "epoch": 0.9819263159177605, "grad_norm": 2.0529897212982178, "learning_rate": 9.518922386144965e-08, "loss": 0.8753, "step": 80570 }, { "epoch": 0.9819872521419083, "grad_norm": 2.1671371459960938, "learning_rate": 9.486850545221296e-08, "loss": 0.8454, "step": 80575 }, { "epoch": 0.9820481883660561, "grad_norm": 2.190502643585205, "learning_rate": 9.454778704297627e-08, "loss": 0.8802, "step": 80580 }, { "epoch": 0.9821091245902039, "grad_norm": 1.7430354356765747, "learning_rate": 9.422706863373958e-08, "loss": 0.8409, "step": 80585 }, { "epoch": 0.9821700608143517, "grad_norm": 2.1498494148254395, "learning_rate": 9.39063502245029e-08, "loss": 0.8127, "step": 80590 }, { "epoch": 0.9822309970384995, "grad_norm": 2.085792303085327, "learning_rate": 9.358563181526621e-08, "loss": 0.8971, "step": 80595 }, { "epoch": 0.9822919332626473, "grad_norm": 2.091667652130127, "learning_rate": 9.326491340602952e-08, "loss": 0.8883, "step": 80600 }, { "epoch": 0.9823528694867951, "grad_norm": 2.029606342315674, "learning_rate": 9.294419499679284e-08, "loss": 0.8186, "step": 80605 }, { "epoch": 0.982413805710943, "grad_norm": 2.2585389614105225, "learning_rate": 9.262347658755614e-08, "loss": 0.8443, "step": 80610 }, { "epoch": 0.9824747419350908, "grad_norm": 1.8182768821716309, "learning_rate": 9.230275817831945e-08, "loss": 0.7516, "step": 80615 }, { "epoch": 0.9825356781592386, "grad_norm": 1.848530650138855, "learning_rate": 9.198203976908276e-08, "loss": 0.7841, "step": 80620 }, { "epoch": 0.9825966143833863, "grad_norm": 1.8898658752441406, "learning_rate": 9.166132135984607e-08, "loss": 0.8259, "step": 80625 }, { "epoch": 0.9826575506075341, "grad_norm": 2.2536556720733643, "learning_rate": 9.134060295060938e-08, "loss": 0.7715, "step": 80630 }, { "epoch": 0.982718486831682, "grad_norm": 2.57415771484375, "learning_rate": 9.101988454137268e-08, "loss": 0.8344, "step": 80635 }, { "epoch": 0.9827794230558298, "grad_norm": 2.157621383666992, "learning_rate": 9.069916613213599e-08, "loss": 0.8171, "step": 80640 }, { "epoch": 0.9828403592799776, "grad_norm": 2.154582977294922, "learning_rate": 9.03784477228993e-08, "loss": 0.8502, "step": 80645 }, { "epoch": 0.9829012955041254, "grad_norm": 1.724750280380249, "learning_rate": 9.005772931366261e-08, "loss": 0.8963, "step": 80650 }, { "epoch": 0.9829622317282732, "grad_norm": 2.0759406089782715, "learning_rate": 8.973701090442592e-08, "loss": 0.8062, "step": 80655 }, { "epoch": 0.983023167952421, "grad_norm": 2.132598876953125, "learning_rate": 8.941629249518924e-08, "loss": 0.8517, "step": 80660 }, { "epoch": 0.9830841041765688, "grad_norm": 1.945071816444397, "learning_rate": 8.909557408595254e-08, "loss": 0.8598, "step": 80665 }, { "epoch": 0.9831450404007166, "grad_norm": 2.3428239822387695, "learning_rate": 8.877485567671585e-08, "loss": 0.7579, "step": 80670 }, { "epoch": 0.9832059766248644, "grad_norm": 2.6937601566314697, "learning_rate": 8.845413726747916e-08, "loss": 0.7646, "step": 80675 }, { "epoch": 0.9832669128490122, "grad_norm": 1.7893083095550537, "learning_rate": 8.813341885824247e-08, "loss": 0.8478, "step": 80680 }, { "epoch": 0.9833278490731601, "grad_norm": 1.9542808532714844, "learning_rate": 8.781270044900578e-08, "loss": 0.8328, "step": 80685 }, { "epoch": 0.9833887852973079, "grad_norm": 2.121021270751953, "learning_rate": 8.749198203976908e-08, "loss": 0.8678, "step": 80690 }, { "epoch": 0.9834497215214556, "grad_norm": 1.7890797853469849, "learning_rate": 8.717126363053239e-08, "loss": 0.7611, "step": 80695 }, { "epoch": 0.9835106577456034, "grad_norm": 1.799759030342102, "learning_rate": 8.68505452212957e-08, "loss": 0.8104, "step": 80700 }, { "epoch": 0.9835715939697512, "grad_norm": 1.950545072555542, "learning_rate": 8.652982681205901e-08, "loss": 0.8084, "step": 80705 }, { "epoch": 0.9836325301938991, "grad_norm": 1.9658139944076538, "learning_rate": 8.620910840282233e-08, "loss": 0.8207, "step": 80710 }, { "epoch": 0.9836934664180469, "grad_norm": 1.9498807191848755, "learning_rate": 8.588838999358564e-08, "loss": 0.7391, "step": 80715 }, { "epoch": 0.9837544026421947, "grad_norm": 2.1362144947052, "learning_rate": 8.556767158434895e-08, "loss": 0.8075, "step": 80720 }, { "epoch": 0.9838153388663425, "grad_norm": 1.86178719997406, "learning_rate": 8.524695317511225e-08, "loss": 0.7685, "step": 80725 }, { "epoch": 0.9838762750904902, "grad_norm": 2.0703999996185303, "learning_rate": 8.492623476587556e-08, "loss": 0.8585, "step": 80730 }, { "epoch": 0.9839372113146381, "grad_norm": 2.105463981628418, "learning_rate": 8.460551635663887e-08, "loss": 0.8365, "step": 80735 }, { "epoch": 0.9839981475387859, "grad_norm": 1.6929082870483398, "learning_rate": 8.428479794740219e-08, "loss": 0.6978, "step": 80740 }, { "epoch": 0.9840590837629337, "grad_norm": 2.0954530239105225, "learning_rate": 8.396407953816551e-08, "loss": 0.8979, "step": 80745 }, { "epoch": 0.9841200199870815, "grad_norm": 2.0655477046966553, "learning_rate": 8.364336112892882e-08, "loss": 0.7847, "step": 80750 }, { "epoch": 0.9841809562112294, "grad_norm": 2.3671200275421143, "learning_rate": 8.332264271969213e-08, "loss": 0.7958, "step": 80755 }, { "epoch": 0.9842418924353772, "grad_norm": 2.0810046195983887, "learning_rate": 8.300192431045544e-08, "loss": 0.7594, "step": 80760 }, { "epoch": 0.9843028286595249, "grad_norm": 2.2207298278808594, "learning_rate": 8.268120590121874e-08, "loss": 0.7884, "step": 80765 }, { "epoch": 0.9843637648836727, "grad_norm": 1.9498291015625, "learning_rate": 8.236048749198205e-08, "loss": 0.8526, "step": 80770 }, { "epoch": 0.9844247011078205, "grad_norm": 2.044062376022339, "learning_rate": 8.203976908274536e-08, "loss": 0.8281, "step": 80775 }, { "epoch": 0.9844856373319684, "grad_norm": 1.9444936513900757, "learning_rate": 8.171905067350867e-08, "loss": 0.7903, "step": 80780 }, { "epoch": 0.9845465735561162, "grad_norm": 1.5749564170837402, "learning_rate": 8.139833226427198e-08, "loss": 0.763, "step": 80785 }, { "epoch": 0.984607509780264, "grad_norm": 1.897291660308838, "learning_rate": 8.107761385503528e-08, "loss": 0.7595, "step": 80790 }, { "epoch": 0.9846684460044118, "grad_norm": 1.997496247291565, "learning_rate": 8.075689544579859e-08, "loss": 0.7712, "step": 80795 }, { "epoch": 0.9847293822285595, "grad_norm": 2.1107828617095947, "learning_rate": 8.043617703656191e-08, "loss": 0.8657, "step": 80800 }, { "epoch": 0.9847903184527074, "grad_norm": 1.7393325567245483, "learning_rate": 8.011545862732522e-08, "loss": 0.783, "step": 80805 }, { "epoch": 0.9848512546768552, "grad_norm": 1.858933687210083, "learning_rate": 7.979474021808853e-08, "loss": 0.7964, "step": 80810 }, { "epoch": 0.984912190901003, "grad_norm": 1.9149906635284424, "learning_rate": 7.947402180885184e-08, "loss": 0.7923, "step": 80815 }, { "epoch": 0.9849731271251508, "grad_norm": 2.2079150676727295, "learning_rate": 7.915330339961514e-08, "loss": 0.717, "step": 80820 }, { "epoch": 0.9850340633492987, "grad_norm": 1.8652253150939941, "learning_rate": 7.883258499037845e-08, "loss": 0.8569, "step": 80825 }, { "epoch": 0.9850949995734465, "grad_norm": 2.055224657058716, "learning_rate": 7.851186658114176e-08, "loss": 0.8359, "step": 80830 }, { "epoch": 0.9851559357975942, "grad_norm": 2.1091763973236084, "learning_rate": 7.819114817190507e-08, "loss": 0.8657, "step": 80835 }, { "epoch": 0.985216872021742, "grad_norm": 2.0242197513580322, "learning_rate": 7.787042976266838e-08, "loss": 0.7859, "step": 80840 }, { "epoch": 0.9852778082458898, "grad_norm": 2.133295774459839, "learning_rate": 7.754971135343168e-08, "loss": 0.8024, "step": 80845 }, { "epoch": 0.9853387444700377, "grad_norm": 2.0226166248321533, "learning_rate": 7.7228992944195e-08, "loss": 0.8182, "step": 80850 }, { "epoch": 0.9853996806941855, "grad_norm": 1.7545841932296753, "learning_rate": 7.690827453495831e-08, "loss": 0.7717, "step": 80855 }, { "epoch": 0.9854606169183333, "grad_norm": 1.8716174364089966, "learning_rate": 7.658755612572162e-08, "loss": 0.7225, "step": 80860 }, { "epoch": 0.9855215531424811, "grad_norm": 1.7457791566848755, "learning_rate": 7.626683771648493e-08, "loss": 0.857, "step": 80865 }, { "epoch": 0.9855824893666288, "grad_norm": 2.3352890014648438, "learning_rate": 7.594611930724825e-08, "loss": 0.8392, "step": 80870 }, { "epoch": 0.9856434255907767, "grad_norm": 2.2083373069763184, "learning_rate": 7.562540089801156e-08, "loss": 0.841, "step": 80875 }, { "epoch": 0.9857043618149245, "grad_norm": 1.8530726432800293, "learning_rate": 7.530468248877487e-08, "loss": 0.8303, "step": 80880 }, { "epoch": 0.9857652980390723, "grad_norm": 2.179208755493164, "learning_rate": 7.498396407953817e-08, "loss": 0.8111, "step": 80885 }, { "epoch": 0.9858262342632201, "grad_norm": 2.103081464767456, "learning_rate": 7.466324567030148e-08, "loss": 0.6935, "step": 80890 }, { "epoch": 0.985887170487368, "grad_norm": 2.2223944664001465, "learning_rate": 7.434252726106479e-08, "loss": 0.7642, "step": 80895 }, { "epoch": 0.9859481067115158, "grad_norm": 1.8473972082138062, "learning_rate": 7.40218088518281e-08, "loss": 0.78, "step": 80900 }, { "epoch": 0.9860090429356635, "grad_norm": 1.805967092514038, "learning_rate": 7.37010904425914e-08, "loss": 0.7945, "step": 80905 }, { "epoch": 0.9860699791598113, "grad_norm": 1.7866114377975464, "learning_rate": 7.338037203335471e-08, "loss": 0.772, "step": 80910 }, { "epoch": 0.9861309153839591, "grad_norm": 2.0391480922698975, "learning_rate": 7.305965362411802e-08, "loss": 0.8066, "step": 80915 }, { "epoch": 0.986191851608107, "grad_norm": 1.8005260229110718, "learning_rate": 7.273893521488134e-08, "loss": 0.8548, "step": 80920 }, { "epoch": 0.9862527878322548, "grad_norm": 1.9494547843933105, "learning_rate": 7.241821680564465e-08, "loss": 0.8359, "step": 80925 }, { "epoch": 0.9863137240564026, "grad_norm": 1.6529039144515991, "learning_rate": 7.209749839640796e-08, "loss": 0.8651, "step": 80930 }, { "epoch": 0.9863746602805504, "grad_norm": 2.387025833129883, "learning_rate": 7.177677998717127e-08, "loss": 0.8373, "step": 80935 }, { "epoch": 0.9864355965046981, "grad_norm": 2.00274920463562, "learning_rate": 7.145606157793459e-08, "loss": 0.8433, "step": 80940 }, { "epoch": 0.986496532728846, "grad_norm": 2.1234707832336426, "learning_rate": 7.11353431686979e-08, "loss": 0.7592, "step": 80945 }, { "epoch": 0.9865574689529938, "grad_norm": 1.8560501337051392, "learning_rate": 7.08146247594612e-08, "loss": 0.7793, "step": 80950 }, { "epoch": 0.9866184051771416, "grad_norm": 1.9562513828277588, "learning_rate": 7.049390635022451e-08, "loss": 0.8048, "step": 80955 }, { "epoch": 0.9866793414012894, "grad_norm": 1.8074373006820679, "learning_rate": 7.017318794098782e-08, "loss": 0.7388, "step": 80960 }, { "epoch": 0.9867402776254373, "grad_norm": 1.6621335744857788, "learning_rate": 6.985246953175113e-08, "loss": 0.8634, "step": 80965 }, { "epoch": 0.9868012138495851, "grad_norm": 1.8354228734970093, "learning_rate": 6.953175112251444e-08, "loss": 0.7281, "step": 80970 }, { "epoch": 0.9868621500737328, "grad_norm": 2.0038349628448486, "learning_rate": 6.921103271327774e-08, "loss": 0.7907, "step": 80975 }, { "epoch": 0.9869230862978806, "grad_norm": 1.5985946655273438, "learning_rate": 6.889031430404105e-08, "loss": 0.8062, "step": 80980 }, { "epoch": 0.9869840225220284, "grad_norm": 1.9476454257965088, "learning_rate": 6.856959589480436e-08, "loss": 0.742, "step": 80985 }, { "epoch": 0.9870449587461763, "grad_norm": 1.9034830331802368, "learning_rate": 6.824887748556768e-08, "loss": 0.77, "step": 80990 }, { "epoch": 0.9871058949703241, "grad_norm": 2.174149751663208, "learning_rate": 6.792815907633099e-08, "loss": 0.8163, "step": 80995 }, { "epoch": 0.9871668311944719, "grad_norm": 1.8000410795211792, "learning_rate": 6.76074406670943e-08, "loss": 0.8239, "step": 81000 }, { "epoch": 0.9872277674186196, "grad_norm": 1.609694242477417, "learning_rate": 6.72867222578576e-08, "loss": 0.8046, "step": 81005 }, { "epoch": 0.9872887036427674, "grad_norm": 2.143378257751465, "learning_rate": 6.696600384862093e-08, "loss": 0.8564, "step": 81010 }, { "epoch": 0.9873496398669153, "grad_norm": 1.5932875871658325, "learning_rate": 6.664528543938423e-08, "loss": 0.7667, "step": 81015 }, { "epoch": 0.9874105760910631, "grad_norm": 1.8171347379684448, "learning_rate": 6.632456703014754e-08, "loss": 0.7649, "step": 81020 }, { "epoch": 0.9874715123152109, "grad_norm": 2.040856122970581, "learning_rate": 6.600384862091085e-08, "loss": 0.8407, "step": 81025 }, { "epoch": 0.9875324485393587, "grad_norm": 1.6801185607910156, "learning_rate": 6.568313021167416e-08, "loss": 0.7254, "step": 81030 }, { "epoch": 0.9875933847635066, "grad_norm": 1.8596441745758057, "learning_rate": 6.536241180243747e-08, "loss": 0.7937, "step": 81035 }, { "epoch": 0.9876543209876543, "grad_norm": 2.008575916290283, "learning_rate": 6.504169339320077e-08, "loss": 0.8436, "step": 81040 }, { "epoch": 0.9877152572118021, "grad_norm": 2.1941609382629395, "learning_rate": 6.472097498396408e-08, "loss": 0.8055, "step": 81045 }, { "epoch": 0.9877761934359499, "grad_norm": 2.2572858333587646, "learning_rate": 6.440025657472739e-08, "loss": 0.7622, "step": 81050 }, { "epoch": 0.9878371296600977, "grad_norm": 2.351745367050171, "learning_rate": 6.40795381654907e-08, "loss": 0.8288, "step": 81055 }, { "epoch": 0.9878980658842456, "grad_norm": 2.0364646911621094, "learning_rate": 6.375881975625402e-08, "loss": 0.7919, "step": 81060 }, { "epoch": 0.9879590021083934, "grad_norm": 2.360395669937134, "learning_rate": 6.343810134701733e-08, "loss": 0.8325, "step": 81065 }, { "epoch": 0.9880199383325412, "grad_norm": 1.633528470993042, "learning_rate": 6.311738293778063e-08, "loss": 0.8036, "step": 81070 }, { "epoch": 0.9880808745566889, "grad_norm": 2.1033549308776855, "learning_rate": 6.279666452854394e-08, "loss": 0.8342, "step": 81075 }, { "epoch": 0.9881418107808367, "grad_norm": 1.9511651992797852, "learning_rate": 6.247594611930726e-08, "loss": 0.8769, "step": 81080 }, { "epoch": 0.9882027470049846, "grad_norm": 2.0812296867370605, "learning_rate": 6.215522771007057e-08, "loss": 0.7844, "step": 81085 }, { "epoch": 0.9882636832291324, "grad_norm": 2.025503158569336, "learning_rate": 6.183450930083388e-08, "loss": 0.8083, "step": 81090 }, { "epoch": 0.9883246194532802, "grad_norm": 2.231165885925293, "learning_rate": 6.151379089159719e-08, "loss": 0.8354, "step": 81095 }, { "epoch": 0.988385555677428, "grad_norm": 2.0007688999176025, "learning_rate": 6.11930724823605e-08, "loss": 0.8418, "step": 81100 }, { "epoch": 0.9884464919015759, "grad_norm": 1.9556962251663208, "learning_rate": 6.08723540731238e-08, "loss": 0.8222, "step": 81105 }, { "epoch": 0.9885074281257236, "grad_norm": 1.7426607608795166, "learning_rate": 6.055163566388711e-08, "loss": 0.8062, "step": 81110 }, { "epoch": 0.9885683643498714, "grad_norm": 2.1406798362731934, "learning_rate": 6.023091725465042e-08, "loss": 0.7933, "step": 81115 }, { "epoch": 0.9886293005740192, "grad_norm": 2.0447747707366943, "learning_rate": 5.991019884541373e-08, "loss": 0.7971, "step": 81120 }, { "epoch": 0.988690236798167, "grad_norm": 1.8356002569198608, "learning_rate": 5.958948043617704e-08, "loss": 0.8413, "step": 81125 }, { "epoch": 0.9887511730223149, "grad_norm": 1.7197846174240112, "learning_rate": 5.926876202694035e-08, "loss": 0.8307, "step": 81130 }, { "epoch": 0.9888121092464627, "grad_norm": 1.863686203956604, "learning_rate": 5.894804361770366e-08, "loss": 0.7961, "step": 81135 }, { "epoch": 0.9888730454706105, "grad_norm": 1.7297998666763306, "learning_rate": 5.8627325208466965e-08, "loss": 0.7941, "step": 81140 }, { "epoch": 0.9889339816947582, "grad_norm": 1.942186951637268, "learning_rate": 5.830660679923028e-08, "loss": 0.8468, "step": 81145 }, { "epoch": 0.988994917918906, "grad_norm": 2.0347912311553955, "learning_rate": 5.798588838999359e-08, "loss": 0.831, "step": 81150 }, { "epoch": 0.9890558541430539, "grad_norm": 1.9469630718231201, "learning_rate": 5.7665169980756896e-08, "loss": 0.8285, "step": 81155 }, { "epoch": 0.9891167903672017, "grad_norm": 1.9429315328598022, "learning_rate": 5.734445157152022e-08, "loss": 0.8128, "step": 81160 }, { "epoch": 0.9891777265913495, "grad_norm": 1.9032394886016846, "learning_rate": 5.7023733162283525e-08, "loss": 0.735, "step": 81165 }, { "epoch": 0.9892386628154973, "grad_norm": 1.9268521070480347, "learning_rate": 5.670301475304683e-08, "loss": 0.818, "step": 81170 }, { "epoch": 0.9892995990396452, "grad_norm": 1.7742477655410767, "learning_rate": 5.638229634381014e-08, "loss": 0.8321, "step": 81175 }, { "epoch": 0.9893605352637929, "grad_norm": 1.9870960712432861, "learning_rate": 5.606157793457345e-08, "loss": 0.8134, "step": 81180 }, { "epoch": 0.9894214714879407, "grad_norm": 1.9910014867782593, "learning_rate": 5.574085952533676e-08, "loss": 0.8744, "step": 81185 }, { "epoch": 0.9894824077120885, "grad_norm": 2.2010433673858643, "learning_rate": 5.542014111610007e-08, "loss": 0.8041, "step": 81190 }, { "epoch": 0.9895433439362363, "grad_norm": 1.873126745223999, "learning_rate": 5.509942270686338e-08, "loss": 0.8234, "step": 81195 }, { "epoch": 0.9896042801603842, "grad_norm": 1.71161687374115, "learning_rate": 5.477870429762669e-08, "loss": 0.8249, "step": 81200 }, { "epoch": 0.989665216384532, "grad_norm": 1.7700631618499756, "learning_rate": 5.4457985888389995e-08, "loss": 0.8271, "step": 81205 }, { "epoch": 0.9897261526086798, "grad_norm": 1.8945672512054443, "learning_rate": 5.41372674791533e-08, "loss": 0.7665, "step": 81210 }, { "epoch": 0.9897870888328275, "grad_norm": 2.106131076812744, "learning_rate": 5.381654906991662e-08, "loss": 0.7349, "step": 81215 }, { "epoch": 0.9898480250569753, "grad_norm": 2.2640185356140137, "learning_rate": 5.3495830660679925e-08, "loss": 0.8109, "step": 81220 }, { "epoch": 0.9899089612811232, "grad_norm": 1.8051153421401978, "learning_rate": 5.317511225144323e-08, "loss": 0.77, "step": 81225 }, { "epoch": 0.989969897505271, "grad_norm": 2.527323007583618, "learning_rate": 5.285439384220654e-08, "loss": 0.7182, "step": 81230 }, { "epoch": 0.9900308337294188, "grad_norm": 2.4855823516845703, "learning_rate": 5.253367543296986e-08, "loss": 0.7976, "step": 81235 }, { "epoch": 0.9900917699535666, "grad_norm": 1.779338002204895, "learning_rate": 5.221295702373317e-08, "loss": 0.7673, "step": 81240 }, { "epoch": 0.9901527061777144, "grad_norm": 1.8624321222305298, "learning_rate": 5.189223861449648e-08, "loss": 0.8478, "step": 81245 }, { "epoch": 0.9902136424018622, "grad_norm": 1.9651787281036377, "learning_rate": 5.1571520205259786e-08, "loss": 0.7963, "step": 81250 }, { "epoch": 0.99027457862601, "grad_norm": 2.103358507156372, "learning_rate": 5.12508017960231e-08, "loss": 0.8254, "step": 81255 }, { "epoch": 0.9903355148501578, "grad_norm": 1.990505576133728, "learning_rate": 5.093008338678641e-08, "loss": 0.7829, "step": 81260 }, { "epoch": 0.9903964510743056, "grad_norm": 2.1191439628601074, "learning_rate": 5.0609364977549717e-08, "loss": 0.8186, "step": 81265 }, { "epoch": 0.9904573872984535, "grad_norm": 1.9028723239898682, "learning_rate": 5.0288646568313025e-08, "loss": 0.7896, "step": 81270 }, { "epoch": 0.9905183235226013, "grad_norm": 2.0638656616210938, "learning_rate": 4.996792815907633e-08, "loss": 0.8385, "step": 81275 }, { "epoch": 0.9905792597467491, "grad_norm": 1.8035911321640015, "learning_rate": 4.964720974983964e-08, "loss": 0.8572, "step": 81280 }, { "epoch": 0.9906401959708968, "grad_norm": 2.291639804840088, "learning_rate": 4.9326491340602955e-08, "loss": 0.7426, "step": 81285 }, { "epoch": 0.9907011321950446, "grad_norm": 1.8891377449035645, "learning_rate": 4.900577293136626e-08, "loss": 0.7686, "step": 81290 }, { "epoch": 0.9907620684191925, "grad_norm": 2.267057180404663, "learning_rate": 4.868505452212957e-08, "loss": 0.8608, "step": 81295 }, { "epoch": 0.9908230046433403, "grad_norm": 1.7531973123550415, "learning_rate": 4.836433611289288e-08, "loss": 0.7633, "step": 81300 }, { "epoch": 0.9908839408674881, "grad_norm": 1.7401478290557861, "learning_rate": 4.804361770365619e-08, "loss": 0.8232, "step": 81305 }, { "epoch": 0.9909448770916359, "grad_norm": 2.1109511852264404, "learning_rate": 4.772289929441951e-08, "loss": 0.8245, "step": 81310 }, { "epoch": 0.9910058133157837, "grad_norm": 2.2197558879852295, "learning_rate": 4.7402180885182816e-08, "loss": 0.7701, "step": 81315 }, { "epoch": 0.9910667495399315, "grad_norm": 2.5190627574920654, "learning_rate": 4.7081462475946124e-08, "loss": 0.8335, "step": 81320 }, { "epoch": 0.9911276857640793, "grad_norm": 1.878282904624939, "learning_rate": 4.676074406670944e-08, "loss": 0.7947, "step": 81325 }, { "epoch": 0.9911886219882271, "grad_norm": 1.8639864921569824, "learning_rate": 4.6440025657472746e-08, "loss": 0.8344, "step": 81330 }, { "epoch": 0.9912495582123749, "grad_norm": 1.7980526685714722, "learning_rate": 4.6119307248236054e-08, "loss": 0.7613, "step": 81335 }, { "epoch": 0.9913104944365227, "grad_norm": 1.9412386417388916, "learning_rate": 4.579858883899936e-08, "loss": 0.8513, "step": 81340 }, { "epoch": 0.9913714306606706, "grad_norm": 2.1164305210113525, "learning_rate": 4.547787042976267e-08, "loss": 0.8089, "step": 81345 }, { "epoch": 0.9914323668848184, "grad_norm": 2.0168557167053223, "learning_rate": 4.515715202052598e-08, "loss": 0.8359, "step": 81350 }, { "epoch": 0.9914933031089661, "grad_norm": 1.6763149499893188, "learning_rate": 4.483643361128929e-08, "loss": 0.8079, "step": 81355 }, { "epoch": 0.9915542393331139, "grad_norm": 2.0752971172332764, "learning_rate": 4.45157152020526e-08, "loss": 0.8966, "step": 81360 }, { "epoch": 0.9916151755572618, "grad_norm": 2.047135353088379, "learning_rate": 4.419499679281591e-08, "loss": 0.8591, "step": 81365 }, { "epoch": 0.9916761117814096, "grad_norm": 1.9382387399673462, "learning_rate": 4.3874278383579217e-08, "loss": 0.8064, "step": 81370 }, { "epoch": 0.9917370480055574, "grad_norm": 1.7284653186798096, "learning_rate": 4.3553559974342524e-08, "loss": 0.8251, "step": 81375 }, { "epoch": 0.9917979842297052, "grad_norm": 1.853331446647644, "learning_rate": 4.323284156510584e-08, "loss": 0.8045, "step": 81380 }, { "epoch": 0.991858920453853, "grad_norm": 2.3579764366149902, "learning_rate": 4.2912123155869154e-08, "loss": 0.776, "step": 81385 }, { "epoch": 0.9919198566780008, "grad_norm": 2.172311782836914, "learning_rate": 4.259140474663246e-08, "loss": 0.8254, "step": 81390 }, { "epoch": 0.9919807929021486, "grad_norm": 1.9058012962341309, "learning_rate": 4.2270686337395776e-08, "loss": 0.88, "step": 81395 }, { "epoch": 0.9920417291262964, "grad_norm": 1.846209168434143, "learning_rate": 4.1949967928159084e-08, "loss": 0.8618, "step": 81400 }, { "epoch": 0.9921026653504442, "grad_norm": 2.116858959197998, "learning_rate": 4.162924951892239e-08, "loss": 0.7915, "step": 81405 }, { "epoch": 0.992163601574592, "grad_norm": 1.9812467098236084, "learning_rate": 4.13085311096857e-08, "loss": 0.81, "step": 81410 }, { "epoch": 0.9922245377987399, "grad_norm": 1.9476662874221802, "learning_rate": 4.098781270044901e-08, "loss": 0.7731, "step": 81415 }, { "epoch": 0.9922854740228877, "grad_norm": 2.114924430847168, "learning_rate": 4.0667094291212316e-08, "loss": 0.7829, "step": 81420 }, { "epoch": 0.9923464102470354, "grad_norm": 1.8783458471298218, "learning_rate": 4.034637588197563e-08, "loss": 0.8419, "step": 81425 }, { "epoch": 0.9924073464711832, "grad_norm": 2.172462224960327, "learning_rate": 4.002565747273894e-08, "loss": 0.8604, "step": 81430 }, { "epoch": 0.992468282695331, "grad_norm": 1.940732479095459, "learning_rate": 3.9704939063502246e-08, "loss": 0.7265, "step": 81435 }, { "epoch": 0.9925292189194789, "grad_norm": 1.7146391868591309, "learning_rate": 3.9384220654265554e-08, "loss": 0.7588, "step": 81440 }, { "epoch": 0.9925901551436267, "grad_norm": 2.2316582202911377, "learning_rate": 3.906350224502886e-08, "loss": 0.811, "step": 81445 }, { "epoch": 0.9926510913677745, "grad_norm": 1.7234456539154053, "learning_rate": 3.874278383579218e-08, "loss": 0.8218, "step": 81450 }, { "epoch": 0.9927120275919223, "grad_norm": 2.084172248840332, "learning_rate": 3.8422065426555485e-08, "loss": 0.7963, "step": 81455 }, { "epoch": 0.99277296381607, "grad_norm": 1.8764817714691162, "learning_rate": 3.81013470173188e-08, "loss": 0.7933, "step": 81460 }, { "epoch": 0.9928339000402179, "grad_norm": 3.4685754776000977, "learning_rate": 3.778062860808211e-08, "loss": 0.7872, "step": 81465 }, { "epoch": 0.9928948362643657, "grad_norm": 2.1160175800323486, "learning_rate": 3.7459910198845415e-08, "loss": 0.7742, "step": 81470 }, { "epoch": 0.9929557724885135, "grad_norm": 2.145418405532837, "learning_rate": 3.713919178960872e-08, "loss": 0.767, "step": 81475 }, { "epoch": 0.9930167087126613, "grad_norm": 2.1071553230285645, "learning_rate": 3.681847338037204e-08, "loss": 0.7509, "step": 81480 }, { "epoch": 0.9930776449368092, "grad_norm": 2.1125500202178955, "learning_rate": 3.6497754971135346e-08, "loss": 0.8358, "step": 81485 }, { "epoch": 0.993138581160957, "grad_norm": 1.9347715377807617, "learning_rate": 3.6177036561898653e-08, "loss": 0.8024, "step": 81490 }, { "epoch": 0.9931995173851047, "grad_norm": 1.7634177207946777, "learning_rate": 3.585631815266197e-08, "loss": 0.8337, "step": 81495 }, { "epoch": 0.9932604536092525, "grad_norm": 1.8034465312957764, "learning_rate": 3.5535599743425276e-08, "loss": 0.7584, "step": 81500 }, { "epoch": 0.9933213898334003, "grad_norm": 2.3614554405212402, "learning_rate": 3.5214881334188584e-08, "loss": 0.8192, "step": 81505 }, { "epoch": 0.9933823260575482, "grad_norm": 2.242375373840332, "learning_rate": 3.489416292495189e-08, "loss": 0.8031, "step": 81510 }, { "epoch": 0.993443262281696, "grad_norm": 1.8957873582839966, "learning_rate": 3.4573444515715206e-08, "loss": 0.844, "step": 81515 }, { "epoch": 0.9935041985058438, "grad_norm": 2.2270071506500244, "learning_rate": 3.4252726106478514e-08, "loss": 0.8572, "step": 81520 }, { "epoch": 0.9935651347299916, "grad_norm": 2.049468994140625, "learning_rate": 3.393200769724182e-08, "loss": 0.8785, "step": 81525 }, { "epoch": 0.9936260709541394, "grad_norm": 2.190319299697876, "learning_rate": 3.361128928800514e-08, "loss": 0.7743, "step": 81530 }, { "epoch": 0.9936870071782872, "grad_norm": 1.8931113481521606, "learning_rate": 3.3290570878768445e-08, "loss": 0.8313, "step": 81535 }, { "epoch": 0.993747943402435, "grad_norm": 1.7727612257003784, "learning_rate": 3.296985246953175e-08, "loss": 0.8132, "step": 81540 }, { "epoch": 0.9938088796265828, "grad_norm": 1.9192460775375366, "learning_rate": 3.264913406029506e-08, "loss": 0.8384, "step": 81545 }, { "epoch": 0.9938698158507306, "grad_norm": 1.8220241069793701, "learning_rate": 3.232841565105837e-08, "loss": 0.8602, "step": 81550 }, { "epoch": 0.9939307520748785, "grad_norm": 1.7055597305297852, "learning_rate": 3.200769724182168e-08, "loss": 0.7871, "step": 81555 }, { "epoch": 0.9939916882990263, "grad_norm": 1.8658076524734497, "learning_rate": 3.168697883258499e-08, "loss": 0.8155, "step": 81560 }, { "epoch": 0.994052624523174, "grad_norm": 3.0985090732574463, "learning_rate": 3.1366260423348306e-08, "loss": 0.7804, "step": 81565 }, { "epoch": 0.9941135607473218, "grad_norm": 2.0313243865966797, "learning_rate": 3.1045542014111614e-08, "loss": 0.7891, "step": 81570 }, { "epoch": 0.9941744969714696, "grad_norm": 1.9787147045135498, "learning_rate": 3.072482360487492e-08, "loss": 0.7463, "step": 81575 }, { "epoch": 0.9942354331956175, "grad_norm": 2.2728970050811768, "learning_rate": 3.040410519563823e-08, "loss": 0.8253, "step": 81580 }, { "epoch": 0.9942963694197653, "grad_norm": 1.8056323528289795, "learning_rate": 3.008338678640154e-08, "loss": 0.8065, "step": 81585 }, { "epoch": 0.9943573056439131, "grad_norm": 2.310173749923706, "learning_rate": 2.9762668377164855e-08, "loss": 0.898, "step": 81590 }, { "epoch": 0.9944182418680609, "grad_norm": 1.9912821054458618, "learning_rate": 2.9441949967928163e-08, "loss": 0.8106, "step": 81595 }, { "epoch": 0.9944791780922086, "grad_norm": 1.7765966653823853, "learning_rate": 2.912123155869147e-08, "loss": 0.8106, "step": 81600 }, { "epoch": 0.9945401143163565, "grad_norm": 1.7172409296035767, "learning_rate": 2.8800513149454783e-08, "loss": 0.8162, "step": 81605 }, { "epoch": 0.9946010505405043, "grad_norm": 2.136746406555176, "learning_rate": 2.847979474021809e-08, "loss": 0.8646, "step": 81610 }, { "epoch": 0.9946619867646521, "grad_norm": 2.266066074371338, "learning_rate": 2.81590763309814e-08, "loss": 0.758, "step": 81615 }, { "epoch": 0.9947229229887999, "grad_norm": 2.0318851470947266, "learning_rate": 2.783835792174471e-08, "loss": 0.7799, "step": 81620 }, { "epoch": 0.9947838592129478, "grad_norm": 2.2135233879089355, "learning_rate": 2.7517639512508018e-08, "loss": 0.7894, "step": 81625 }, { "epoch": 0.9948447954370956, "grad_norm": 2.147404432296753, "learning_rate": 2.7196921103271332e-08, "loss": 0.8118, "step": 81630 }, { "epoch": 0.9949057316612433, "grad_norm": 1.8457846641540527, "learning_rate": 2.687620269403464e-08, "loss": 0.8317, "step": 81635 }, { "epoch": 0.9949666678853911, "grad_norm": 2.0569047927856445, "learning_rate": 2.655548428479795e-08, "loss": 0.8354, "step": 81640 }, { "epoch": 0.9950276041095389, "grad_norm": 2.0446434020996094, "learning_rate": 2.623476587556126e-08, "loss": 0.8451, "step": 81645 }, { "epoch": 0.9950885403336868, "grad_norm": 2.020120143890381, "learning_rate": 2.5914047466324567e-08, "loss": 0.826, "step": 81650 }, { "epoch": 0.9951494765578346, "grad_norm": 1.9737427234649658, "learning_rate": 2.559332905708788e-08, "loss": 0.8515, "step": 81655 }, { "epoch": 0.9952104127819824, "grad_norm": 1.9538421630859375, "learning_rate": 2.5272610647851186e-08, "loss": 0.7826, "step": 81660 }, { "epoch": 0.9952713490061302, "grad_norm": 1.8194496631622314, "learning_rate": 2.4951892238614498e-08, "loss": 0.8097, "step": 81665 }, { "epoch": 0.995332285230278, "grad_norm": 1.7347097396850586, "learning_rate": 2.463117382937781e-08, "loss": 0.7926, "step": 81670 }, { "epoch": 0.9953932214544258, "grad_norm": 1.9241434335708618, "learning_rate": 2.431045542014112e-08, "loss": 0.7659, "step": 81675 }, { "epoch": 0.9954541576785736, "grad_norm": 2.402512311935425, "learning_rate": 2.3989737010904428e-08, "loss": 0.7924, "step": 81680 }, { "epoch": 0.9955150939027214, "grad_norm": 1.8758985996246338, "learning_rate": 2.3669018601667736e-08, "loss": 0.7409, "step": 81685 }, { "epoch": 0.9955760301268692, "grad_norm": 2.5212597846984863, "learning_rate": 2.3348300192431047e-08, "loss": 0.8196, "step": 81690 }, { "epoch": 0.9956369663510171, "grad_norm": 1.8109673261642456, "learning_rate": 2.3027581783194355e-08, "loss": 0.8717, "step": 81695 }, { "epoch": 0.9956979025751649, "grad_norm": 2.292048454284668, "learning_rate": 2.2706863373957667e-08, "loss": 0.829, "step": 81700 }, { "epoch": 0.9957588387993126, "grad_norm": 1.9412744045257568, "learning_rate": 2.2386144964720978e-08, "loss": 0.8128, "step": 81705 }, { "epoch": 0.9958197750234604, "grad_norm": 1.9425780773162842, "learning_rate": 2.206542655548429e-08, "loss": 0.7628, "step": 81710 }, { "epoch": 0.9958807112476082, "grad_norm": 2.044882297515869, "learning_rate": 2.1744708146247597e-08, "loss": 0.8655, "step": 81715 }, { "epoch": 0.9959416474717561, "grad_norm": 2.0844106674194336, "learning_rate": 2.1423989737010905e-08, "loss": 0.83, "step": 81720 }, { "epoch": 0.9960025836959039, "grad_norm": 2.278439521789551, "learning_rate": 2.1103271327774216e-08, "loss": 0.8225, "step": 81725 }, { "epoch": 0.9960635199200517, "grad_norm": 1.9277828931808472, "learning_rate": 2.0782552918537524e-08, "loss": 0.7987, "step": 81730 }, { "epoch": 0.9961244561441995, "grad_norm": 2.0464022159576416, "learning_rate": 2.0461834509300835e-08, "loss": 0.8036, "step": 81735 }, { "epoch": 0.9961853923683472, "grad_norm": 2.181192636489868, "learning_rate": 2.0141116100064143e-08, "loss": 0.7818, "step": 81740 }, { "epoch": 0.9962463285924951, "grad_norm": 1.7275575399398804, "learning_rate": 1.9820397690827458e-08, "loss": 0.8865, "step": 81745 }, { "epoch": 0.9963072648166429, "grad_norm": 2.320075750350952, "learning_rate": 1.9499679281590766e-08, "loss": 0.7467, "step": 81750 }, { "epoch": 0.9963682010407907, "grad_norm": 2.079035520553589, "learning_rate": 1.9178960872354074e-08, "loss": 0.7689, "step": 81755 }, { "epoch": 0.9964291372649385, "grad_norm": 2.7788097858428955, "learning_rate": 1.8858242463117385e-08, "loss": 0.7488, "step": 81760 }, { "epoch": 0.9964900734890864, "grad_norm": 1.9581990242004395, "learning_rate": 1.8537524053880693e-08, "loss": 0.7563, "step": 81765 }, { "epoch": 0.9965510097132342, "grad_norm": 2.187617063522339, "learning_rate": 1.8216805644644004e-08, "loss": 0.7633, "step": 81770 }, { "epoch": 0.9966119459373819, "grad_norm": 2.3636438846588135, "learning_rate": 1.7896087235407315e-08, "loss": 0.8458, "step": 81775 }, { "epoch": 0.9966728821615297, "grad_norm": 1.715108036994934, "learning_rate": 1.7575368826170623e-08, "loss": 0.8266, "step": 81780 }, { "epoch": 0.9967338183856775, "grad_norm": 1.8991144895553589, "learning_rate": 1.725465041693393e-08, "loss": 0.804, "step": 81785 }, { "epoch": 0.9967947546098254, "grad_norm": 2.3018946647644043, "learning_rate": 1.6933932007697243e-08, "loss": 0.7559, "step": 81790 }, { "epoch": 0.9968556908339732, "grad_norm": 2.0974931716918945, "learning_rate": 1.6613213598460554e-08, "loss": 0.8704, "step": 81795 }, { "epoch": 0.996916627058121, "grad_norm": 2.1308951377868652, "learning_rate": 1.6292495189223862e-08, "loss": 0.789, "step": 81800 }, { "epoch": 0.9969775632822688, "grad_norm": 1.9018877744674683, "learning_rate": 1.5971776779987173e-08, "loss": 0.8288, "step": 81805 }, { "epoch": 0.9970384995064165, "grad_norm": 2.071955442428589, "learning_rate": 1.5651058370750484e-08, "loss": 0.8241, "step": 81810 }, { "epoch": 0.9970994357305644, "grad_norm": 1.9344464540481567, "learning_rate": 1.5330339961513792e-08, "loss": 0.8127, "step": 81815 }, { "epoch": 0.9971603719547122, "grad_norm": 1.5494614839553833, "learning_rate": 1.50096215522771e-08, "loss": 0.8156, "step": 81820 }, { "epoch": 0.99722130817886, "grad_norm": 1.9805138111114502, "learning_rate": 1.4688903143040411e-08, "loss": 0.8466, "step": 81825 }, { "epoch": 0.9972822444030078, "grad_norm": 2.1229469776153564, "learning_rate": 1.4368184733803723e-08, "loss": 0.8472, "step": 81830 }, { "epoch": 0.9973431806271557, "grad_norm": 2.619534730911255, "learning_rate": 1.404746632456703e-08, "loss": 0.8372, "step": 81835 }, { "epoch": 0.9974041168513035, "grad_norm": 2.375450611114502, "learning_rate": 1.372674791533034e-08, "loss": 0.785, "step": 81840 }, { "epoch": 0.9974650530754512, "grad_norm": 2.7380213737487793, "learning_rate": 1.3406029506093652e-08, "loss": 0.8297, "step": 81845 }, { "epoch": 0.997525989299599, "grad_norm": 2.0282020568847656, "learning_rate": 1.3085311096856961e-08, "loss": 0.7838, "step": 81850 }, { "epoch": 0.9975869255237468, "grad_norm": 2.1449410915374756, "learning_rate": 1.276459268762027e-08, "loss": 0.8603, "step": 81855 }, { "epoch": 0.9976478617478947, "grad_norm": 2.077646017074585, "learning_rate": 1.244387427838358e-08, "loss": 0.7719, "step": 81860 }, { "epoch": 0.9977087979720425, "grad_norm": 2.1676557064056396, "learning_rate": 1.2123155869146892e-08, "loss": 0.8281, "step": 81865 }, { "epoch": 0.9977697341961903, "grad_norm": 2.1042699813842773, "learning_rate": 1.18024374599102e-08, "loss": 0.8728, "step": 81870 }, { "epoch": 0.9978306704203381, "grad_norm": 2.045186996459961, "learning_rate": 1.1481719050673509e-08, "loss": 0.7338, "step": 81875 }, { "epoch": 0.9978916066444858, "grad_norm": 1.9489303827285767, "learning_rate": 1.1161000641436819e-08, "loss": 0.7914, "step": 81880 }, { "epoch": 0.9979525428686337, "grad_norm": 2.0789542198181152, "learning_rate": 1.084028223220013e-08, "loss": 0.7686, "step": 81885 }, { "epoch": 0.9980134790927815, "grad_norm": 2.0418412685394287, "learning_rate": 1.051956382296344e-08, "loss": 0.8525, "step": 81890 }, { "epoch": 0.9980744153169293, "grad_norm": 2.037126064300537, "learning_rate": 1.0198845413726749e-08, "loss": 0.8632, "step": 81895 }, { "epoch": 0.9981353515410771, "grad_norm": 1.92988920211792, "learning_rate": 9.878127004490057e-09, "loss": 0.839, "step": 81900 }, { "epoch": 0.998196287765225, "grad_norm": 1.8853763341903687, "learning_rate": 9.557408595253368e-09, "loss": 0.7615, "step": 81905 }, { "epoch": 0.9982572239893728, "grad_norm": 2.19502854347229, "learning_rate": 9.236690186016678e-09, "loss": 0.8256, "step": 81910 }, { "epoch": 0.9983181602135205, "grad_norm": 1.9263882637023926, "learning_rate": 8.915971776779988e-09, "loss": 0.809, "step": 81915 }, { "epoch": 0.9983790964376683, "grad_norm": 1.7684417963027954, "learning_rate": 8.595253367543297e-09, "loss": 0.7871, "step": 81920 }, { "epoch": 0.9984400326618161, "grad_norm": 1.771628499031067, "learning_rate": 8.274534958306608e-09, "loss": 0.7591, "step": 81925 }, { "epoch": 0.998500968885964, "grad_norm": 2.5982398986816406, "learning_rate": 7.953816549069918e-09, "loss": 0.7457, "step": 81930 }, { "epoch": 0.9985619051101118, "grad_norm": 2.1202147006988525, "learning_rate": 7.633098139833228e-09, "loss": 0.8164, "step": 81935 }, { "epoch": 0.9986228413342596, "grad_norm": 2.047769546508789, "learning_rate": 7.312379730596536e-09, "loss": 0.8313, "step": 81940 }, { "epoch": 0.9986837775584074, "grad_norm": 1.9175969362258911, "learning_rate": 6.991661321359847e-09, "loss": 0.8216, "step": 81945 }, { "epoch": 0.9987447137825551, "grad_norm": 2.319065809249878, "learning_rate": 6.670942912123156e-09, "loss": 0.8531, "step": 81950 }, { "epoch": 0.998805650006703, "grad_norm": 1.8024057149887085, "learning_rate": 6.350224502886467e-09, "loss": 0.8085, "step": 81955 }, { "epoch": 0.9988665862308508, "grad_norm": 1.9753048419952393, "learning_rate": 6.0295060936497756e-09, "loss": 0.854, "step": 81960 }, { "epoch": 0.9989275224549986, "grad_norm": 2.459956407546997, "learning_rate": 5.708787684413086e-09, "loss": 0.7855, "step": 81965 }, { "epoch": 0.9989884586791464, "grad_norm": 1.902904987335205, "learning_rate": 5.388069275176396e-09, "loss": 0.7729, "step": 81970 }, { "epoch": 0.9990493949032943, "grad_norm": 2.1772348880767822, "learning_rate": 5.067350865939705e-09, "loss": 0.8638, "step": 81975 }, { "epoch": 0.999110331127442, "grad_norm": 1.9171198606491089, "learning_rate": 4.746632456703016e-09, "loss": 0.842, "step": 81980 }, { "epoch": 0.9991712673515898, "grad_norm": 1.8548849821090698, "learning_rate": 4.425914047466325e-09, "loss": 0.8148, "step": 81985 }, { "epoch": 0.9992322035757376, "grad_norm": 1.9633116722106934, "learning_rate": 4.105195638229635e-09, "loss": 0.7993, "step": 81990 }, { "epoch": 0.9992931397998854, "grad_norm": 1.809938669204712, "learning_rate": 3.7844772289929444e-09, "loss": 0.7124, "step": 81995 }, { "epoch": 0.9993540760240333, "grad_norm": 2.0251123905181885, "learning_rate": 3.4637588197562544e-09, "loss": 0.7376, "step": 82000 }, { "epoch": 0.9994150122481811, "grad_norm": 2.023805618286133, "learning_rate": 3.143040410519564e-09, "loss": 0.8193, "step": 82005 }, { "epoch": 0.9994759484723289, "grad_norm": 1.8836480379104614, "learning_rate": 2.8223220012828736e-09, "loss": 0.7474, "step": 82010 }, { "epoch": 0.9995368846964766, "grad_norm": 1.799195647239685, "learning_rate": 2.5016035920461836e-09, "loss": 0.7953, "step": 82015 }, { "epoch": 0.9995978209206244, "grad_norm": 2.11269211769104, "learning_rate": 2.1808851828094932e-09, "loss": 0.8419, "step": 82020 }, { "epoch": 0.9996587571447723, "grad_norm": 1.9764626026153564, "learning_rate": 1.8601667735728035e-09, "loss": 0.7638, "step": 82025 }, { "epoch": 0.9997196933689201, "grad_norm": 1.7934012413024902, "learning_rate": 1.539448364336113e-09, "loss": 0.7902, "step": 82030 }, { "epoch": 0.9997806295930679, "grad_norm": 1.993320345878601, "learning_rate": 1.2187299550994229e-09, "loss": 0.7815, "step": 82035 }, { "epoch": 0.9998415658172157, "grad_norm": 1.853386402130127, "learning_rate": 8.980115458627327e-10, "loss": 0.8426, "step": 82040 }, { "epoch": 0.9999025020413636, "grad_norm": 1.8868683576583862, "learning_rate": 5.772931366260424e-10, "loss": 0.8363, "step": 82045 }, { "epoch": 0.9999634382655113, "grad_norm": 2.32006573677063, "learning_rate": 2.565747273893522e-10, "loss": 0.8048, "step": 82050 }, { "epoch": 1.0, "step": 82053, "total_flos": 5.792041773377677e+19, "train_loss": 0.8552232229628122, "train_runtime": 63052.9814, "train_samples_per_second": 41.643, "train_steps_per_second": 1.301 } ], "logging_steps": 5, "max_steps": 82053, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.792041773377677e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }